From 4281461c01f702f9427554718988b5e8fbfd64fb Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 8 Oct 2020 16:22:10 +0900 Subject: [PATCH 01/86] trace: events: devfreq: Use fixed indentation size to improve readability Each tracepoint infromation consist of the different size value. So, in order to improve the readability, use the fixed indentation size. Signed-off-by: Chanwoo Choi --- include/trace/events/devfreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/devfreq.h b/include/trace/events/devfreq.h index cf5b8772175d..bd36d28d16bc 100644 --- a/include/trace/events/devfreq.h +++ b/include/trace/events/devfreq.h @@ -29,7 +29,7 @@ TRACE_EVENT(devfreq_monitor, __assign_str(dev_name, dev_name(&devfreq->dev)); ), - TP_printk("dev_name=%s freq=%lu polling_ms=%u load=%lu", + TP_printk("dev_name=%-30s freq=%-12lu polling_ms=%-3u load=%-2lu", __get_str(dev_name), __entry->freq, __entry->polling_ms, __entry->total_time == 0 ? 0 : (100 * __entry->busy_time) / __entry->total_time) From b4365423bb7adf9feb4659126eaec374dfbde806 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 7 Oct 2020 22:02:39 +0900 Subject: [PATCH 02/86] PM / devfreq: Unify frequency change to devfreq_update_target func The update_devfreq() and update_passive_devfreq() have the duplicate code when changing the target frequency on final stage. So, unify frequency change code to devfreq_update_target() to remove the duplicate code and to centralize the frequency change code. Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 29 ++++++++++++++++----- drivers/devfreq/governor.h | 1 + drivers/devfreq/governor_passive.c | 42 +++++++----------------------- 3 files changed, 33 insertions(+), 39 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 861c100f9fac..ab5dd9d5fdc7 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -382,18 +382,19 @@ static int devfreq_set_target(struct devfreq *devfreq, unsigned long new_freq, return err; } -/* Load monitoring helper functions for governors use */ - /** - * update_devfreq() - Reevaluate the device and configure frequency. + * devfreq_update_target() - Reevaluate the device and configure frequency + * on the final stage. * @devfreq: the devfreq instance. + * @freq: the new frequency of parent device. This argument + * is only used for devfreq device using passive governor. * - * Note: Lock devfreq->lock before calling update_devfreq - * This function is exported for governors. + * Note: Lock devfreq->lock before calling devfreq_update_target. This function + * should be only used by both update_devfreq() and devfreq governors. */ -int update_devfreq(struct devfreq *devfreq) +int devfreq_update_target(struct devfreq *devfreq, unsigned long freq) { - unsigned long freq, min_freq, max_freq; + unsigned long min_freq, max_freq; int err = 0; u32 flags = 0; @@ -418,7 +419,21 @@ int update_devfreq(struct devfreq *devfreq) } return devfreq_set_target(devfreq, freq, flags); +} +EXPORT_SYMBOL(devfreq_update_target); +/* Load monitoring helper functions for governors use */ + +/** + * update_devfreq() - Reevaluate the device and configure frequency. + * @devfreq: the devfreq instance. + * + * Note: Lock devfreq->lock before calling update_devfreq + * This function is exported for governors. + */ +int update_devfreq(struct devfreq *devfreq) +{ + return devfreq_update_target(devfreq, 0L); } EXPORT_SYMBOL(update_devfreq); diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index ae4d0cc18359..02cf876244d6 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -67,6 +67,7 @@ int devfreq_add_governor(struct devfreq_governor *governor); int devfreq_remove_governor(struct devfreq_governor *governor); int devfreq_update_status(struct devfreq *devfreq, unsigned long freq); +int devfreq_update_target(struct devfreq *devfreq, unsigned long freq); static inline int devfreq_update_stats(struct devfreq *df) { diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index be6eeab9c814..53a1b1596232 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -92,36 +92,6 @@ out: return ret; } -static int update_devfreq_passive(struct devfreq *devfreq, unsigned long freq) -{ - int ret; - - if (!devfreq->governor) - return -EINVAL; - - mutex_lock_nested(&devfreq->lock, SINGLE_DEPTH_NESTING); - - ret = devfreq->governor->get_target_freq(devfreq, &freq); - if (ret < 0) - goto out; - - ret = devfreq->profile->target(devfreq->dev.parent, &freq, 0); - if (ret < 0) - goto out; - - if (devfreq->profile->freq_table - && (devfreq_update_status(devfreq, freq))) - dev_err(&devfreq->dev, - "Couldn't update frequency transition information.\n"); - - devfreq->previous_freq = freq; - -out: - mutex_unlock(&devfreq->lock); - - return 0; -} - static int devfreq_passive_notifier_call(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -131,17 +101,25 @@ static int devfreq_passive_notifier_call(struct notifier_block *nb, struct devfreq *parent = (struct devfreq *)data->parent; struct devfreq_freqs *freqs = (struct devfreq_freqs *)ptr; unsigned long freq = freqs->new; + int ret = 0; + mutex_lock_nested(&devfreq->lock, SINGLE_DEPTH_NESTING); switch (event) { case DEVFREQ_PRECHANGE: if (parent->previous_freq > freq) - update_devfreq_passive(devfreq, freq); + ret = devfreq_update_target(devfreq, freq); + break; case DEVFREQ_POSTCHANGE: if (parent->previous_freq < freq) - update_devfreq_passive(devfreq, freq); + ret = devfreq_update_target(devfreq, freq); break; } + mutex_unlock(&devfreq->lock); + + if (ret < 0) + dev_warn(&devfreq->dev, + "failed to update devfreq using passive governor\n"); return NOTIFY_DONE; } From cab477d0d4fbae1ed68d3db0b52cb5449a3c5868 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 7 Oct 2020 22:03:59 +0900 Subject: [PATCH 03/86] PM / devfreq: Add tracepoint for frequency changes Add a tracepoint for frequency changes of devfreq devices and use it. Signed-off-by: Matthias Kaehlcke [cw00.choi: Move print position of tracepoint and add more information] Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 8 ++++++++ include/trace/events/devfreq.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index ab5dd9d5fdc7..1b236b9e4d9e 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -367,6 +367,14 @@ static int devfreq_set_target(struct devfreq *devfreq, unsigned long new_freq, return err; } + /* + * Print devfreq_frequency trace information between DEVFREQ_PRECHANGE + * and DEVFREQ_POSTCHANGE because for showing the correct frequency + * change order of between devfreq device and passive devfreq device. + */ + if (trace_devfreq_frequency_enabled() && new_freq != cur_freq) + trace_devfreq_frequency(devfreq, new_freq, cur_freq); + freqs.new = new_freq; devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE); diff --git a/include/trace/events/devfreq.h b/include/trace/events/devfreq.h index bd36d28d16bc..7627c620bbda 100644 --- a/include/trace/events/devfreq.h +++ b/include/trace/events/devfreq.h @@ -8,6 +8,34 @@ #include #include +TRACE_EVENT(devfreq_frequency, + TP_PROTO(struct devfreq *devfreq, unsigned long freq, + unsigned long prev_freq), + + TP_ARGS(devfreq, freq, prev_freq), + + TP_STRUCT__entry( + __string(dev_name, dev_name(&devfreq->dev)) + __field(unsigned long, freq) + __field(unsigned long, prev_freq) + __field(unsigned long, busy_time) + __field(unsigned long, total_time) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name(&devfreq->dev)); + __entry->freq = freq; + __entry->prev_freq = prev_freq; + __entry->busy_time = devfreq->last_status.busy_time; + __entry->total_time = devfreq->last_status.total_time; + ), + + TP_printk("dev_name=%-30s freq=%-12lu prev_freq=%-12lu load=%-2lu", + __get_str(dev_name), __entry->freq, __entry->prev_freq, + __entry->total_time == 0 ? 0 : + (100 * __entry->busy_time) / __entry->total_time) +); + TRACE_EVENT(devfreq_monitor, TP_PROTO(struct devfreq *devfreq), From 0dd25a0d12a134cd2ba950d8c0530d4ece05c63b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 5 Oct 2020 14:48:01 +0900 Subject: [PATCH 04/86] PM / devfreq: Add governor feature flag The devfreq governor is able to have the specific flag as follows in order to implement the specific feature. For example, devfreq allows user to change the governors on runtime via sysfs interface. But, if devfreq device uses 'passive' governor, don't allow user to change the governor. For this case, define the DEVFREQ_GOV_FLAG_IMMUTABLE and set it to flag of passive governor. [Definition for governor flag] - DEVFREQ_GOV_FLAG_IMMUTABLE : If immutable flag is set, governor is never changeable to other governors. - DEVFREQ_GOV_FLAG_IRQ_DRIVEN : Devfreq core won't schedule polling work for this governor if value is set. [Table of governor flag for devfreq governors] ------------------------------------------------------------------------------ | simple | perfor | power | user | passive | tegra30 | ondemand | mance | save | space| | ------------------------------------------------------------------------------ immutable | X | X | X | X | O | O interrupt_driven | X(polling)| X | X | X | X | O (irq) ------------------------------------------------------------------------------ Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 25 ++++++++++++++----------- drivers/devfreq/governor.h | 18 ++++++++++++------ drivers/devfreq/governor_passive.c | 2 +- drivers/devfreq/tegra30-devfreq.c | 4 ++-- 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 1b236b9e4d9e..a862acfe987e 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -31,6 +31,7 @@ #define CREATE_TRACE_POINTS #include +#define IS_SUPPORTED_FLAG(f, name) ((f & DEVFREQ_GOV_FLAG_##name) ? true : false) #define HZ_PER_KHZ 1000 static struct class *devfreq_class; @@ -479,7 +480,7 @@ static void devfreq_monitor(struct work_struct *work) */ void devfreq_monitor_start(struct devfreq *devfreq) { - if (devfreq->governor->interrupt_driven) + if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; switch (devfreq->profile->timer) { @@ -509,7 +510,7 @@ EXPORT_SYMBOL(devfreq_monitor_start); */ void devfreq_monitor_stop(struct devfreq *devfreq) { - if (devfreq->governor->interrupt_driven) + if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; cancel_delayed_work_sync(&devfreq->work); @@ -540,7 +541,7 @@ void devfreq_monitor_suspend(struct devfreq *devfreq) devfreq->stop_polling = true; mutex_unlock(&devfreq->lock); - if (devfreq->governor->interrupt_driven) + if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; cancel_delayed_work_sync(&devfreq->work); @@ -560,12 +561,13 @@ void devfreq_monitor_resume(struct devfreq *devfreq) unsigned long freq; mutex_lock(&devfreq->lock); + + if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) + goto out_update; + if (!devfreq->stop_polling) goto out; - if (devfreq->governor->interrupt_driven) - goto out_update; - if (!delayed_work_pending(&devfreq->work) && devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, @@ -600,10 +602,10 @@ void devfreq_update_interval(struct devfreq *devfreq, unsigned int *delay) mutex_lock(&devfreq->lock); devfreq->profile->polling_ms = new_delay; - if (devfreq->stop_polling) + if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) goto out; - if (devfreq->governor->interrupt_driven) + if (devfreq->stop_polling) goto out; /* if new delay is zero, stop polling */ @@ -1370,7 +1372,8 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, if (df->governor == governor) { ret = 0; goto out; - } else if (df->governor->immutable || governor->immutable) { + } else if (IS_SUPPORTED_FLAG(df->governor->flags, IMMUTABLE) + || IS_SUPPORTED_FLAG(governor->flags, IMMUTABLE)) { ret = -EINVAL; goto out; } @@ -1425,7 +1428,7 @@ static ssize_t available_governors_show(struct device *d, * The devfreq with immutable governor (e.g., passive) shows * only own governor. */ - if (df->governor->immutable) { + if (IS_SUPPORTED_FLAG(df->governor->flags, IMMUTABLE)) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, "%s ", df->governor_name); /* @@ -1436,7 +1439,7 @@ static ssize_t available_governors_show(struct device *d, struct devfreq_governor *governor; list_for_each_entry(governor, &devfreq_governor_list, node) { - if (governor->immutable) + if (IS_SUPPORTED_FLAG(governor->flags, IMMUTABLE)) continue; count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), "%s ", governor->name); diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index 02cf876244d6..7dbb110a869e 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -25,14 +25,21 @@ #define DEVFREQ_MIN_FREQ 0 #define DEVFREQ_MAX_FREQ ULONG_MAX +/* + * Definition of the governor feature flags + * - DEVFREQ_GOV_FLAG_IMMUTABLE + * : This governor is never changeable to other governors. + * - DEVFREQ_GOV_FLAG_IRQ_DRIVEN + * : The devfreq won't schedule the work for this governor. + */ +#define DEVFREQ_GOV_FLAG_IMMUTABLE BIT(0) +#define DEVFREQ_GOV_FLAG_IRQ_DRIVEN BIT(1) + /** * struct devfreq_governor - Devfreq policy governor * @node: list node - contains registered devfreq governors * @name: Governor's name - * @immutable: Immutable flag for governor. If the value is 1, - * this governor is never changeable to other governor. - * @interrupt_driven: Devfreq core won't schedule polling work for this - * governor if value is set to 1. + * @flags: Governor's feature flags * @get_target_freq: Returns desired operating frequency for the device. * Basically, get_target_freq will run * devfreq_dev_profile.get_dev_status() to get the @@ -50,8 +57,7 @@ struct devfreq_governor { struct list_head node; const char name[DEVFREQ_NAME_LEN]; - const unsigned int immutable; - const unsigned int interrupt_driven; + const u64 flags; int (*get_target_freq)(struct devfreq *this, unsigned long *freq); int (*event_handler)(struct devfreq *devfreq, unsigned int event, void *data); diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 53a1b1596232..63332e4a65ae 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -158,7 +158,7 @@ static int devfreq_passive_event_handler(struct devfreq *devfreq, static struct devfreq_governor devfreq_passive = { .name = DEVFREQ_GOV_PASSIVE, - .immutable = 1, + .flags = DEVFREQ_GOV_FLAG_IMMUTABLE, .get_target_freq = devfreq_passive_get_target_freq, .event_handler = devfreq_passive_event_handler, }; diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index f5e74c2ede85..faa1aecf2a31 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -765,10 +765,10 @@ static int tegra_governor_event_handler(struct devfreq *devfreq, static struct devfreq_governor tegra_devfreq_governor = { .name = "tegra_actmon", + .flags = DEVFREQ_GOV_FLAG_IMMUTABLE + | DEVFREQ_GOV_FLAG_IRQ_DRIVEN, .get_target_freq = tegra_governor_get_target, .event_handler = tegra_governor_event_handler, - .immutable = true, - .interrupt_driven = true, }; static int tegra_devfreq_probe(struct platform_device *pdev) From 5f1a9066fcb2cc1d41104c74884f6c6cf010124b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 3 Jul 2020 17:20:27 +0900 Subject: [PATCH 05/86] PM / devfreq: Add governor attribute flag for specifc sysfs nodes DEVFREQ supports the default governors like performance, simple_ondemand and also allows the devfreq driver to add their own governor like tegra30-devfreq.c according to their requirement. In result, some sysfs attributes are useful or not useful. Prior to that the user can access all sysfs attributes regardless of the available attributes. So, clarify the access permission of sysfs attributes according to governor. When adding the devfreq governor, can specify the available attribute information by using DEVFREQ_GOV_ATTR_* constant variable. The user can read or write the sysfs attributes in accordance to the specified attributes. When adding the governor, can add the following attributes according to the governor feature. [Definition for speific sysfs attributes] - DEVFREQ_GOV_ATTR_POLLING_INTERVAL to update polling interval for timer. : /sys/class/devfreq/[devfreq dev name]/polling_interval - DEVFREQ_GOV_ATTR_TIMER to change the type of timer on either deferrable or dealyed timer. : /sys/class/devfreq/[devfreq dev name]/timer And all devfreq governors have to support the following common attributes. The common attributes are added to devfreq class by default. - governor - available_governors - available_frequencies - cur_freq - target_freq - min_freq - max_freq - trans_stat [Table of governor attribute flags for devfreq governors] ------------------------------------------------------------------------------ | simple | perfor | power | user | passive | tegra30 | ondemand | mance | save | space| | ------------------------------------------------------------------------------ governor | O | O | O | O | O | O available_governors | O | O | O | O | O | O available_frequencies | O | O | O | O | O | O cur_freq | O | O | O | O | O | O target_freq | O | O | O | O | O | O min_freq | O | O | O | O | O | O max_freq | O | O | O | O | O | O trans_stat | O | O | O | O | O | O -------------------------------------------------------- polling_interval | O | X | X | X | X | O timer | O | X | X | X | X | X ------------------------------------------------------------------------------ Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- Documentation/ABI/testing/sysfs-class-devfreq | 54 +++--- drivers/devfreq/devfreq.c | 162 ++++++++++++------ drivers/devfreq/governor.h | 12 ++ drivers/devfreq/governor_simpleondemand.c | 2 + drivers/devfreq/tegra30-devfreq.c | 1 + 5 files changed, 159 insertions(+), 72 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index deefffb3bbe4..67af3f31e17c 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -37,20 +37,6 @@ Description: The /sys/class/devfreq/.../target_freq shows the next governor predicted target frequency of the corresponding devfreq object. -What: /sys/class/devfreq/.../polling_interval -Date: September 2011 -Contact: MyungJoo Ham -Description: - The /sys/class/devfreq/.../polling_interval shows and sets - the requested polling interval of the corresponding devfreq - object. The values are represented in ms. If the value is - less than 1 jiffy, it is considered to be 0, which means - no polling. This value is meaningless if the governor is - not polling; thus. If the governor is not using - devfreq-provided central polling - (/sys/class/devfreq/.../central_polling is 0), this value - may be useless. - What: /sys/class/devfreq/.../trans_stat Date: October 2012 Contact: MyungJoo Ham @@ -65,14 +51,6 @@ Description: as following: echo 0 > /sys/class/devfreq/.../trans_stat -What: /sys/class/devfreq/.../userspace/set_freq -Date: September 2011 -Contact: MyungJoo Ham -Description: - The /sys/class/devfreq/.../userspace/set_freq shows and - sets the requested frequency for the devfreq object if - userspace governor is in effect. - What: /sys/class/devfreq/.../available_frequencies Date: October 2012 Contact: Nishanth Menon @@ -109,6 +87,35 @@ Description: The max_freq overrides min_freq because max_freq may be used to throttle devices to avoid overheating. +What: /sys/class/devfreq/.../polling_interval +Date: September 2011 +Contact: MyungJoo Ham +Description: + The /sys/class/devfreq/.../polling_interval shows and sets + the requested polling interval of the corresponding devfreq + object. The values are represented in ms. If the value is + less than 1 jiffy, it is considered to be 0, which means + no polling. This value is meaningless if the governor is + not polling; thus. If the governor is not using + devfreq-provided central polling + (/sys/class/devfreq/.../central_polling is 0), this value + may be useless. + + A list of governors that support the node: + - simple_ondmenad + - tegra_actmon + +What: /sys/class/devfreq/.../userspace/set_freq +Date: September 2011 +Contact: MyungJoo Ham +Description: + The /sys/class/devfreq/.../userspace/set_freq shows and + sets the requested frequency for the devfreq object if + userspace governor is in effect. + + A list of governors that support the node: + - userspace + What: /sys/class/devfreq/.../timer Date: July 2020 Contact: Chanwoo Choi @@ -120,3 +127,6 @@ Description: as following: echo deferrable > /sys/class/devfreq/.../timer echo delayed > /sys/class/devfreq/.../timer + + A list of governors that support the node: + - simple_ondemand diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index a862acfe987e..02cfc6552913 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -32,6 +32,7 @@ #include #define IS_SUPPORTED_FLAG(f, name) ((f & DEVFREQ_GOV_FLAG_##name) ? true : false) +#define IS_SUPPORTED_ATTR(f, name) ((f & DEVFREQ_GOV_ATTR_##name) ? true : false) #define HZ_PER_KHZ 1000 static struct class *devfreq_class; @@ -760,6 +761,11 @@ static void devfreq_dev_release(struct device *dev) kfree(devfreq); } +static void create_sysfs_files(struct devfreq *devfreq, + const struct devfreq_governor *gov); +static void remove_sysfs_files(struct devfreq *devfreq, + const struct devfreq_governor *gov); + /** * devfreq_add_device() - Add devfreq feature to the device * @dev: the device to add devfreq feature. @@ -917,6 +923,7 @@ struct devfreq *devfreq_add_device(struct device *dev, __func__); goto err_init; } + create_sysfs_files(devfreq, devfreq->governor); list_add(&devfreq->node, &devfreq_list); @@ -947,9 +954,12 @@ int devfreq_remove_device(struct devfreq *devfreq) if (!devfreq) return -EINVAL; - if (devfreq->governor) + if (devfreq->governor) { devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_STOP, NULL); + remove_sysfs_files(devfreq, devfreq->governor); + } + device_unregister(&devfreq->dev); return 0; @@ -1378,13 +1388,22 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, goto out; } + /* + * Stop the current governor and remove the specific sysfs files + * which depend on current governor. + */ ret = df->governor->event_handler(df, DEVFREQ_GOV_STOP, NULL); if (ret) { dev_warn(dev, "%s: Governor %s not stopped(%d)\n", __func__, df->governor->name, ret); goto out; } + remove_sysfs_files(df, df->governor); + /* + * Start the new governor and create the specific sysfs files + * which depend on the new governor. + */ prev_governor = df->governor; df->governor = governor; strncpy(df->governor_name, governor->name, DEVFREQ_NAME_LEN); @@ -1392,6 +1411,8 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, if (ret) { dev_warn(dev, "%s: Governor %s not started(%d)\n", __func__, df->governor->name, ret); + + /* Restore previous governor */ df->governor = prev_governor; strncpy(df->governor_name, prev_governor->name, DEVFREQ_NAME_LEN); @@ -1401,8 +1422,16 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, "%s: reverting to Governor %s failed (%d)\n", __func__, df->governor_name, ret); df->governor = NULL; + goto out; } } + + /* + * Create the sysfs files for the new governor. But if failed to start + * the new governor, restore the sysfs files of previous governor. + */ + create_sysfs_files(df, df->governor); + out: mutex_unlock(&devfreq_list_lock); @@ -1484,39 +1513,6 @@ static ssize_t target_freq_show(struct device *dev, } static DEVICE_ATTR_RO(target_freq); -static ssize_t polling_interval_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct devfreq *df = to_devfreq(dev); - - if (!df->profile) - return -EINVAL; - - return sprintf(buf, "%d\n", df->profile->polling_ms); -} - -static ssize_t polling_interval_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct devfreq *df = to_devfreq(dev); - unsigned int value; - int ret; - - if (!df->governor) - return -EINVAL; - - ret = sscanf(buf, "%u", &value); - if (ret != 1) - return -EINVAL; - - df->governor->event_handler(df, DEVFREQ_GOV_UPDATE_INTERVAL, &value); - ret = count; - - return ret; -} -static DEVICE_ATTR_RW(polling_interval); - static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -1724,6 +1720,53 @@ static ssize_t trans_stat_store(struct device *dev, } static DEVICE_ATTR_RW(trans_stat); +static struct attribute *devfreq_attrs[] = { + &dev_attr_name.attr, + &dev_attr_governor.attr, + &dev_attr_available_governors.attr, + &dev_attr_cur_freq.attr, + &dev_attr_available_frequencies.attr, + &dev_attr_target_freq.attr, + &dev_attr_min_freq.attr, + &dev_attr_max_freq.attr, + &dev_attr_trans_stat.attr, + NULL, +}; +ATTRIBUTE_GROUPS(devfreq); + +static ssize_t polling_interval_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *df = to_devfreq(dev); + + if (!df->profile) + return -EINVAL; + + return sprintf(buf, "%d\n", df->profile->polling_ms); +} + +static ssize_t polling_interval_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct devfreq *df = to_devfreq(dev); + unsigned int value; + int ret; + + if (!df->governor) + return -EINVAL; + + ret = sscanf(buf, "%u", &value); + if (ret != 1) + return -EINVAL; + + df->governor->event_handler(df, DEVFREQ_GOV_UPDATE_INTERVAL, &value); + ret = count; + + return ret; +} +static DEVICE_ATTR_RW(polling_interval); + static ssize_t timer_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1787,21 +1830,36 @@ out: } static DEVICE_ATTR_RW(timer); -static struct attribute *devfreq_attrs[] = { - &dev_attr_name.attr, - &dev_attr_governor.attr, - &dev_attr_available_governors.attr, - &dev_attr_cur_freq.attr, - &dev_attr_available_frequencies.attr, - &dev_attr_target_freq.attr, - &dev_attr_polling_interval.attr, - &dev_attr_min_freq.attr, - &dev_attr_max_freq.attr, - &dev_attr_trans_stat.attr, - &dev_attr_timer.attr, - NULL, -}; -ATTRIBUTE_GROUPS(devfreq); +#define CREATE_SYSFS_FILE(df, name) \ +{ \ + int ret; \ + ret = sysfs_create_file(&df->dev.kobj, &dev_attr_##name.attr); \ + if (ret < 0) { \ + dev_warn(&df->dev, \ + "Unable to create attr(%s)\n", "##name"); \ + } \ +} \ + +/* Create the specific sysfs files which depend on each governor. */ +static void create_sysfs_files(struct devfreq *devfreq, + const struct devfreq_governor *gov) +{ + if (IS_SUPPORTED_ATTR(gov->attrs, POLLING_INTERVAL)) + CREATE_SYSFS_FILE(devfreq, polling_interval); + if (IS_SUPPORTED_ATTR(gov->attrs, TIMER)) + CREATE_SYSFS_FILE(devfreq, timer); +} + +/* Remove the specific sysfs files which depend on each governor. */ +static void remove_sysfs_files(struct devfreq *devfreq, + const struct devfreq_governor *gov) +{ + if (IS_SUPPORTED_ATTR(gov->attrs, POLLING_INTERVAL)) + sysfs_remove_file(&devfreq->dev.kobj, + &dev_attr_polling_interval.attr); + if (IS_SUPPORTED_ATTR(gov->attrs, TIMER)) + sysfs_remove_file(&devfreq->dev.kobj, &dev_attr_timer.attr); +} /** * devfreq_summary_show() - Show the summary of the devfreq devices @@ -1858,8 +1916,12 @@ static int devfreq_summary_show(struct seq_file *s, void *data) mutex_lock(&devfreq->lock); cur_freq = devfreq->previous_freq; get_freq_range(devfreq, &min_freq, &max_freq); - polling_ms = devfreq->profile->polling_ms; timer = devfreq->profile->timer; + + if (IS_SUPPORTED_ATTR(devfreq->governor->attrs, POLLING_INTERVAL)) + polling_ms = devfreq->profile->polling_ms; + else + polling_ms = 0; mutex_unlock(&devfreq->lock); seq_printf(s, diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index 7dbb110a869e..df413b851bb2 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -35,10 +35,21 @@ #define DEVFREQ_GOV_FLAG_IMMUTABLE BIT(0) #define DEVFREQ_GOV_FLAG_IRQ_DRIVEN BIT(1) +/* + * Definition of governor attribute flags except for common sysfs attributes + * - DEVFREQ_GOV_ATTR_POLLING_INTERVAL + * : Indicate polling_interal sysfs attribute + * - DEVFREQ_GOV_ATTR_TIMER + * : Indicate timer sysfs attribute + */ +#define DEVFREQ_GOV_ATTR_POLLING_INTERVAL BIT(0) +#define DEVFREQ_GOV_ATTR_TIMER BIT(1) + /** * struct devfreq_governor - Devfreq policy governor * @node: list node - contains registered devfreq governors * @name: Governor's name + * @attrs: Governor's sysfs attribute flags * @flags: Governor's feature flags * @get_target_freq: Returns desired operating frequency for the device. * Basically, get_target_freq will run @@ -57,6 +68,7 @@ struct devfreq_governor { struct list_head node; const char name[DEVFREQ_NAME_LEN]; + const u64 attrs; const u64 flags; int (*get_target_freq)(struct devfreq *this, unsigned long *freq); int (*event_handler)(struct devfreq *devfreq, diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c index 1b314e1df028..d57b82a2b570 100644 --- a/drivers/devfreq/governor_simpleondemand.c +++ b/drivers/devfreq/governor_simpleondemand.c @@ -117,6 +117,8 @@ static int devfreq_simple_ondemand_handler(struct devfreq *devfreq, static struct devfreq_governor devfreq_simple_ondemand = { .name = DEVFREQ_GOV_SIMPLE_ONDEMAND, + .attrs = DEVFREQ_GOV_ATTR_POLLING_INTERVAL + | DEVFREQ_GOV_ATTR_TIMER, .get_target_freq = devfreq_simple_ondemand_func, .event_handler = devfreq_simple_ondemand_handler, }; diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index faa1aecf2a31..8f5b60bef336 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -765,6 +765,7 @@ static int tegra_governor_event_handler(struct devfreq *devfreq, static struct devfreq_governor tegra_devfreq_governor = { .name = "tegra_actmon", + .attrs = DEVFREQ_GOV_ATTR_POLLING_INTERVAL, .flags = DEVFREQ_GOV_FLAG_IMMUTABLE | DEVFREQ_GOV_FLAG_IRQ_DRIVEN, .get_target_freq = tegra_governor_get_target, From 96ffcdf239de6f9970178bb7d643e16fd9e68ab9 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 20 Oct 2020 15:12:12 +0900 Subject: [PATCH 06/86] PM / devfreq: Remove redundant governor_name from struct devfreq The devfreq structure instance contains the governor_name and a governor instance. When need to show the governor name, better to use the name of devfreq_governor structure. So, governor_name variable in struct devfreq is a redundant and unneeded variable. Remove the redundant governor_name of struct devfreq and then use the name of devfreq_governor instance. Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 18 +++++++----------- drivers/devfreq/governor.h | 2 ++ include/linux/devfreq.h | 4 ---- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 02cfc6552913..6aa10de792b3 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -811,7 +811,6 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->dev.release = devfreq_dev_release; INIT_LIST_HEAD(&devfreq->node); devfreq->profile = profile; - strscpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); devfreq->previous_freq = profile->initial_freq; devfreq->last_status.current_frequency = profile->initial_freq; devfreq->data = data; @@ -907,7 +906,7 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq_list_lock); - governor = try_then_request_governor(devfreq->governor_name); + governor = try_then_request_governor(governor_name); if (IS_ERR(governor)) { dev_err(dev, "%s: Unable to find governor for the device\n", __func__); @@ -1249,7 +1248,7 @@ int devfreq_add_governor(struct devfreq_governor *governor) int ret = 0; struct device *dev = devfreq->dev.parent; - if (!strncmp(devfreq->governor_name, governor->name, + if (!strncmp(devfreq->governor->name, governor->name, DEVFREQ_NAME_LEN)) { /* The following should never occur */ if (devfreq->governor) { @@ -1311,7 +1310,7 @@ int devfreq_remove_governor(struct devfreq_governor *governor) int ret; struct device *dev = devfreq->dev.parent; - if (!strncmp(devfreq->governor_name, governor->name, + if (!strncmp(devfreq->governor->name, governor->name, DEVFREQ_NAME_LEN)) { /* we should have a devfreq governor! */ if (!devfreq->governor) { @@ -1406,7 +1405,6 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, */ prev_governor = df->governor; df->governor = governor; - strncpy(df->governor_name, governor->name, DEVFREQ_NAME_LEN); ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); if (ret) { dev_warn(dev, "%s: Governor %s not started(%d)\n", @@ -1414,13 +1412,11 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, /* Restore previous governor */ df->governor = prev_governor; - strncpy(df->governor_name, prev_governor->name, - DEVFREQ_NAME_LEN); ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); if (ret) { dev_err(dev, "%s: reverting to Governor %s failed (%d)\n", - __func__, df->governor_name, ret); + __func__, prev_governor->name, ret); df->governor = NULL; goto out; } @@ -1459,7 +1455,7 @@ static ssize_t available_governors_show(struct device *d, */ if (IS_SUPPORTED_FLAG(df->governor->flags, IMMUTABLE)) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, - "%s ", df->governor_name); + "%s ", df->governor->name); /* * The devfreq device shows the registered governor except for * immutable governors such as passive governor . @@ -1902,7 +1898,7 @@ static int devfreq_summary_show(struct seq_file *s, void *data) list_for_each_entry_reverse(devfreq, &devfreq_list, node) { #if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) - if (!strncmp(devfreq->governor_name, DEVFREQ_GOV_PASSIVE, + if (!strncmp(devfreq->governor->name, DEVFREQ_GOV_PASSIVE, DEVFREQ_NAME_LEN)) { struct devfreq_passive_data *data = devfreq->data; @@ -1928,7 +1924,7 @@ static int devfreq_summary_show(struct seq_file *s, void *data) "%-30s %-30s %-15s %-10s %10d %12ld %12ld %12ld\n", dev_name(&devfreq->dev), p_devfreq ? dev_name(&p_devfreq->dev) : "null", - devfreq->governor_name, + devfreq->governor->name, polling_ms ? timer_name[timer] : "null", polling_ms, cur_freq, diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index df413b851bb2..2a52f97b542d 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -13,6 +13,8 @@ #include +#define DEVFREQ_NAME_LEN 16 + #define to_devfreq(DEV) container_of((DEV), struct devfreq, dev) /* Devfreq events */ diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 121a2430d7f7..b6d3bae1c74d 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -15,8 +15,6 @@ #include #include -#define DEVFREQ_NAME_LEN 16 - /* DEVFREQ governor name */ #define DEVFREQ_GOV_SIMPLE_ONDEMAND "simple_ondemand" #define DEVFREQ_GOV_PERFORMANCE "performance" @@ -139,7 +137,6 @@ struct devfreq_stats { * using devfreq. * @profile: device-specific devfreq profile * @governor: method how to choose frequency based on the usage. - * @governor_name: devfreq governor name for use with this devfreq * @nb: notifier block used to notify devfreq object that it should * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. @@ -176,7 +173,6 @@ struct devfreq { struct device dev; struct devfreq_dev_profile *profile; const struct devfreq_governor *governor; - char governor_name[DEVFREQ_NAME_LEN]; struct notifier_block nb; struct delayed_work work; From 9a93386bf667499d71e35051804438fd32dd77a8 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 26 Oct 2020 01:17:30 +0300 Subject: [PATCH 07/86] PM / devfreq: tegra20: Silence deferred probe error Tegra EMC driver was turned into a regular kernel driver, meaning that it could be compiled as a loadable kernel module now. Hence EMC clock isn't guaranteed to be available and clk_get("emc") may return -EPROBE_DEFER. Let's silence the deferred probe error. Acked-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/tegra20-devfreq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/devfreq/tegra20-devfreq.c b/drivers/devfreq/tegra20-devfreq.c index ff82bac9ee4e..fd801534771d 100644 --- a/drivers/devfreq/tegra20-devfreq.c +++ b/drivers/devfreq/tegra20-devfreq.c @@ -141,11 +141,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev) /* EMC is a system-critical clock that is always enabled */ tegra->emc_clock = devm_clk_get(&pdev->dev, "emc"); - if (IS_ERR(tegra->emc_clock)) { - err = PTR_ERR(tegra->emc_clock); - dev_err(&pdev->dev, "failed to get emc clock: %d\n", err); - return err; - } + if (IS_ERR(tegra->emc_clock)) + return dev_err_probe(&pdev->dev, PTR_ERR(tegra->emc_clock), + "failed to get emc clock\n"); tegra->regs = mc->regs; From cc2a4e83612040a3dc49966c6c4ac5088ed7037f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 26 Oct 2020 01:17:31 +0300 Subject: [PATCH 08/86] PM / devfreq: tegra20: Relax Kconfig dependency The Tegra EMC driver now could be compiled as a loadable kernel module. Currently devfreq driver depends on the EMC/MC drivers in Kconfig, and thus, devfreq is forced to be a kernel module if EMC is compiled as a module. This build dependency could be relaxed since devfreq driver checks MC/EMC presence on probe, allowing kernel configuration where devfreq is a built-in driver and EMC driver is a loadable module. This change puts Tegra20 devfreq Kconfig entry on a par with the Tegra30 devfreq entry. Acked-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 37dc40d1fcfb..0ee36ae2fa79 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -123,7 +123,7 @@ config ARM_TEGRA_DEVFREQ config ARM_TEGRA20_DEVFREQ tristate "NVIDIA Tegra20 DEVFREQ Driver" - depends on (TEGRA_MC && TEGRA20_EMC) || COMPILE_TEST + depends on ARCH_TEGRA_2x_SOC || COMPILE_TEST depends on COMMON_CLK select DEVFREQ_GOV_SIMPLE_ONDEMAND help From 09d56d92ad25b58113f4ec677e9b1ac1e2c3072b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 26 Oct 2020 01:17:33 +0300 Subject: [PATCH 09/86] PM / devfreq: tegra30: Silence deferred probe error Tegra EMC driver was turned into a regular kernel driver, meaning that it could be compiled as a loadable kernel module now. Hence EMC clock isn't guaranteed to be available and clk_get("emc") may return -EPROBE_DEFER. Let's silence the deferred probe error. Acked-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/tegra30-devfreq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 8f5b60bef336..38cc0d014738 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -802,10 +802,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev) } tegra->emc_clock = devm_clk_get(&pdev->dev, "emc"); - if (IS_ERR(tegra->emc_clock)) { - dev_err(&pdev->dev, "Failed to get emc clock\n"); - return PTR_ERR(tegra->emc_clock); - } + if (IS_ERR(tegra->emc_clock)) + return dev_err_probe(&pdev->dev, PTR_ERR(tegra->emc_clock), + "Failed to get emc clock\n"); err = platform_get_irq(pdev, 0); if (err < 0) From 748f0d70087c56226bf1df1f91a00b7ab4c8f883 Mon Sep 17 00:00:00 2001 From: Brahadambal Srinivasan Date: Fri, 23 Oct 2020 20:55:27 +0530 Subject: [PATCH 10/86] cpupower: Provide online and offline CPU information When a user tries to modify cpuidle or cpufreq properties on offline CPUs, the tool returns success (exit status 0) but also does not provide any warning message regarding offline cpus that may have been specified but left unchanged. In case of all or a few CPUs being offline, it can be difficult to keep track of which CPUs didn't get the new frequency or idle state set. Silent failures are difficult to keep track of when there are a huge number of CPUs on which the action is performed. This patch adds helper functions to find both online and offline CPUs and print them out accordingly. We use these helper functions in cpuidle-set and cpufreq-set to print an additional message if the user attempts to modify offline cpus. Reported-by: Pavithra R. Prakash Signed-off-by: Brahadambal Srinivasan Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpufreq-set.c | 3 + tools/power/cpupower/utils/cpuidle-set.c | 4 ++ tools/power/cpupower/utils/cpupower.c | 8 +++ tools/power/cpupower/utils/helpers/helpers.h | 12 ++++ tools/power/cpupower/utils/helpers/misc.c | 66 +++++++++++++++++++- 5 files changed, 92 insertions(+), 1 deletion(-) diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 7b2164e07057..c5e60a39cfa6 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -315,6 +315,7 @@ int cmd_freq_set(int argc, char **argv) } } + get_cpustate(); /* loop over CPUs */ for (cpu = bitmask_first(cpus_chosen); @@ -332,5 +333,7 @@ int cmd_freq_set(int argc, char **argv) } } + print_offline_cpus(); + return 0; } diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index 569f268f4c7f..46158928f9ad 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -95,6 +95,8 @@ int cmd_idle_set(int argc, char **argv) exit(EXIT_FAILURE); } + get_cpustate(); + /* Default is: set all CPUs */ if (bitmask_isallclear(cpus_chosen)) bitmask_setall(cpus_chosen); @@ -181,5 +183,7 @@ int cmd_idle_set(int argc, char **argv) break; } } + + print_offline_cpus(); return EXIT_SUCCESS; } diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 8e3d08042825..8ac3304a9957 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -34,6 +34,8 @@ int run_as_root; int base_cpu; /* Affected cpus chosen by -c/--cpu param */ struct bitmask *cpus_chosen; +struct bitmask *online_cpus; +struct bitmask *offline_cpus; #ifdef DEBUG int be_verbose; @@ -178,6 +180,8 @@ int main(int argc, const char *argv[]) char pathname[32]; cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); + online_cpus = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); + offline_cpus = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); argc--; argv += 1; @@ -230,6 +234,10 @@ int main(int argc, const char *argv[]) ret = p->main(argc, argv); if (cpus_chosen) bitmask_free(cpus_chosen); + if (online_cpus) + bitmask_free(online_cpus); + if (offline_cpus) + bitmask_free(offline_cpus); return ret; } print_help(); diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index c258eeccd05f..d5799aa71e1f 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -94,6 +94,8 @@ struct cpupower_cpu_info { */ extern int get_cpu_info(struct cpupower_cpu_info *cpu_info); extern struct cpupower_cpu_info cpupower_cpu_info; + + /* cpuid and cpuinfo helpers **************************/ /* X86 ONLY ****************************************/ @@ -171,4 +173,14 @@ static inline unsigned int cpuid_ecx(unsigned int op) { return 0; }; static inline unsigned int cpuid_edx(unsigned int op) { return 0; }; #endif /* defined(__i386__) || defined(__x86_64__) */ +/* + * CPU State related functions + */ +extern struct bitmask *online_cpus; +extern struct bitmask *offline_cpus; + +void get_cpustate(void); +void print_online_cpus(void); +void print_offline_cpus(void); + #endif /* __CPUPOWERUTILS_HELPERS__ */ diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index f406adc40bad..2ead98169cf5 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -1,8 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 -#if defined(__i386__) || defined(__x86_64__) + +#include +#include #include "helpers/helpers.h" +#if defined(__i386__) || defined(__x86_64__) + #define MSR_AMD_HWCR 0xc0010015 int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, @@ -41,3 +45,63 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, return 0; } #endif /* #if defined(__i386__) || defined(__x86_64__) */ + +/* get_cpustate + * + * Gather the information of all online CPUs into bitmask struct + */ +void get_cpustate(void) +{ + unsigned int cpu = 0; + + bitmask_clearall(online_cpus); + bitmask_clearall(offline_cpus); + + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if (cpupower_is_cpu_online(cpu) == 1) + bitmask_setbit(online_cpus, cpu); + else + bitmask_setbit(offline_cpus, cpu); + + continue; + } +} + +/* print_online_cpus + * + * Print the CPU numbers of all CPUs that are online currently + */ +void print_online_cpus(void) +{ + int str_len = 0; + char *online_cpus_str = NULL; + + str_len = online_cpus->size * 5; + online_cpus_str = (void *)malloc(sizeof(char) * str_len); + + if (!bitmask_isallclear(online_cpus)) { + bitmask_displaylist(online_cpus_str, str_len, online_cpus); + printf(_("Following CPUs are online:\n%s\n"), online_cpus_str); + } +} + +/* print_offline_cpus + * + * Print the CPU numbers of all CPUs that are offline currently + */ +void print_offline_cpus(void) +{ + int str_len = 0; + char *offline_cpus_str = NULL; + + str_len = offline_cpus->size * 5; + offline_cpus_str = (void *)malloc(sizeof(char) * str_len); + + if (!bitmask_isallclear(offline_cpus)) { + bitmask_displaylist(offline_cpus_str, str_len, offline_cpus); + printf(_("Following CPUs are offline:\n%s\n"), offline_cpus_str); + printf(_("cpupower set operation was not performed on them\n")); + } +} From ef43f01ac06976b2aa2b17266d307bb1a4f7e6f9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 22 Oct 2020 12:12:27 +0530 Subject: [PATCH 11/86] opp: Always add entries in dev_list with opp_table->lock held The readers of dev_list expect the updates to it to take place from within the opp_table->lock and this is missing in the case where the dev_list is updated for already managed OPPs. Fix that by calling _add_opp_dev() from there and remove the now unused _add_opp_dev_unlocked() callback. While at it, also reduce the length of the critical section in _add_opp_dev(). Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0e0a5269dc82..84035ab8bb31 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1036,8 +1036,8 @@ static void _remove_opp_dev(struct opp_device *opp_dev, kfree(opp_dev); } -static struct opp_device *_add_opp_dev_unlocked(const struct device *dev, - struct opp_table *opp_table) +struct opp_device *_add_opp_dev(const struct device *dev, + struct opp_table *opp_table) { struct opp_device *opp_dev; @@ -1048,7 +1048,9 @@ static struct opp_device *_add_opp_dev_unlocked(const struct device *dev, /* Initialize opp-dev */ opp_dev->dev = dev; + mutex_lock(&opp_table->lock); list_add(&opp_dev->node, &opp_table->dev_list); + mutex_unlock(&opp_table->lock); /* Create debugfs entries for the opp_table */ opp_debug_register(opp_dev, opp_table); @@ -1056,18 +1058,6 @@ static struct opp_device *_add_opp_dev_unlocked(const struct device *dev, return opp_dev; } -struct opp_device *_add_opp_dev(const struct device *dev, - struct opp_table *opp_table) -{ - struct opp_device *opp_dev; - - mutex_lock(&opp_table->lock); - opp_dev = _add_opp_dev_unlocked(dev, opp_table); - mutex_unlock(&opp_table->lock); - - return opp_dev; -} - static struct opp_table *_allocate_opp_table(struct device *dev, int index) { struct opp_table *opp_table; @@ -1148,7 +1138,7 @@ static struct opp_table *_opp_get_opp_table(struct device *dev, int index) opp_table = _managed_opp(dev, index); if (opp_table) { - if (!_add_opp_dev_unlocked(dev, opp_table)) { + if (!_add_opp_dev(dev, opp_table)) { dev_pm_opp_put_opp_table(opp_table); opp_table = ERR_PTR(-ENOMEM); } From 56a7ff75cd08987812209971e319f78156ea2bb1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Oct 2020 13:57:45 +0200 Subject: [PATCH 12/86] cpufreq: Drop restore_freq from struct cpufreq_policy The restore_freq field in struct cpufreq_policy is only used by __target_index() in one place and a local variable in that function may as well be used instead of it, so drop it and modify __target_index() accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 10 +++++----- include/linux/cpufreq.h | 5 ----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 336b5e94cbc8..f2d96175f62d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2123,7 +2123,7 @@ static int __target_intermediate(struct cpufreq_policy *policy, static int __target_index(struct cpufreq_policy *policy, int index) { struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0}; - unsigned int intermediate_freq = 0; + unsigned int restore_freq, intermediate_freq = 0; unsigned int newfreq = policy->freq_table[index].frequency; int retval = -EINVAL; bool notify; @@ -2131,6 +2131,9 @@ static int __target_index(struct cpufreq_policy *policy, int index) if (newfreq == policy->cur) return 0; + /* Save last value to restore later on errors */ + restore_freq = policy->cur; + notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION); if (notify) { /* Handle switching to intermediate frequency */ @@ -2168,7 +2171,7 @@ static int __target_index(struct cpufreq_policy *policy, int index) */ if (unlikely(retval && intermediate_freq)) { freqs.old = intermediate_freq; - freqs.new = policy->restore_freq; + freqs.new = restore_freq; cpufreq_freq_transition_begin(policy, &freqs); cpufreq_freq_transition_end(policy, &freqs, 0); } @@ -2203,9 +2206,6 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, !(cpufreq_driver->flags & CPUFREQ_NEED_UPDATE_LIMITS)) return 0; - /* Save last value to restore later on errors */ - policy->restore_freq = policy->cur; - if (cpufreq_driver->target) return cpufreq_driver->target(policy, target_freq, relation); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1eaa04f1bae6..9779a6cd8baa 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,7 +65,6 @@ struct cpufreq_policy { unsigned int max; /* in kHz */ unsigned int cur; /* in kHz, only needed if cpufreq * governors are used */ - unsigned int restore_freq; /* = policy->cur before transition */ unsigned int suspend_freq; /* freq to set during suspend */ unsigned int policy; /* see above */ @@ -308,10 +307,6 @@ struct cpufreq_driver { /* define one out of two */ int (*setpolicy)(struct cpufreq_policy *policy); - /* - * On failure, should always restore frequency to policy->restore_freq - * (i.e. old freq). - */ int (*target)(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); /* Deprecated */ From 27c09484dd3d8fdb56eb3787877d6035d0e89669 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 27 Oct 2020 16:53:21 +0530 Subject: [PATCH 13/86] opp: Allocate the OPP table outside of opp_table_lock There is no critical section which needs protection with locks while allocating an OPP table, except while adding it to the opp_tables list. And taking the opp_table_lock for the entire duration causes circular dependency issues like the one mentioned below. This patch takes another approach to reduce the size of the critical section to avoid such issues, the details of that are present within the patch. ====================================================== WARNING: possible circular locking dependency detected 5.4.72 #14 Not tainted ------------------------------------------------------ chrome/1865 is trying to acquire lock: ffffffdd34921750 (opp_table_lock){+.+.}, at: _find_opp_table+0x34/0x74 but task is already holding lock: ffffff81f0fc71a8 (reservation_ww_class_mutex){+.+.}, at: submit_lock_objects+0x70/0x1ec fscrypt: AES-256-CTS-CBC using implementation "cts-cbc-aes-ce" which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (reservation_ww_class_mutex){+.+.}: __mutex_lock_common+0xec/0xc0c ww_mutex_lock_interruptible+0x5c/0xc4 msm_gem_fault+0x2c/0x124 __do_fault+0x40/0x16c handle_mm_fault+0x7cc/0xd98 do_page_fault+0x230/0x3b4 do_translation_fault+0x5c/0x78 do_mem_abort+0x4c/0xb4 el0_da+0x1c/0x20 -> #3 (&mm->mmap_sem){++++}: __might_fault+0x70/0x98 compat_filldir+0xf8/0x48c dcache_readdir+0x70/0x1dc iterate_dir+0xd4/0x180 __arm64_compat_sys_getdents+0xa0/0x19c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 -> #2 (&sb->s_type->i_mutex_key#3){++++}: down_write+0x54/0x16c start_creating+0x68/0x128 debugfs_create_dir+0x28/0x114 opp_debug_register+0x8c/0xc0 _add_opp_dev_unlocked+0x5c/0x70 _add_opp_dev+0x38/0x58 _opp_get_opp_table+0xdc/0x1ac dev_pm_opp_get_opp_table_indexed+0x24/0x30 dev_pm_opp_of_add_table_indexed+0x48/0x84 of_genpd_add_provider_onecell+0xc0/0x1b8 rpmhpd_probe+0x240/0x268 platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_bus_create+0x230/0x368 of_platform_populate+0x70/0xbc of_platform_default_populate_init+0xa8/0xc0 do_one_initcall+0x1c8/0x3fc do_initcall_level+0xb4/0x10c do_basic_setup+0x30/0x48 kernel_init_freeable+0x124/0x1a4 kernel_init+0x14/0x104 ret_from_fork+0x10/0x18 -> #1 (&opp_table->lock){+.+.}: __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _add_opp_dev+0x2c/0x58 _opp_get_opp_table+0xdc/0x1ac dev_pm_opp_get_opp_table_indexed+0x24/0x30 dev_pm_opp_of_add_table_indexed+0x48/0x84 of_genpd_add_provider_onecell+0xc0/0x1b8 rpmhpd_probe+0x240/0x268 platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_populate+0x70/0xbc devm_of_platform_populate+0x58/0xa0 rpmh_rsc_probe+0x36c/0x3cc platform_drv_probe+0x90/0xb0 really_probe+0x134/0x2ec driver_probe_device+0x64/0xfc __device_attach_driver+0x8c/0xa4 bus_for_each_drv+0x90/0xd8 __device_attach+0xc0/0x148 device_initial_probe+0x20/0x2c bus_probe_device+0x34/0x94 device_add+0x1fc/0x3b0 of_device_add+0x3c/0x4c of_platform_device_create_pdata+0xb8/0xfc of_platform_bus_create+0x1e4/0x368 of_platform_bus_create+0x230/0x368 of_platform_populate+0x70/0xbc of_platform_default_populate_init+0xa8/0xc0 do_one_initcall+0x1c8/0x3fc do_initcall_level+0xb4/0x10c do_basic_setup+0x30/0x48 kernel_init_freeable+0x124/0x1a4 kernel_init+0x14/0x104 ret_from_fork+0x10/0x18 -> #0 (opp_table_lock){+.+.}: __lock_acquire+0xee4/0x2450 lock_acquire+0x1cc/0x210 __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _find_opp_table+0x34/0x74 dev_pm_opp_find_freq_exact+0x2c/0xdc a6xx_gmu_resume+0xc8/0xecc a6xx_pm_resume+0x148/0x200 adreno_resume+0x28/0x34 pm_generic_runtime_resume+0x34/0x48 __rpm_callback+0x70/0x10c rpm_callback+0x34/0x8c rpm_resume+0x414/0x550 __pm_runtime_resume+0x7c/0xa0 msm_gpu_submit+0x60/0x1c0 msm_ioctl_gem_submit+0xadc/0xb60 drm_ioctl_kernel+0x9c/0x118 drm_ioctl+0x27c/0x408 drm_compat_ioctl+0xcc/0xdc __se_compat_sys_ioctl+0x100/0x206c __arm64_compat_sys_ioctl+0x20/0x2c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 other info that might help us debug this: Chain exists of: opp_table_lock --> &mm->mmap_sem --> reservation_ww_class_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(reservation_ww_class_mutex); lock(&mm->mmap_sem); lock(reservation_ww_class_mutex); lock(opp_table_lock); *** DEADLOCK *** 3 locks held by chrome/1865: #0: ffffff81edecc0d8 (&dev->struct_mutex){+.+.}, at: msm_ioctl_gem_submit+0x264/0xb60 #1: ffffff81d0000870 (reservation_ww_class_acquire){+.+.}, at: msm_ioctl_gem_submit+0x8e8/0xb60 #2: ffffff81f0fc71a8 (reservation_ww_class_mutex){+.+.}, at: submit_lock_objects+0x70/0x1ec stack backtrace: CPU: 0 PID: 1865 Comm: chrome Not tainted 5.4.72 #14 Hardware name: Google Lazor (rev1+) with LTE (DT) Call trace: dump_backtrace+0x0/0x158 show_stack+0x20/0x2c dump_stack+0xc8/0x160 print_circular_bug+0x2c4/0x2c8 check_noncircular+0x1a8/0x1b0 __lock_acquire+0xee4/0x2450 lock_acquire+0x1cc/0x210 __mutex_lock_common+0xec/0xc0c mutex_lock_nested+0x40/0x50 _find_opp_table+0x34/0x74 dev_pm_opp_find_freq_exact+0x2c/0xdc a6xx_gmu_resume+0xc8/0xecc a6xx_pm_resume+0x148/0x200 adreno_resume+0x28/0x34 pm_generic_runtime_resume+0x34/0x48 __rpm_callback+0x70/0x10c rpm_callback+0x34/0x8c rpm_resume+0x414/0x550 __pm_runtime_resume+0x7c/0xa0 msm_gpu_submit+0x60/0x1c0 msm_ioctl_gem_submit+0xadc/0xb60 drm_ioctl_kernel+0x9c/0x118 drm_ioctl+0x27c/0x408 drm_compat_ioctl+0xcc/0xdc __se_compat_sys_ioctl+0x100/0x206c __arm64_compat_sys_ioctl+0x20/0x2c el0_svc_common+0xa8/0x178 el0_svc_compat_handler+0x2c/0x40 el0_svc_compat+0x8/0x10 Reported-by: Rob Clark Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 47 +++++++++++++++++++++++++++++++++++++++++----- drivers/opp/of.c | 5 +++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 84035ab8bb31..6f4a73a6391f 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -29,6 +29,8 @@ LIST_HEAD(opp_tables); /* Lock to allow exclusive modification to the device and opp lists */ DEFINE_MUTEX(opp_table_lock); +/* Flag indicating that opp_tables list is being updated at the moment */ +static bool opp_tables_busy; static struct opp_device *_find_opp_dev(const struct device *dev, struct opp_table *opp_table) @@ -1111,8 +1113,6 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) INIT_LIST_HEAD(&opp_table->opp_list); kref_init(&opp_table->kref); - /* Secure the device table modification */ - list_add(&opp_table->node, &opp_tables); return opp_table; err: @@ -1125,27 +1125,64 @@ void _get_opp_table_kref(struct opp_table *opp_table) kref_get(&opp_table->kref); } +/* + * We need to make sure that the OPP table for a device doesn't get added twice, + * if this routine gets called in parallel with the same device pointer. + * + * The simplest way to enforce that is to perform everything (find existing + * table and if not found, create a new one) under the opp_table_lock, so only + * one creator gets access to the same. But that expands the critical section + * under the lock and may end up causing circular dependencies with frameworks + * like debugfs, interconnect or clock framework as they may be direct or + * indirect users of OPP core. + * + * And for that reason we have to go for a bit tricky implementation here, which + * uses the opp_tables_busy flag to indicate if another creator is in the middle + * of adding an OPP table and others should wait for it to finish. + */ static struct opp_table *_opp_get_opp_table(struct device *dev, int index) { struct opp_table *opp_table; - /* Hold our table modification lock here */ +again: mutex_lock(&opp_table_lock); opp_table = _find_opp_table_unlocked(dev); if (!IS_ERR(opp_table)) goto unlock; + /* + * The opp_tables list or an OPP table's dev_list is getting updated by + * another user, wait for it to finish. + */ + if (unlikely(opp_tables_busy)) { + mutex_unlock(&opp_table_lock); + cpu_relax(); + goto again; + } + + opp_tables_busy = true; opp_table = _managed_opp(dev, index); + + /* Drop the lock to reduce the size of critical section */ + mutex_unlock(&opp_table_lock); + if (opp_table) { if (!_add_opp_dev(dev, opp_table)) { dev_pm_opp_put_opp_table(opp_table); opp_table = ERR_PTR(-ENOMEM); } - goto unlock; + + mutex_lock(&opp_table_lock); + } else { + opp_table = _allocate_opp_table(dev, index); + + mutex_lock(&opp_table_lock); + if (!IS_ERR(opp_table)) + list_add(&opp_table->node, &opp_tables); } - opp_table = _allocate_opp_table(dev, index); + opp_tables_busy = false; unlock: mutex_unlock(&opp_table_lock); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 9faeb83e4b32..aa0ac5d4e479 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -112,8 +112,6 @@ static struct opp_table *_find_table_of_opp_np(struct device_node *opp_np) struct opp_table *opp_table; struct device_node *opp_table_np; - lockdep_assert_held(&opp_table_lock); - opp_table_np = of_get_parent(opp_np); if (!opp_table_np) goto err; @@ -121,12 +119,15 @@ static struct opp_table *_find_table_of_opp_np(struct device_node *opp_np) /* It is safe to put the node now as all we need now is its address */ of_node_put(opp_table_np); + mutex_lock(&opp_table_lock); list_for_each_entry(opp_table, &opp_tables, node) { if (opp_table_np == opp_table->np) { _get_opp_table_kref(opp_table); + mutex_unlock(&opp_table_lock); return opp_table; } } + mutex_unlock(&opp_table_lock); err: return ERR_PTR(-ENODEV); From 9e62edac519da71bbc981e4c984fe67729b0d1f3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 27 Oct 2020 11:49:03 +0530 Subject: [PATCH 14/86] opp: Don't return opp_dev from _find_opp_dev() The caller of _find_opp_dev() only needs to know if the opp_dev is there in the list or not. Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 6f4a73a6391f..9915e8487f0b 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -32,31 +32,29 @@ DEFINE_MUTEX(opp_table_lock); /* Flag indicating that opp_tables list is being updated at the moment */ static bool opp_tables_busy; -static struct opp_device *_find_opp_dev(const struct device *dev, - struct opp_table *opp_table) +static bool _find_opp_dev(const struct device *dev, struct opp_table *opp_table) { struct opp_device *opp_dev; + bool found = false; + mutex_lock(&opp_table->lock); list_for_each_entry(opp_dev, &opp_table->dev_list, node) - if (opp_dev->dev == dev) - return opp_dev; + if (opp_dev->dev == dev) { + found = true; + break; + } - return NULL; + mutex_unlock(&opp_table->lock); + return found; } static struct opp_table *_find_opp_table_unlocked(struct device *dev) { struct opp_table *opp_table; - bool found; list_for_each_entry(opp_table, &opp_tables, node) { - mutex_lock(&opp_table->lock); - found = !!_find_opp_dev(dev, opp_table); - mutex_unlock(&opp_table->lock); - - if (found) { + if (_find_opp_dev(dev, opp_table)) { _get_opp_table_kref(opp_table); - return opp_table; } } From 4cfdad35ae7ed400d7146aeb57d34744ce53e9dc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 4 Nov 2020 19:48:37 +0300 Subject: [PATCH 15/86] clk: tegra: Export Tegra20 EMC kernel symbols We're going to modularize Tegra EMC drivers and some of the EMC-clock driver symbols need to be exported. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20201104164923.21238-2-digetx@gmail.com Signed-off-by: Krzysztof Kozlowski --- drivers/clk/tegra/clk-tegra20-emc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/tegra/clk-tegra20-emc.c b/drivers/clk/tegra/clk-tegra20-emc.c index 03bf0009a33c..dd74b8543bf1 100644 --- a/drivers/clk/tegra/clk-tegra20-emc.c +++ b/drivers/clk/tegra/clk-tegra20-emc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -235,6 +236,7 @@ void tegra20_clk_set_emc_round_callback(tegra20_clk_emc_round_cb *round_cb, emc->cb_arg = cb_arg; } } +EXPORT_SYMBOL_GPL(tegra20_clk_set_emc_round_callback); bool tegra20_clk_emc_driver_available(struct clk_hw *emc_hw) { @@ -291,3 +293,4 @@ int tegra20_clk_prepare_emc_mc_same_freq(struct clk *emc_clk, bool same) return 0; } +EXPORT_SYMBOL_GPL(tegra20_clk_prepare_emc_mc_same_freq); From fc4fbf88ecbee88381899b4a7f97c9b52570e91a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 4 Nov 2020 19:48:38 +0300 Subject: [PATCH 16/86] soc/tegra: fuse: Export tegra_read_ram_code() The tegra_read_ram_code() is used by EMC drivers and we're going to make these driver modular, hence this function needs to be exported. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20201104164923.21238-3-digetx@gmail.com Signed-off-by: Krzysztof Kozlowski --- drivers/soc/tegra/fuse/tegra-apbmisc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c index cee207d10024..590c862538d0 100644 --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c @@ -3,6 +3,7 @@ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. */ +#include #include #include #include @@ -90,6 +91,7 @@ u32 tegra_read_ram_code(void) return straps >> PMC_STRAPPING_OPT_A_RAM_CODE_SHIFT; } +EXPORT_SYMBOL_GPL(tegra_read_ram_code); static const struct of_device_id apbmisc_match[] __initconst = { { .compatible = "nvidia,tegra20-apbmisc", }, From 245157a31e91aec7f5b621ed26c0a8370b1c8a64 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 4 Nov 2020 19:48:39 +0300 Subject: [PATCH 17/86] soc/tegra: fuse: Add stub for tegra_sku_info Drivers that use tegra_sku_info and have COMPILE_TEST are failing to be build due to the missing stub for tegra_sku_info. Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20201104164923.21238-4-digetx@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/soc/tegra/fuse.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/soc/tegra/fuse.h b/include/soc/tegra/fuse.h index c702bd2911bc..78cbc787a4dc 100644 --- a/include/soc/tegra/fuse.h +++ b/include/soc/tegra/fuse.h @@ -56,7 +56,11 @@ u32 tegra_read_straps(void); u32 tegra_read_ram_code(void); int tegra_fuse_readl(unsigned long offset, u32 *value); +#ifdef CONFIG_ARCH_TEGRA extern struct tegra_sku_info tegra_sku_info; +#else +static struct tegra_sku_info tegra_sku_info __maybe_unused; +#endif struct device *tegra_soc_device_register(void); From d60cd06331a3566d3305b3c7b566e79edf4e2095 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 30 Oct 2020 15:06:57 +0800 Subject: [PATCH 18/86] PM: ACPI: reboot: Use S5 for reboot After reboot, it's not possible to use hotkeys to enter BIOS setup and boot menu on some HP laptops. BIOS folks identified the root cause is the missing _PTS call, and BIOS is expecting _PTS to do proper reset. Using S5 for reboot is default behavior under Windows, "A full shutdown (S5) occurs when a system restart is requested" [1], so let's do the same here. [1] https://docs.microsoft.com/en-us/windows/win32/power/system-power-states Signed-off-by: Kai-Heng Feng [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- kernel/reboot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/reboot.c b/kernel/reboot.c index e7b78d5ae1ab..7e5aa1f78693 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -244,6 +244,8 @@ void migrate_to_reboot_cpu(void) void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); + if (pm_power_off_prepare) + pm_power_off_prepare(); migrate_to_reboot_cpu(); syscore_shutdown(); if (!cmd) From f39ee99f5118a71ea162f8f66d743a8c4c401540 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 23 Oct 2020 19:02:54 +0800 Subject: [PATCH 19/86] PM: sleep: Print driver flags for all devices during suspend/resume Currently there are 4 driver flags to control system suspend/resume behavior: DPM_FLAG_NO_DIRECT_COMPLETE, DPM_FLAG_SMART_PREPARE, DPM_FLAG_SMART_SUSPEND and DPM_FLAG_MAY_SKIP_RESUME. Print these flags during suspend/resume so as to get a brief understanding of the expected behavior of each device, and to facilitate suspend/resume debugging/tuning. To enable this tracing: echo 'file drivers/base/power/main.c +p' > /sys/kernel/debug/dynamic_debug/control Signed-off-by: Chen Yu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c7ac49042cee..2b4255dc101e 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -441,9 +441,9 @@ static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t stat static void pm_dev_dbg(struct device *dev, pm_message_t state, const char *info) { - dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event), + dev_dbg(dev, "%s%s%s driver flags: %x\n", info, pm_verb(state.event), ((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ? - ", may wakeup" : ""); + ", may wakeup" : "", dev->power.driver_flags); } static void pm_dev_err(struct device *dev, pm_message_t state, const char *info, From 298ed2b31f55280624417f80a09de0e28db8f786 Mon Sep 17 00:00:00 2001 From: Victor Ding Date: Tue, 27 Oct 2020 07:23:54 +0000 Subject: [PATCH 20/86] x86/msr-index: sort AMD RAPL MSRs by address MSRs in the rest of this file are sorted by their addresses; fixing the two outliers. No functional changes. Signed-off-by: Victor Ding Acked-by: Kim Phillips Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/msr-index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 972a34d93505..21917e134ad4 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -326,8 +326,8 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 -#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b #define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 +#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b /* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 From a2c32fa736a590a7ab6e9601066a6772ae09869b Mon Sep 17 00:00:00 2001 From: Victor Ding Date: Tue, 27 Oct 2020 07:23:55 +0000 Subject: [PATCH 21/86] powercap/intel_rapl_msr: Convert rapl_msr_priv into pointer Changes the static struct rapl_msr_priv to a pointer to allow using a different RAPL MSR interface, preparing for supporting AMD's RAPL MSR interface. No functional changes. Signed-off-by: Victor Ding Acked-by: Kim Phillips [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_msr.c | 33 +++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 1646808d354c..a819b3b89b2f 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -31,7 +31,9 @@ #define MSR_VR_CURRENT_CONFIG 0x00000601 /* private data for RAPL MSR Interface */ -static struct rapl_if_priv rapl_msr_priv = { +static struct rapl_if_priv *rapl_msr_priv; + +static struct rapl_if_priv rapl_msr_priv_intel = { .reg_unit = MSR_RAPL_POWER_UNIT, .regs[RAPL_DOMAIN_PACKAGE] = { MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO }, @@ -58,9 +60,9 @@ static int rapl_cpu_online(unsigned int cpu) { struct rapl_package *rp; - rp = rapl_find_package_domain(cpu, &rapl_msr_priv); + rp = rapl_find_package_domain(cpu, rapl_msr_priv); if (!rp) { - rp = rapl_add_package(cpu, &rapl_msr_priv); + rp = rapl_add_package(cpu, rapl_msr_priv); if (IS_ERR(rp)) return PTR_ERR(rp); } @@ -73,7 +75,7 @@ static int rapl_cpu_down_prep(unsigned int cpu) struct rapl_package *rp; int lead_cpu; - rp = rapl_find_package_domain(cpu, &rapl_msr_priv); + rp = rapl_find_package_domain(cpu, rapl_msr_priv); if (!rp) return 0; @@ -136,40 +138,41 @@ static int rapl_msr_probe(struct platform_device *pdev) const struct x86_cpu_id *id = x86_match_cpu(pl4_support_ids); int ret; - rapl_msr_priv.read_raw = rapl_msr_read_raw; - rapl_msr_priv.write_raw = rapl_msr_write_raw; + rapl_msr_priv = &rapl_msr_priv_intel; + rapl_msr_priv->read_raw = rapl_msr_read_raw; + rapl_msr_priv->write_raw = rapl_msr_write_raw; if (id) { - rapl_msr_priv.limits[RAPL_DOMAIN_PACKAGE] = 3; - rapl_msr_priv.regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] = + rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] = 3; + rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] = MSR_VR_CURRENT_CONFIG; pr_info("PL4 support detected.\n"); } - rapl_msr_priv.control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); - if (IS_ERR(rapl_msr_priv.control_type)) { + rapl_msr_priv->control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); + if (IS_ERR(rapl_msr_priv->control_type)) { pr_debug("failed to register powercap control_type.\n"); - return PTR_ERR(rapl_msr_priv.control_type); + return PTR_ERR(rapl_msr_priv->control_type); } ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online", rapl_cpu_online, rapl_cpu_down_prep); if (ret < 0) goto out; - rapl_msr_priv.pcap_rapl_online = ret; + rapl_msr_priv->pcap_rapl_online = ret; return 0; out: if (ret) - powercap_unregister_control_type(rapl_msr_priv.control_type); + powercap_unregister_control_type(rapl_msr_priv->control_type); return ret; } static int rapl_msr_remove(struct platform_device *pdev) { - cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); - powercap_unregister_control_type(rapl_msr_priv.control_type); + cpuhp_remove_state(rapl_msr_priv->pcap_rapl_online); + powercap_unregister_control_type(rapl_msr_priv->control_type); return 0; } From 43756a298928c9a4e8201baaeb205c0c103728af Mon Sep 17 00:00:00 2001 From: Victor Ding Date: Tue, 27 Oct 2020 07:23:56 +0000 Subject: [PATCH 22/86] powercap: Add AMD Fam17h RAPL support Enable AMD Fam17h RAPL support for the power capping framework. The support is as per AMD Fam17h Model31h (Zen2) and model 00-ffh (Zen1) PPR. Tested by comparing the results of following two sysfs entries and the values directly read from corresponding MSRs via /dev/cpu/[x]/msr: /sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:0/energy_uj Signed-off-by: Victor Ding Acked-by: Kim Phillips [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/msr-index.h | 1 + drivers/powercap/intel_rapl_common.c | 6 ++++++ drivers/powercap/intel_rapl_msr.c | 20 +++++++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 21917e134ad4..c36a083c8ec0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -327,6 +327,7 @@ #define MSR_PP1_POLICY 0x00000642 #define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 +#define MSR_AMD_CORE_ENERGY_STATUS 0xc001029a #define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b /* Config TDP MSRs */ diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 70d6d52bc1e2..1ecd09424445 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1011,6 +1011,10 @@ static const struct rapl_defaults rapl_defaults_cht = { .compute_time_window = rapl_compute_time_window_atom, }; +static const struct rapl_defaults rapl_defaults_amd = { + .check_unit = rapl_check_unit_core, +}; + static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core), @@ -1061,6 +1065,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server), + + X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd), {} }; MODULE_DEVICE_TABLE(x86cpu, rapl_ids); diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index a819b3b89b2f..78213d4b5b16 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -49,6 +49,14 @@ static struct rapl_if_priv rapl_msr_priv_intel = { .limits[RAPL_DOMAIN_PLATFORM] = 2, }; +static struct rapl_if_priv rapl_msr_priv_amd = { + .reg_unit = MSR_AMD_RAPL_POWER_UNIT, + .regs[RAPL_DOMAIN_PACKAGE] = { + 0, MSR_AMD_PKG_ENERGY_STATUS, 0, 0, 0 }, + .regs[RAPL_DOMAIN_PP0] = { + 0, MSR_AMD_CORE_ENERGY_STATUS, 0, 0, 0 }, +}; + /* Handles CPU hotplug on multi-socket systems. * If a CPU goes online as the first CPU of the physical package * we add the RAPL package to the system. Similarly, when the last @@ -138,7 +146,17 @@ static int rapl_msr_probe(struct platform_device *pdev) const struct x86_cpu_id *id = x86_match_cpu(pl4_support_ids); int ret; - rapl_msr_priv = &rapl_msr_priv_intel; + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + rapl_msr_priv = &rapl_msr_priv_intel; + break; + case X86_VENDOR_AMD: + rapl_msr_priv = &rapl_msr_priv_amd; + break; + default: + pr_err("intel-rapl does not support CPU vendor %d\n", boot_cpu_data.x86_vendor); + return -ENODEV; + } rapl_msr_priv->read_raw = rapl_msr_read_raw; rapl_msr_priv->write_raw = rapl_msr_write_raw; From 8a9d881f22d7a0e06a46a326d0880fb45a06d3b5 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 27 Oct 2020 07:23:57 +0000 Subject: [PATCH 23/86] powercap: RAPL: Add AMD Fam19h RAPL support AMD Family 19h's RAPL MSRs are identical to Family 17h's. Extend Family 17h's support to Family 19h. Signed-off-by: Kim Phillips Signed-off-by: Victor Ding Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 1ecd09424445..c9e57237d778 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1067,6 +1067,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server), X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd), + X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd), {} }; MODULE_DEVICE_TABLE(x86cpu, rapl_ids); From c250d50fe2ce627ca9805d9c8ac11cbbf922a4a6 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 5 Nov 2020 12:50:01 +0000 Subject: [PATCH 24/86] PM: EM: Add a flag indicating units of power values in Energy Model There are different platforms and devices which might use different scale for the power values. Kernel sub-systems might need to check if all Energy Model (EM) devices are using the same scale. Address that issue and store the information inside EM for each device. Thanks to that they can be easily compared and proper action triggered. Suggested-by: Daniel Lezcano Reviewed-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/scmi-cpufreq.c | 3 ++- drivers/opp/of.c | 2 +- include/linux/energy_model.h | 9 +++++++-- kernel/power/energy_model.c | 24 +++++++++++++++++++++++- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index e855e8612a67..3714a4cd07fa 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -188,7 +188,8 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = handle->perf_ops->fast_switch_possible(handle, cpu_dev); - em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus); + em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, + false); return 0; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 9faeb83e4b32..16f39e2127a5 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1335,7 +1335,7 @@ int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus) goto failed; } - ret = em_dev_register_perf_domain(dev, nr_opp, &em_cb, cpus); + ret = em_dev_register_perf_domain(dev, nr_opp, &em_cb, cpus, true); if (ret) goto failed; diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index b67a51c574b9..3a33c738d876 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -29,6 +29,8 @@ struct em_perf_state { * em_perf_domain - Performance domain * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states + * @milliwatts: Flag indicating the power values are in milli-Watts + * or some other scale. * @cpus: Cpumask covering the CPUs of the domain. It's here * for performance reasons to avoid potential cache * misses during energy calculations in the scheduler @@ -43,6 +45,7 @@ struct em_perf_state { struct em_perf_domain { struct em_perf_state *table; int nr_perf_states; + int milliwatts; unsigned long cpus[]; }; @@ -79,7 +82,8 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span); + struct em_data_callback *cb, cpumask_t *span, + bool milliwatts); void em_dev_unregister_perf_domain(struct device *dev); /** @@ -186,7 +190,8 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span) + struct em_data_callback *cb, cpumask_t *span, + bool milliwatts) { return -EINVAL; } diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index c1ff7fa030ab..efe2a595988e 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -52,6 +52,17 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); +static int em_debug_units_show(struct seq_file *s, void *unused) +{ + struct em_perf_domain *pd = s->private; + char *units = pd->milliwatts ? "milliWatts" : "bogoWatts"; + + seq_printf(s, "%s\n", units); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(em_debug_units); + static void em_debug_create_pd(struct device *dev) { struct dentry *d; @@ -64,6 +75,8 @@ static void em_debug_create_pd(struct device *dev) debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus, &em_debug_cpus_fops); + debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops); + /* Create a sub-directory for each performance state */ for (i = 0; i < dev->em_pd->nr_perf_states; i++) em_debug_create_ps(&dev->em_pd->table[i], d); @@ -250,17 +263,24 @@ EXPORT_SYMBOL_GPL(em_cpu_get); * @cpus : Pointer to cpumask_t, which in case of a CPU device is * obligatory. It can be taken from i.e. 'policy->cpus'. For other * type of devices this should be set to NULL. + * @milliwatts : Flag indicating that the power values are in milliWatts or + * in some other scale. It must be set properly. * * Create Energy Model tables for a performance domain using the callbacks * defined in cb. * + * The @milliwatts is important to set with correct value. Some kernel + * sub-systems might rely on this flag and check if all devices in the EM are + * using the same scale. + * * If multiple clients register the same performance domain, all but the first * registration will be ignored. * * Return 0 on success */ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *cpus) + struct em_data_callback *cb, cpumask_t *cpus, + bool milliwatts) { unsigned long cap, prev_cap = 0; int cpu, ret; @@ -313,6 +333,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, if (ret) goto unlock; + dev->em_pd->milliwatts = milliwatts; + em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); From 5a64f775691647c242aa40d34f3512e7b179a921 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 3 Nov 2020 09:05:58 +0000 Subject: [PATCH 25/86] PM: EM: Clarify abstract scale usage for power values in Energy Model The Energy Model (EM) can store power values in milli-Watts or in abstract scale. This might cause issues in the subsystems which use the EM for estimating the device power, such as: - mixing of different scales in a subsystem which uses multiple (cooling) devices (e.g. thermal Intelligent Power Allocation (IPA)) - assuming that energy [milli-Joules] can be derived from the EM power values which might not be possible since the power scale doesn't have to be in milli-Watts To avoid misconfiguration add the requisite documentation to the EM and related subsystems: EAS and IPA. Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- .../driver-api/thermal/power_allocator.rst | 12 +++++++++++- Documentation/power/energy-model.rst | 13 +++++++++++++ Documentation/scheduler/sched-energy.rst | 5 +++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/driver-api/thermal/power_allocator.rst b/Documentation/driver-api/thermal/power_allocator.rst index 67b6a3297238..aa5f66552d6f 100644 --- a/Documentation/driver-api/thermal/power_allocator.rst +++ b/Documentation/driver-api/thermal/power_allocator.rst @@ -71,7 +71,9 @@ to the speed-grade of the silicon. `sustainable_power` is therefore simply an estimate, and may be tuned to affect the aggressiveness of the thermal ramp. For reference, the sustainable power of a 4" phone is typically 2000mW, while on a 10" tablet is around 4500mW (may vary -depending on screen size). +depending on screen size). It is possible to have the power value +expressed in an abstract scale. The sustained power should be aligned +to the scale used by the related cooling devices. If you are using device tree, do add it as a property of the thermal-zone. For example:: @@ -269,3 +271,11 @@ won't be very good. Note that this is not particular to this governor, step-wise will also misbehave if you call its throttle() faster than the normal thermal framework tick (due to interrupts for example) as it will overreact. + +Energy Model requirements +========================= + +Another important thing is the consistent scale of the power values +provided by the cooling devices. All of the cooling devices in a single +thermal zone should have power values reported either in milli-Watts +or scaled to the same 'abstract scale'. diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst index a6fb986abe3c..ba7aa581b307 100644 --- a/Documentation/power/energy-model.rst +++ b/Documentation/power/energy-model.rst @@ -20,6 +20,19 @@ possible source of information on its own, the EM framework intervenes as an abstraction layer which standardizes the format of power cost tables in the kernel, hence enabling to avoid redundant work. +The power values might be expressed in milli-Watts or in an 'abstract scale'. +Multiple subsystems might use the EM and it is up to the system integrator to +check that the requirements for the power value scale types are met. An example +can be found in the Energy-Aware Scheduler documentation +Documentation/scheduler/sched-energy.rst. For some subsystems like thermal or +powercap power values expressed in an 'abstract scale' might cause issues. +These subsystems are more interested in estimation of power used in the past, +thus the real milli-Watts might be needed. An example of these requirements can +be found in the Intelligent Power Allocation in +Documentation/driver-api/thermal/power_allocator.rst. +Important thing to keep in mind is that when the power values are expressed in +an 'abstract scale' deriving real energy in milli-Joules would not be possible. + The figure below depicts an example of drivers (Arm-specific here, but the approach is applicable to any architecture) providing power costs to the EM framework, and interested clients reading the data from it:: diff --git a/Documentation/scheduler/sched-energy.rst b/Documentation/scheduler/sched-energy.rst index 001e09c95e1d..afe02d394402 100644 --- a/Documentation/scheduler/sched-energy.rst +++ b/Documentation/scheduler/sched-energy.rst @@ -350,6 +350,11 @@ independent EM framework in Documentation/power/energy-model.rst. Please also note that the scheduling domains need to be re-built after the EM has been registered in order to start EAS. +EAS uses the EM to make a forecasting decision on energy usage and thus it is +more focused on the difference when checking possible options for task +placement. For EAS it doesn't matter whether the EM power values are expressed +in milli-Watts or in an 'abstract scale'. + 6.3 - Energy Model complexity ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From f2c90b12e700fff6a0b5a1c32f446f05f9d0890c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 3 Nov 2020 09:05:59 +0000 Subject: [PATCH 26/86] PM: EM: update the comments related to power scale The Energy Model supports power values expressed in milli-Watts or in an 'abstract scale'. Update the related comments is the code to reflect that state. Reviewed-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 11 +++++------ kernel/power/energy_model.c | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 3a33c738d876..9618c0a46ef4 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -13,9 +13,8 @@ /** * em_perf_state - Performance state of a performance domain * @frequency: The frequency in KHz, for consistency with CPUFreq - * @power: The power consumed at this level, in milli-watts (by 1 CPU or - by a registered device). It can be a total power: static and - dynamic. + * @power: The power consumed at this level (by 1 CPU or by a registered + * device). It can be a total power: static and dynamic. * @cost: The cost coefficient associated with this level, used during * energy calculation. Equal to: power * max_frequency / frequency */ @@ -58,7 +57,7 @@ struct em_data_callback { /** * active_power() - Provide power at the next performance state of * a device - * @power : Active power at the performance state in mW + * @power : Active power at the performance state * (modified) * @freq : Frequency at the performance state in kHz * (modified) @@ -69,8 +68,8 @@ struct em_data_callback { * and frequency. * * In case of CPUs, the power is the one of a single CPU in the domain, - * expressed in milli-watts. It is expected to fit in the - * [0, EM_MAX_POWER] range. + * expressed in milli-Watts or an abstract scale. It is expected to + * fit in the [0, EM_MAX_POWER] range. * * Return 0 on success. */ diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index efe2a595988e..1358fa4abfa8 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -143,7 +143,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, /* * The power returned by active_state() is expected to be - * positive, in milli-watts and to fit into 16 bits. + * positive and to fit into 16 bits. */ if (!power || power > EM_MAX_POWER) { dev_err(dev, "EM: invalid power: %lu\n", From b56a352c0d3ca4640c3c6364e592be360ac0d6d4 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 3 Nov 2020 09:06:00 +0000 Subject: [PATCH 27/86] PM: EM: Update Energy Model with new flag indicating power scale Update description and meaning of a new flag, which indicates the type of power scale used for a registered Energy Model (EM) device. Reviewed-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- Documentation/power/energy-model.rst | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst index ba7aa581b307..60ac091d3b0d 100644 --- a/Documentation/power/energy-model.rst +++ b/Documentation/power/energy-model.rst @@ -30,6 +30,8 @@ These subsystems are more interested in estimation of power used in the past, thus the real milli-Watts might be needed. An example of these requirements can be found in the Intelligent Power Allocation in Documentation/driver-api/thermal/power_allocator.rst. +Kernel subsystems might implement automatic detection to check whether EM +registered devices have inconsistent scale (based on EM internal flag). Important thing to keep in mind is that when the power values are expressed in an 'abstract scale' deriving real energy in milli-Joules would not be possible. @@ -86,7 +88,7 @@ Drivers are expected to register performance domains into the EM framework by calling the following API:: int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *cpus); + struct em_data_callback *cb, cpumask_t *cpus, bool milliwatts); Drivers must provide a callback function returning tuples for each performance state. The callback function provided by the driver is free @@ -94,6 +96,10 @@ to fetch data from any relevant location (DT, firmware, ...), and by any mean deemed necessary. Only for CPU devices, drivers must specify the CPUs of the performance domains using cpumask. For other devices than CPUs the last argument must be set to NULL. +The last argument 'milliwatts' is important to set with correct value. Kernel +subsystems which use EM might rely on this flag to check if all EM devices use +the same scale. If there are different scales, these subsystems might decide +to: return warning/error, stop working or panic. See Section 3. for an example of driver implementing this callback, and kernel/power/energy_model.c for further documentation on this API. @@ -169,7 +175,8 @@ EM framework:: 37 nr_opp = foo_get_nr_opp(policy); 38 39 /* And register the new performance domain */ - 40 em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus); - 41 - 42 return 0; - 43 } + 40 em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, + 41 true); + 42 + 43 return 0; + 44 } From fc51989062138744b56e47190915ce68484e73f3 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 3 Nov 2020 16:06:25 +0100 Subject: [PATCH 28/86] PM: domains: Rename pm_genpd_syscore_poweroff|poweron() To better describe what the pm_genpd_syscore_poweroff|poweron() functions actually do, let's rename them to dev_pm_genpd_suspend|resume() and update the rather few callers of them accordingly (a couple of clocksource drivers). Moreover, let's take the opportunity to add some documentation of these exported functions, as that is currently missing. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 35 +++++++++++++++++++++-------------- drivers/clocksource/sh_cmt.c | 8 ++++---- drivers/clocksource/sh_mtu2.c | 4 ++-- drivers/clocksource/sh_tmu.c | 8 ++++---- include/linux/pm_domain.h | 8 ++++---- 5 files changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 743268996336..9b4881b67683 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1363,14 +1363,7 @@ static void genpd_complete(struct device *dev) genpd_unlock(genpd); } -/** - * genpd_syscore_switch - Switch power during system core suspend or resume. - * @dev: Device that normally is marked as "always on" to switch power for. - * - * This routine may only be called during the system core (syscore) suspend or - * resume phase for devices whose "always on" flags are set. - */ -static void genpd_syscore_switch(struct device *dev, bool suspend) +static void genpd_switch_state(struct device *dev, bool suspend) { struct generic_pm_domain *genpd; @@ -1387,17 +1380,31 @@ static void genpd_syscore_switch(struct device *dev, bool suspend) } } -void pm_genpd_syscore_poweroff(struct device *dev) +/** + * dev_pm_genpd_suspend - Synchronously try to suspend the genpd for @dev + * @dev: The device that is attached to the genpd, that can be suspended. + * + * This routine should typically be called for a device that needs to be + * suspended during the syscore suspend phase. + */ +void dev_pm_genpd_suspend(struct device *dev) { - genpd_syscore_switch(dev, true); + genpd_switch_state(dev, true); } -EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweroff); +EXPORT_SYMBOL_GPL(dev_pm_genpd_suspend); -void pm_genpd_syscore_poweron(struct device *dev) +/** + * dev_pm_genpd_resume - Synchronously try to resume the genpd for @dev + * @dev: The device that is attached to the genpd, which needs to be resumed. + * + * This routine should typically be called for a device that needs to be resumed + * during the syscore resume phase. + */ +void dev_pm_genpd_resume(struct device *dev) { - genpd_syscore_switch(dev, false); + genpd_switch_state(dev, false); } -EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); +EXPORT_SYMBOL_GPL(dev_pm_genpd_resume); #else /* !CONFIG_PM_SLEEP */ diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 760777458a90..7275d95de435 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -658,7 +658,7 @@ static void sh_cmt_clocksource_suspend(struct clocksource *cs) return; sh_cmt_stop(ch, FLAG_CLOCKSOURCE); - pm_genpd_syscore_poweroff(&ch->cmt->pdev->dev); + dev_pm_genpd_suspend(&ch->cmt->pdev->dev); } static void sh_cmt_clocksource_resume(struct clocksource *cs) @@ -668,7 +668,7 @@ static void sh_cmt_clocksource_resume(struct clocksource *cs) if (!ch->cs_enabled) return; - pm_genpd_syscore_poweron(&ch->cmt->pdev->dev); + dev_pm_genpd_resume(&ch->cmt->pdev->dev); sh_cmt_start(ch, FLAG_CLOCKSOURCE); } @@ -760,7 +760,7 @@ static void sh_cmt_clock_event_suspend(struct clock_event_device *ced) { struct sh_cmt_channel *ch = ced_to_sh_cmt(ced); - pm_genpd_syscore_poweroff(&ch->cmt->pdev->dev); + dev_pm_genpd_suspend(&ch->cmt->pdev->dev); clk_unprepare(ch->cmt->clk); } @@ -769,7 +769,7 @@ static void sh_cmt_clock_event_resume(struct clock_event_device *ced) struct sh_cmt_channel *ch = ced_to_sh_cmt(ced); clk_prepare(ch->cmt->clk); - pm_genpd_syscore_poweron(&ch->cmt->pdev->dev); + dev_pm_genpd_resume(&ch->cmt->pdev->dev); } static int sh_cmt_register_clockevent(struct sh_cmt_channel *ch, diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c index bfccb31e94ad..169a1fccc497 100644 --- a/drivers/clocksource/sh_mtu2.c +++ b/drivers/clocksource/sh_mtu2.c @@ -297,12 +297,12 @@ static int sh_mtu2_clock_event_set_periodic(struct clock_event_device *ced) static void sh_mtu2_clock_event_suspend(struct clock_event_device *ced) { - pm_genpd_syscore_poweroff(&ced_to_sh_mtu2(ced)->mtu->pdev->dev); + dev_pm_genpd_suspend(&ced_to_sh_mtu2(ced)->mtu->pdev->dev); } static void sh_mtu2_clock_event_resume(struct clock_event_device *ced) { - pm_genpd_syscore_poweron(&ced_to_sh_mtu2(ced)->mtu->pdev->dev); + dev_pm_genpd_resume(&ced_to_sh_mtu2(ced)->mtu->pdev->dev); } static void sh_mtu2_register_clockevent(struct sh_mtu2_channel *ch, diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index d41df9ba3725..b00dec0655cb 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -292,7 +292,7 @@ static void sh_tmu_clocksource_suspend(struct clocksource *cs) if (--ch->enable_count == 0) { __sh_tmu_disable(ch); - pm_genpd_syscore_poweroff(&ch->tmu->pdev->dev); + dev_pm_genpd_suspend(&ch->tmu->pdev->dev); } } @@ -304,7 +304,7 @@ static void sh_tmu_clocksource_resume(struct clocksource *cs) return; if (ch->enable_count++ == 0) { - pm_genpd_syscore_poweron(&ch->tmu->pdev->dev); + dev_pm_genpd_resume(&ch->tmu->pdev->dev); __sh_tmu_enable(ch); } } @@ -394,12 +394,12 @@ static int sh_tmu_clock_event_next(unsigned long delta, static void sh_tmu_clock_event_suspend(struct clock_event_device *ced) { - pm_genpd_syscore_poweroff(&ced_to_sh_tmu(ced)->tmu->pdev->dev); + dev_pm_genpd_suspend(&ced_to_sh_tmu(ced)->tmu->pdev->dev); } static void sh_tmu_clock_event_resume(struct clock_event_device *ced) { - pm_genpd_syscore_poweron(&ced_to_sh_tmu(ced)->tmu->pdev->dev); + dev_pm_genpd_resume(&ced_to_sh_tmu(ced)->tmu->pdev->dev); } static void sh_tmu_register_clockevent(struct sh_tmu_channel *ch, diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ad0ec481416..a8f93328daec 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -280,11 +280,11 @@ static inline int dev_pm_genpd_remove_notifier(struct device *dev) #endif #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP -void pm_genpd_syscore_poweroff(struct device *dev); -void pm_genpd_syscore_poweron(struct device *dev); +void dev_pm_genpd_suspend(struct device *dev); +void dev_pm_genpd_resume(struct device *dev); #else -static inline void pm_genpd_syscore_poweroff(struct device *dev) {} -static inline void pm_genpd_syscore_poweron(struct device *dev) {} +static inline void dev_pm_genpd_suspend(struct device *dev) {} +static inline void dev_pm_genpd_resume(struct device *dev) {} #endif /* OF PM domain providers */ From b9795a3e4e1cbf521bbb5ef240eb47803c303b02 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 3 Nov 2020 16:06:26 +0100 Subject: [PATCH 29/86] PM: domains: Enable dev_pm_genpd_suspend|resume() for suspend-to-idle The dev_pm_genpd_suspend|resume() have so far only been used during the syscore suspend/resume phases. However, during suspend-to-idle, where the syscore phases doesn't exist, similar operations are sometimes needed. An existing example are the timekeeping_suspend|resume() functions, which are being called both through a registered syscore ops during the syscore phases, but also as regular functions calls from cpuidle (via tick_freeze()) during suspend-to-idle. For similar reasons, let's enable the dev_pm_genpd_suspend|resume() APIs to be re-used for corresponding CPU devices that are attached to a genpd, during suspend-to-idle. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9b4881b67683..4a55f3c949ae 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1366,18 +1366,27 @@ static void genpd_complete(struct device *dev) static void genpd_switch_state(struct device *dev, bool suspend) { struct generic_pm_domain *genpd; + bool use_lock; genpd = dev_to_genpd_safe(dev); if (!genpd) return; + use_lock = genpd_is_irq_safe(genpd); + + if (use_lock) + genpd_lock(genpd); + if (suspend) { genpd->suspended_count++; - genpd_sync_power_off(genpd, false, 0); + genpd_sync_power_off(genpd, use_lock, 0); } else { - genpd_sync_power_on(genpd, false, 0); + genpd_sync_power_on(genpd, use_lock, 0); genpd->suspended_count--; } + + if (use_lock) + genpd_unlock(genpd); } /** @@ -1385,7 +1394,9 @@ static void genpd_switch_state(struct device *dev, bool suspend) * @dev: The device that is attached to the genpd, that can be suspended. * * This routine should typically be called for a device that needs to be - * suspended during the syscore suspend phase. + * suspended during the syscore suspend phase. It may also be called during + * suspend-to-idle to suspend a corresponding CPU device that is attached to a + * genpd. */ void dev_pm_genpd_suspend(struct device *dev) { @@ -1398,7 +1409,8 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_suspend); * @dev: The device that is attached to the genpd, which needs to be resumed. * * This routine should typically be called for a device that needs to be resumed - * during the syscore resume phase. + * during the syscore resume phase. It may also be called during suspend-to-idle + * to resume a corresponding CPU device that is attached to a genpd. */ void dev_pm_genpd_resume(struct device *dev) { From 670c90def03429a228229420fa48a17913fdcc0d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 3 Nov 2020 16:06:27 +0100 Subject: [PATCH 30/86] cpuidle: psci: Enable suspend-to-idle for PSCI OSI mode To select domain idlestates for cpuidle-psci when OSI mode has been enabled, the PM domains via genpd are being managed through runtime PM. This works fine for the regular idlepath, but it doesn't during system wide suspend. More precisely, the domain idlestates becomes temporarily disabled, which is because the PM core disables runtime PM for devices during system wide suspend. Later in the system suspend phase, genpd intends to deal with this from its ->suspend_noirq() callback, but this doesn't work as expected for a device corresponding to a CPU, because the domain idlestates needs to be selected on a per CPU basis (the PM core doesn't invoke the callbacks like that). To address this problem, let's enable the syscore flag for the corresponding CPU device that becomes successfully attached to its PM domain (applicable only in OSI mode). This informs the PM core to skip invoke the system wide suspend/resume callbacks for the device, thus also prevents genpd from screwing up its internal state of it. Moreover, to properly select a domain idlestate for the CPUs during suspend-to-idle, let's assign a specific ->enter_s2idle() callback for the corresponding domain idlestate (applicable only in OSI mode). From that callback, let's invoke dev_pm_genpd_suspend|resume(), as this allows a domain idlestate to be selected for the current CPU by genpd. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-psci-domain.c | 2 ++ drivers/cpuidle/cpuidle-psci.c | 34 +++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 4a031c62f92a..ff2c3f8e4668 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -327,6 +327,8 @@ struct device *psci_dt_attach_cpu(int cpu) if (cpu_online(cpu)) pm_runtime_get_sync(dev); + dev_pm_syscore_device(dev, true); + return dev; } diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index d928b37718bd..b51b5df08450 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -52,8 +53,9 @@ static inline int psci_enter_state(int idx, u32 state) return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state); } -static int psci_enter_domain_idle_state(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int idx) +static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx, + bool s2idle) { struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data); u32 *states = data->psci_states; @@ -66,7 +68,12 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev, return -1; /* Do runtime PM to manage a hierarchical CPU toplogy. */ - RCU_NONIDLE(pm_runtime_put_sync_suspend(pd_dev)); + rcu_irq_enter_irqson(); + if (s2idle) + dev_pm_genpd_suspend(pd_dev); + else + pm_runtime_put_sync_suspend(pd_dev); + rcu_irq_exit_irqson(); state = psci_get_domain_state(); if (!state) @@ -74,7 +81,12 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev, ret = psci_cpu_suspend_enter(state) ? -1 : idx; - RCU_NONIDLE(pm_runtime_get_sync(pd_dev)); + rcu_irq_enter_irqson(); + if (s2idle) + dev_pm_genpd_resume(pd_dev); + else + pm_runtime_get_sync(pd_dev); + rcu_irq_exit_irqson(); cpu_pm_exit(); @@ -83,6 +95,19 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev, return ret; } +static int psci_enter_domain_idle_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + return __psci_enter_domain_idle_state(dev, drv, idx, false); +} + +static int psci_enter_s2idle_domain_idle_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int idx) +{ + return __psci_enter_domain_idle_state(dev, drv, idx, true); +} + static int psci_idle_cpuhp_up(unsigned int cpu) { struct device *pd_dev = __this_cpu_read(psci_cpuidle_data.dev); @@ -170,6 +195,7 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv, * deeper states. */ drv->states[state_count - 1].enter = psci_enter_domain_idle_state; + drv->states[state_count - 1].enter_s2idle = psci_enter_s2idle_domain_idle_state; psci_cpuidle_use_cpuhp = true; return 0; From a04c75704678cce818c8250385c11d5efa0128c4 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 12 Nov 2020 15:09:27 +0100 Subject: [PATCH 31/86] dt-bindings: devfreq: Add documentation for the interconnect properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for new optional properties in the exynos bus nodes: interconnects, #interconnect-cells, samsung,data-clock-ratio. These properties allow to specify the SoC interconnect structure which then allows the interconnect consumer devices to request specific bandwidth requirements. Acked-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Tested-by: Chanwoo Choi Acked-by: Rob Herring Signed-off-by: Artur ÅšwigoÅ„ Signed-off-by: Sylwester Nawrocki Signed-off-by: Chanwoo Choi --- .../bindings/devfreq/exynos-bus.txt | 71 ++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt index e71f752cc18f..bcaa2c08ac11 100644 --- a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt +++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt @@ -51,6 +51,19 @@ Optional properties only for parent bus device: - exynos,saturation-ratio: the percentage value which is used to calibrate the performance count against total cycle count. +Optional properties for the interconnect functionality (QoS frequency +constraints): +- #interconnect-cells: should be 0. +- interconnects: as documented in ../interconnect.txt, describes a path at the + higher level interconnects used by this interconnect provider. + If this interconnect provider is directly linked to a top level interconnect + provider the property contains only one phandle. The provider extends + the interconnect graph by linking its node to a node registered by provider + pointed to by first phandle in the 'interconnects' property. + +- samsung,data-clock-ratio: ratio of the data throughput in B/s to minimum data + clock frequency in Hz, default value is 8 when this property is missing. + Detailed correlation between sub-blocks and power line according to Exynos SoC: - In case of Exynos3250, there are two power line as following: VDD_MIF |--- DMC @@ -135,7 +148,7 @@ Detailed correlation between sub-blocks and power line according to Exynos SoC: |--- PERIC (Fixed clock rate) |--- FSYS (Fixed clock rate) -Example1: +Example 1: Show the AXI buses of Exynos3250 SoC. Exynos3250 divides the buses to power line (regulator). The MIF (Memory Interface) AXI bus is used to transfer data between DRAM and CPU and uses the VDD_MIF regulator. @@ -184,7 +197,7 @@ Example1: |L5 |200000 |200000 |400000 |300000 | ||1000000 | ---------------------------------------------------------- -Example2 : +Example 2: The bus of DMC (Dynamic Memory Controller) block in exynos3250.dtsi is listed below: @@ -419,3 +432,57 @@ Example2 : devfreq = <&bus_leftbus>; status = "okay"; }; + +Example 3: + An interconnect path "bus_display -- bus_leftbus -- bus_dmc" on + Exynos4412 SoC with video mixer as an interconnect consumer device. + + soc { + bus_dmc: bus_dmc { + compatible = "samsung,exynos-bus"; + clocks = <&clock CLK_DIV_DMC>; + clock-names = "bus"; + operating-points-v2 = <&bus_dmc_opp_table>; + samsung,data-clock-ratio = <4>; + #interconnect-cells = <0>; + }; + + bus_leftbus: bus_leftbus { + compatible = "samsung,exynos-bus"; + clocks = <&clock CLK_DIV_GDL>; + clock-names = "bus"; + operating-points-v2 = <&bus_leftbus_opp_table>; + #interconnect-cells = <0>; + interconnects = <&bus_dmc>; + }; + + bus_display: bus_display { + compatible = "samsung,exynos-bus"; + clocks = <&clock CLK_ACLK160>; + clock-names = "bus"; + operating-points-v2 = <&bus_display_opp_table>; + #interconnect-cells = <0>; + interconnects = <&bus_leftbus &bus_dmc>; + }; + + bus_dmc_opp_table: opp_table1 { + compatible = "operating-points-v2"; + /* ... */ + } + + bus_leftbus_opp_table: opp_table3 { + compatible = "operating-points-v2"; + /* ... */ + }; + + bus_display_opp_table: opp_table4 { + compatible = "operating-points-v2"; + /* .. */ + }; + + &mixer { + compatible = "samsung,exynos4212-mixer"; + interconnects = <&bus_display &bus_dmc>; + /* ... */ + }; + }; From 404d59c57b26681a370eee059f9b8ad0f34327be Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 12 Nov 2020 15:09:30 +0100 Subject: [PATCH 32/86] PM / devfreq: exynos-bus: Add registration of interconnect child device This patch adds registration of a child platform device for the exynos interconnect driver. It is assumed that the interconnect provider will only be needed when #interconnect-cells property is present in the bus DT node, hence the child device will be created only when such a property is present. Acked-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Signed-off-by: Sylwester Nawrocki Signed-off-by: Chanwoo Choi --- drivers/devfreq/exynos-bus.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 1e684a448c9e..20447a401af4 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -24,6 +24,7 @@ struct exynos_bus { struct device *dev; + struct platform_device *icc_pdev; struct devfreq *devfreq; struct devfreq_event_dev **edev; @@ -156,6 +157,8 @@ static void exynos_bus_exit(struct device *dev) if (ret < 0) dev_warn(dev, "failed to disable the devfreq-event devices\n"); + platform_device_unregister(bus->icc_pdev); + dev_pm_opp_of_remove_table(dev); clk_disable_unprepare(bus->clk); if (bus->opp_table) { @@ -168,6 +171,8 @@ static void exynos_bus_passive_exit(struct device *dev) { struct exynos_bus *bus = dev_get_drvdata(dev); + platform_device_unregister(bus->icc_pdev); + dev_pm_opp_of_remove_table(dev); clk_disable_unprepare(bus->clk); } @@ -432,6 +437,18 @@ static int exynos_bus_probe(struct platform_device *pdev) if (ret < 0) goto err; + /* Create child platform device for the interconnect provider */ + if (of_get_property(dev->of_node, "#interconnect-cells", NULL)) { + bus->icc_pdev = platform_device_register_data( + dev, "exynos-generic-icc", + PLATFORM_DEVID_AUTO, NULL, 0); + + if (IS_ERR(bus->icc_pdev)) { + ret = PTR_ERR(bus->icc_pdev); + goto err; + } + } + max_state = bus->devfreq->profile->max_state; min_freq = (bus->devfreq->profile->freq_table[0] / 1000); max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000); From 63087265c288dc2d0f198ffba964c9fb383a61ed Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Nov 2020 12:55:17 +0000 Subject: [PATCH 33/86] cppc_cpufreq: fix misspelling, code style and readability issues Fix a few trivial issues in the cppc_cpufreq driver: - indentation of function arguments - consistent use of tabs (vs space) in defines - spelling: s/Offest/Offset, s/trasition/transition - order of local variables, from long pointers to structures to short ret and i (index) variables, to improve readability Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index f29e8d0553a8..0b6058ab695f 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -26,8 +26,8 @@ /* Minimum struct length needed for the DMI processor entry we want */ #define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48 -/* Offest in the DMI processor structure for the max frequency */ -#define DMI_PROCESSOR_MAX_SPEED 0x14 +/* Offset in the DMI processor structure for the max frequency */ +#define DMI_PROCESSOR_MAX_SPEED 0x14 /* * These structs contain information parsed from per CPU @@ -97,10 +97,10 @@ static u64 cppc_get_dmi_max_khz(void) * For perf/freq > Nominal, we use the ratio perf:freq at Nominal for conversion */ static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu, - unsigned int perf) + unsigned int perf) { - static u64 max_khz; struct cppc_perf_caps *caps = &cpu->perf_caps; + static u64 max_khz; u64 mul, div; if (caps->lowest_freq && caps->nominal_freq) { @@ -121,10 +121,10 @@ static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu, } static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu, - unsigned int freq) + unsigned int freq) { - static u64 max_khz; struct cppc_perf_caps *caps = &cpu->perf_caps; + static u64 max_khz; u64 mul, div; if (caps->lowest_freq && caps->nominal_freq) { @@ -146,11 +146,11 @@ static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu, } static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, + unsigned int relation) { - struct cppc_cpudata *cpu; struct cpufreq_freqs freqs; + struct cppc_cpudata *cpu; u32 desired_perf; int ret = 0; @@ -171,7 +171,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, if (ret) pr_debug("Failed to set target on CPU:%d. ret:%d\n", - cpu->cpu, ret); + cpu->cpu, ret); return ret; } @@ -193,13 +193,13 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); if (ret) pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - cpu->perf_caps.lowest_perf, cpu_num, ret); + cpu->perf_caps.lowest_perf, cpu_num, ret); } /* * The PCC subspace describes the rate at which platform can accept commands * on the shared PCC channel (including READs which do not count towards freq - * trasition requests), so ideally we need to use the PCC values as a fallback + * transition requests), so ideally we need to use the PCC values as a fallback * if we don't have a platform specific transition_delay_us */ #ifdef CONFIG_ARM64 @@ -241,8 +241,8 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) { - struct cppc_cpudata *cpu; unsigned int cpu_num = policy->cpu; + struct cppc_cpudata *cpu; int ret = 0; cpu = all_cpu_data[policy->cpu]; @@ -252,7 +252,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) if (ret) { pr_debug("Err reading CPU%d perf capabilities. ret:%d\n", - cpu_num, ret); + cpu_num, ret); return ret; } @@ -313,7 +313,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); if (ret) pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - cpu->perf_caps.highest_perf, cpu_num, ret); + cpu->perf_caps.highest_perf, cpu_num, ret); return ret; } @@ -450,8 +450,8 @@ static void cppc_check_hisi_workaround(void) static int __init cppc_cpufreq_init(void) { - int i, ret = 0; struct cppc_cpudata *cpu; + int i, ret = 0; if (acpi_disabled) return -ENODEV; From 48ad8dc94032ab43f0655190d9687f6d65b98f7f Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Nov 2020 12:55:18 +0000 Subject: [PATCH 34/86] cppc_cpufreq: clean up cpu, cpu_num and cpunum variable use In order to maintain the typical naming convention in the cpufreq framework: - replace the use of "cpu" variable name for cppc_cpudata pointers with "cpu_data" - replace variable names "cpu_num" and "cpunum" with "cpu" - make cpu variables unsigned int Where pertinent, also move the initialisation of cpu_data variable to its declaration and make consistent use of the local "cpu" variable. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 143 ++++++++++++++++----------------- 1 file changed, 69 insertions(+), 74 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 0b6058ab695f..317169453549 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -96,10 +96,10 @@ static u64 cppc_get_dmi_max_khz(void) * and extrapolate the rest * For perf/freq > Nominal, we use the ratio perf:freq at Nominal for conversion */ -static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu, +static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu_data, unsigned int perf) { - struct cppc_perf_caps *caps = &cpu->perf_caps; + struct cppc_perf_caps *caps = &cpu_data->perf_caps; static u64 max_khz; u64 mul, div; @@ -120,10 +120,10 @@ static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu, return (u64)perf * mul / div; } -static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu, +static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data, unsigned int freq) { - struct cppc_perf_caps *caps = &cpu->perf_caps; + struct cppc_perf_caps *caps = &cpu_data->perf_caps; static u64 max_khz; u64 mul, div; @@ -149,29 +149,27 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { + struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; struct cpufreq_freqs freqs; - struct cppc_cpudata *cpu; u32 desired_perf; int ret = 0; - cpu = all_cpu_data[policy->cpu]; - - desired_perf = cppc_cpufreq_khz_to_perf(cpu, target_freq); + desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq); /* Return if it is exactly the same perf */ - if (desired_perf == cpu->perf_ctrls.desired_perf) + if (desired_perf == cpu_data->perf_ctrls.desired_perf) return ret; - cpu->perf_ctrls.desired_perf = desired_perf; + cpu_data->perf_ctrls.desired_perf = desired_perf; freqs.old = policy->cur; freqs.new = target_freq; cpufreq_freq_transition_begin(policy, &freqs); - ret = cppc_set_perf(cpu->cpu, &cpu->perf_ctrls); + ret = cppc_set_perf(cpu_data->cpu, &cpu_data->perf_ctrls); cpufreq_freq_transition_end(policy, &freqs, ret != 0); if (ret) pr_debug("Failed to set target on CPU:%d. ret:%d\n", - cpu->cpu, ret); + cpu_data->cpu, ret); return ret; } @@ -184,16 +182,16 @@ static int cppc_verify_policy(struct cpufreq_policy_data *policy) static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) { - int cpu_num = policy->cpu; - struct cppc_cpudata *cpu = all_cpu_data[cpu_num]; + struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + unsigned int cpu = policy->cpu; int ret; - cpu->perf_ctrls.desired_perf = cpu->perf_caps.lowest_perf; + cpu_data->perf_ctrls.desired_perf = cpu_data->perf_caps.lowest_perf; - ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); if (ret) pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - cpu->perf_caps.lowest_perf, cpu_num, ret); + cpu_data->perf_caps.lowest_perf, cpu, ret); } /* @@ -205,7 +203,7 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) #ifdef CONFIG_ARM64 #include -static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) +static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) { unsigned long implementor = read_cpuid_implementor(); unsigned long part_num = read_cpuid_part_number(); @@ -233,7 +231,7 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) #else -static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) +static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) { return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } @@ -241,54 +239,52 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) { - unsigned int cpu_num = policy->cpu; - struct cppc_cpudata *cpu; + struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + unsigned int cpu = policy->cpu; int ret = 0; - cpu = all_cpu_data[policy->cpu]; - - cpu->cpu = cpu_num; - ret = cppc_get_perf_caps(policy->cpu, &cpu->perf_caps); + cpu_data->cpu = cpu; + ret = cppc_get_perf_caps(cpu, &cpu_data->perf_caps); if (ret) { pr_debug("Err reading CPU%d perf capabilities. ret:%d\n", - cpu_num, ret); + cpu, ret); return ret; } /* Convert the lowest and nominal freq from MHz to KHz */ - cpu->perf_caps.lowest_freq *= 1000; - cpu->perf_caps.nominal_freq *= 1000; + cpu_data->perf_caps.lowest_freq *= 1000; + cpu_data->perf_caps.nominal_freq *= 1000; /* * Set min to lowest nonlinear perf to avoid any efficiency penalty (see * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ - policy->min = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.lowest_nonlinear_perf); - policy->max = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.nominal_perf); + policy->min = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.lowest_nonlinear_perf); + policy->max = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance * available if userspace wants to use any perf between lowest & lowest * nonlinear perf */ - policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.lowest_perf); - policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.nominal_perf); + policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.lowest_perf); + policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.nominal_perf); - policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu_num); - policy->shared_type = cpu->shared_type; + policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); + policy->shared_type = cpu_data->shared_type; if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { int i; - cpumask_copy(policy->cpus, cpu->shared_cpu_map); + cpumask_copy(policy->cpus, cpu_data->shared_cpu_map); for_each_cpu(i, policy->cpus) { - if (unlikely(i == policy->cpu)) + if (unlikely(i == cpu)) continue; - memcpy(&all_cpu_data[i]->perf_caps, &cpu->perf_caps, - sizeof(cpu->perf_caps)); + memcpy(&all_cpu_data[i]->perf_caps, &cpu_data->perf_caps, + sizeof(cpu_data->perf_caps)); } } else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) { /* Support only SW_ANY for now. */ @@ -296,24 +292,24 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) return -EFAULT; } - cpu->cur_policy = policy; + cpu_data->cur_policy = policy; /* * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost * is supported. */ - if (cpu->perf_caps.highest_perf > cpu->perf_caps.nominal_perf) + if (cpu_data->perf_caps.highest_perf > cpu_data->perf_caps.nominal_perf) boost_supported = true; /* Set policy->cur to max now. The governors will adjust later. */ - policy->cur = cppc_cpufreq_perf_to_khz(cpu, - cpu->perf_caps.highest_perf); - cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf; + policy->cur = cppc_cpufreq_perf_to_khz(cpu_data, + cpu_data->perf_caps.highest_perf); + cpu_data->perf_ctrls.desired_perf = cpu_data->perf_caps.highest_perf; - ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); if (ret) pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - cpu->perf_caps.highest_perf, cpu_num, ret); + cpu_data->perf_caps.highest_perf, cpu, ret); return ret; } @@ -326,7 +322,7 @@ static inline u64 get_delta(u64 t1, u64 t0) return (u32)t1 - (u32)t0; } -static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu, +static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, struct cppc_perf_fb_ctrs fb_ctrs_t0, struct cppc_perf_fb_ctrs fb_ctrs_t1) { @@ -345,33 +341,33 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu, delivered_perf = (reference_perf * delta_delivered) / delta_reference; else - delivered_perf = cpu->perf_ctrls.desired_perf; + delivered_perf = cpu_data->perf_ctrls.desired_perf; - return cppc_cpufreq_perf_to_khz(cpu, delivered_perf); + return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); } -static unsigned int cppc_cpufreq_get_rate(unsigned int cpunum) +static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) { struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; - struct cppc_cpudata *cpu = all_cpu_data[cpunum]; + struct cppc_cpudata *cpu_data = all_cpu_data[cpu]; int ret; - ret = cppc_get_perf_ctrs(cpunum, &fb_ctrs_t0); + ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0); if (ret) return ret; udelay(2); /* 2usec delay between sampling */ - ret = cppc_get_perf_ctrs(cpunum, &fb_ctrs_t1); + ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1); if (ret) return ret; - return cppc_get_rate_from_fbctrs(cpu, fb_ctrs_t0, fb_ctrs_t1); + return cppc_get_rate_from_fbctrs(cpu_data, fb_ctrs_t0, fb_ctrs_t1); } static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) { - struct cppc_cpudata *cpudata; + struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; int ret; if (!boost_supported) { @@ -379,13 +375,12 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) return -EINVAL; } - cpudata = all_cpu_data[policy->cpu]; if (state) - policy->max = cppc_cpufreq_perf_to_khz(cpudata, - cpudata->perf_caps.highest_perf); + policy->max = cppc_cpufreq_perf_to_khz(cpu_data, + cpu_data->perf_caps.highest_perf); else - policy->max = cppc_cpufreq_perf_to_khz(cpudata, - cpudata->perf_caps.nominal_perf); + policy->max = cppc_cpufreq_perf_to_khz(cpu_data, + cpu_data->perf_caps.nominal_perf); policy->cpuinfo.max_freq = policy->max; ret = freq_qos_update_request(policy->max_freq_req, policy->max); @@ -412,17 +407,17 @@ static struct cpufreq_driver cppc_cpufreq_driver = { * platform specific mechanism. We reuse the desired performance register to * store the real performance calculated by the platform. */ -static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpunum) +static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) { - struct cppc_cpudata *cpudata = all_cpu_data[cpunum]; + struct cppc_cpudata *cpu_data = all_cpu_data[cpu]; u64 desired_perf; int ret; - ret = cppc_get_desired_perf(cpunum, &desired_perf); + ret = cppc_get_desired_perf(cpu, &desired_perf); if (ret < 0) return -EIO; - return cppc_cpufreq_perf_to_khz(cpudata, desired_perf); + return cppc_cpufreq_perf_to_khz(cpu_data, desired_perf); } static void cppc_check_hisi_workaround(void) @@ -450,7 +445,7 @@ static void cppc_check_hisi_workaround(void) static int __init cppc_cpufreq_init(void) { - struct cppc_cpudata *cpu; + struct cppc_cpudata *cpu_data; int i, ret = 0; if (acpi_disabled) @@ -466,8 +461,8 @@ static int __init cppc_cpufreq_init(void) if (!all_cpu_data[i]) goto out; - cpu = all_cpu_data[i]; - if (!zalloc_cpumask_var(&cpu->shared_cpu_map, GFP_KERNEL)) + cpu_data = all_cpu_data[i]; + if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL)) goto out; } @@ -487,11 +482,11 @@ static int __init cppc_cpufreq_init(void) out: for_each_possible_cpu(i) { - cpu = all_cpu_data[i]; - if (!cpu) + cpu_data = all_cpu_data[i]; + if (!cpu_data) break; - free_cpumask_var(cpu->shared_cpu_map); - kfree(cpu); + free_cpumask_var(cpu_data->shared_cpu_map); + kfree(cpu_data); } kfree(all_cpu_data); @@ -500,15 +495,15 @@ out: static void __exit cppc_cpufreq_exit(void) { - struct cppc_cpudata *cpu; + struct cppc_cpudata *cpu_data; int i; cpufreq_unregister_driver(&cppc_cpufreq_driver); for_each_possible_cpu(i) { - cpu = all_cpu_data[i]; - free_cpumask_var(cpu->shared_cpu_map); - kfree(cpu); + cpu_data = all_cpu_data[i]; + free_cpumask_var(cpu_data->shared_cpu_map); + kfree(cpu_data); } kfree(all_cpu_data); From bb025fb6c276ac874b718b9d884b7ee1099b2c22 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Nov 2020 12:55:19 +0000 Subject: [PATCH 35/86] cppc_cpufreq: simplify use of performance capabilities The CPPC performance capabilities are used significantly throughout the driver. Simplify the use of them by introducing a local pointer "caps" to point to cpu_data->perf_caps, in functions that access performance capabilities often. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 40 +++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 317169453549..7cc9bd8568de 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -183,15 +183,16 @@ static int cppc_verify_policy(struct cpufreq_policy_data *policy) static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) { struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + struct cppc_perf_caps *caps = &cpu_data->perf_caps; unsigned int cpu = policy->cpu; int ret; - cpu_data->perf_ctrls.desired_perf = cpu_data->perf_caps.lowest_perf; + cpu_data->perf_ctrls.desired_perf = caps->lowest_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); if (ret) pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - cpu_data->perf_caps.lowest_perf, cpu, ret); + caps->lowest_perf, cpu, ret); } /* @@ -240,11 +241,12 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) { struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + struct cppc_perf_caps *caps = &cpu_data->perf_caps; unsigned int cpu = policy->cpu; int ret = 0; cpu_data->cpu = cpu; - ret = cppc_get_perf_caps(cpu, &cpu_data->perf_caps); + ret = cppc_get_perf_caps(cpu, caps); if (ret) { pr_debug("Err reading CPU%d perf capabilities. ret:%d\n", @@ -253,23 +255,27 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) } /* Convert the lowest and nominal freq from MHz to KHz */ - cpu_data->perf_caps.lowest_freq *= 1000; - cpu_data->perf_caps.nominal_freq *= 1000; + caps->lowest_freq *= 1000; + caps->nominal_freq *= 1000; /* * Set min to lowest nonlinear perf to avoid any efficiency penalty (see * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ - policy->min = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.lowest_nonlinear_perf); - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.nominal_perf); + policy->min = cppc_cpufreq_perf_to_khz(cpu_data, + caps->lowest_nonlinear_perf); + policy->max = cppc_cpufreq_perf_to_khz(cpu_data, + caps->nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance * available if userspace wants to use any perf between lowest & lowest * nonlinear perf */ - policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.lowest_perf); - policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data, cpu_data->perf_caps.nominal_perf); + policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data, + caps->lowest_perf); + policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data, + caps->nominal_perf); policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; @@ -283,7 +289,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) if (unlikely(i == cpu)) continue; - memcpy(&all_cpu_data[i]->perf_caps, &cpu_data->perf_caps, + memcpy(&all_cpu_data[i]->perf_caps, caps, sizeof(cpu_data->perf_caps)); } } else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) { @@ -298,18 +304,17 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost * is supported. */ - if (cpu_data->perf_caps.highest_perf > cpu_data->perf_caps.nominal_perf) + if (caps->highest_perf > caps->nominal_perf) boost_supported = true; /* Set policy->cur to max now. The governors will adjust later. */ - policy->cur = cppc_cpufreq_perf_to_khz(cpu_data, - cpu_data->perf_caps.highest_perf); - cpu_data->perf_ctrls.desired_perf = cpu_data->perf_caps.highest_perf; + policy->cur = cppc_cpufreq_perf_to_khz(cpu_data, caps->highest_perf); + cpu_data->perf_ctrls.desired_perf = caps->highest_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); if (ret) pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - cpu_data->perf_caps.highest_perf, cpu, ret); + caps->highest_perf, cpu, ret); return ret; } @@ -368,6 +373,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) { struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; + struct cppc_perf_caps *caps = &cpu_data->perf_caps; int ret; if (!boost_supported) { @@ -377,10 +383,10 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) if (state) policy->max = cppc_cpufreq_perf_to_khz(cpu_data, - cpu_data->perf_caps.highest_perf); + caps->highest_perf); else policy->max = cppc_cpufreq_perf_to_khz(cpu_data, - cpu_data->perf_caps.nominal_perf); + caps->nominal_perf); policy->cpuinfo.max_freq = policy->max; ret = freq_qos_update_request(policy->max_freq_req, policy->max); From 98fd9972ed60d26915227a8c7febbd86e262a96b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 23 Nov 2020 03:27:15 +0300 Subject: [PATCH 36/86] PM / devfreq: tegra20: Deprecate in a favor of emc-stat based driver Remove tegra20-devfreq in order to replace it with a EMC_STAT based devfreq driver. Previously we were going to use MC_STAT based tegra20-devfreq driver because EMC_STAT wasn't working properly, but now that problem is resolved. This resolves complications imposed by the removed driver since it was depending on both EMC and MC drivers simultaneously. Acked-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- MAINTAINERS | 1 - drivers/devfreq/Kconfig | 10 -- drivers/devfreq/Makefile | 1 - drivers/devfreq/tegra20-devfreq.c | 210 ------------------------------ 4 files changed, 222 deletions(-) delete mode 100644 drivers/devfreq/tegra20-devfreq.c diff --git a/MAINTAINERS b/MAINTAINERS index e73636b75f29..105a33e62ee1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11339,7 +11339,6 @@ L: linux-pm@vger.kernel.org L: linux-tegra@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git S: Maintained -F: drivers/devfreq/tegra20-devfreq.c F: drivers/devfreq/tegra30-devfreq.c MEMORY MANAGEMENT diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 0ee36ae2fa79..00704efe6398 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -121,16 +121,6 @@ config ARM_TEGRA_DEVFREQ It reads ACTMON counters of memory controllers and adjusts the operating frequencies and voltages with OPP support. -config ARM_TEGRA20_DEVFREQ - tristate "NVIDIA Tegra20 DEVFREQ Driver" - depends on ARCH_TEGRA_2x_SOC || COMPILE_TEST - depends on COMMON_CLK - select DEVFREQ_GOV_SIMPLE_ONDEMAND - help - This adds the DEVFREQ driver for the Tegra20 family of SoCs. - It reads Memory Controller counters and adjusts the operating - frequencies and voltages with OPP support. - config ARM_RK3399_DMC_DEVFREQ tristate "ARM RK3399 DMC DEVFREQ Driver" depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \ diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index 3ca1ad0ecb97..a16333ea7034 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o -obj-$(CONFIG_ARM_TEGRA20_DEVFREQ) += tegra20-devfreq.o # DEVFREQ Event Drivers obj-$(CONFIG_PM_DEVFREQ_EVENT) += event/ diff --git a/drivers/devfreq/tegra20-devfreq.c b/drivers/devfreq/tegra20-devfreq.c deleted file mode 100644 index fd801534771d..000000000000 --- a/drivers/devfreq/tegra20-devfreq.c +++ /dev/null @@ -1,210 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NVIDIA Tegra20 devfreq driver - * - * Copyright (C) 2019 GRATE-DRIVER project - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "governor.h" - -#define MC_STAT_CONTROL 0x90 -#define MC_STAT_EMC_CLOCK_LIMIT 0xa0 -#define MC_STAT_EMC_CLOCKS 0xa4 -#define MC_STAT_EMC_CONTROL 0xa8 -#define MC_STAT_EMC_COUNT 0xb8 - -#define EMC_GATHER_CLEAR (1 << 8) -#define EMC_GATHER_ENABLE (3 << 8) - -struct tegra_devfreq { - struct devfreq *devfreq; - struct clk *emc_clock; - void __iomem *regs; -}; - -static int tegra_devfreq_target(struct device *dev, unsigned long *freq, - u32 flags) -{ - struct tegra_devfreq *tegra = dev_get_drvdata(dev); - struct devfreq *devfreq = tegra->devfreq; - struct dev_pm_opp *opp; - unsigned long rate; - int err; - - opp = devfreq_recommended_opp(dev, freq, flags); - if (IS_ERR(opp)) - return PTR_ERR(opp); - - rate = dev_pm_opp_get_freq(opp); - dev_pm_opp_put(opp); - - err = clk_set_min_rate(tegra->emc_clock, rate); - if (err) - return err; - - err = clk_set_rate(tegra->emc_clock, 0); - if (err) - goto restore_min_rate; - - return 0; - -restore_min_rate: - clk_set_min_rate(tegra->emc_clock, devfreq->previous_freq); - - return err; -} - -static int tegra_devfreq_get_dev_status(struct device *dev, - struct devfreq_dev_status *stat) -{ - struct tegra_devfreq *tegra = dev_get_drvdata(dev); - - /* - * EMC_COUNT returns number of memory events, that number is lower - * than the number of clocks. Conversion ratio of 1/8 results in a - * bit higher bandwidth than actually needed, it is good enough for - * the time being because drivers don't support requesting minimum - * needed memory bandwidth yet. - * - * TODO: adjust the ratio value once relevant drivers will support - * memory bandwidth management. - */ - stat->busy_time = readl_relaxed(tegra->regs + MC_STAT_EMC_COUNT); - stat->total_time = readl_relaxed(tegra->regs + MC_STAT_EMC_CLOCKS) / 8; - stat->current_frequency = clk_get_rate(tegra->emc_clock); - - writel_relaxed(EMC_GATHER_CLEAR, tegra->regs + MC_STAT_CONTROL); - writel_relaxed(EMC_GATHER_ENABLE, tegra->regs + MC_STAT_CONTROL); - - return 0; -} - -static struct devfreq_dev_profile tegra_devfreq_profile = { - .polling_ms = 500, - .target = tegra_devfreq_target, - .get_dev_status = tegra_devfreq_get_dev_status, -}; - -static struct tegra_mc *tegra_get_memory_controller(void) -{ - struct platform_device *pdev; - struct device_node *np; - struct tegra_mc *mc; - - np = of_find_compatible_node(NULL, NULL, "nvidia,tegra20-mc-gart"); - if (!np) - return ERR_PTR(-ENOENT); - - pdev = of_find_device_by_node(np); - of_node_put(np); - if (!pdev) - return ERR_PTR(-ENODEV); - - mc = platform_get_drvdata(pdev); - if (!mc) - return ERR_PTR(-EPROBE_DEFER); - - return mc; -} - -static int tegra_devfreq_probe(struct platform_device *pdev) -{ - struct tegra_devfreq *tegra; - struct tegra_mc *mc; - unsigned long max_rate; - unsigned long rate; - int err; - - mc = tegra_get_memory_controller(); - if (IS_ERR(mc)) { - err = PTR_ERR(mc); - dev_err(&pdev->dev, "failed to get memory controller: %d\n", - err); - return err; - } - - tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL); - if (!tegra) - return -ENOMEM; - - /* EMC is a system-critical clock that is always enabled */ - tegra->emc_clock = devm_clk_get(&pdev->dev, "emc"); - if (IS_ERR(tegra->emc_clock)) - return dev_err_probe(&pdev->dev, PTR_ERR(tegra->emc_clock), - "failed to get emc clock\n"); - - tegra->regs = mc->regs; - - max_rate = clk_round_rate(tegra->emc_clock, ULONG_MAX); - - for (rate = 0; rate <= max_rate; rate++) { - rate = clk_round_rate(tegra->emc_clock, rate); - - err = dev_pm_opp_add(&pdev->dev, rate, 0); - if (err) { - dev_err(&pdev->dev, "failed to add opp: %d\n", err); - goto remove_opps; - } - } - - /* - * Reset statistic gathers state, select global bandwidth for the - * statistics collection mode and set clocks counter saturation - * limit to maximum. - */ - writel_relaxed(0x00000000, tegra->regs + MC_STAT_CONTROL); - writel_relaxed(0x00000000, tegra->regs + MC_STAT_EMC_CONTROL); - writel_relaxed(0xffffffff, tegra->regs + MC_STAT_EMC_CLOCK_LIMIT); - - platform_set_drvdata(pdev, tegra); - - tegra->devfreq = devfreq_add_device(&pdev->dev, &tegra_devfreq_profile, - DEVFREQ_GOV_SIMPLE_ONDEMAND, NULL); - if (IS_ERR(tegra->devfreq)) { - err = PTR_ERR(tegra->devfreq); - goto remove_opps; - } - - return 0; - -remove_opps: - dev_pm_opp_remove_all_dynamic(&pdev->dev); - - return err; -} - -static int tegra_devfreq_remove(struct platform_device *pdev) -{ - struct tegra_devfreq *tegra = platform_get_drvdata(pdev); - - devfreq_remove_device(tegra->devfreq); - dev_pm_opp_remove_all_dynamic(&pdev->dev); - - return 0; -} - -static struct platform_driver tegra_devfreq_driver = { - .probe = tegra_devfreq_probe, - .remove = tegra_devfreq_remove, - .driver = { - .name = "tegra20-devfreq", - }, -}; -module_platform_driver(tegra_devfreq_driver); - -MODULE_ALIAS("platform:tegra20-devfreq"); -MODULE_AUTHOR("Dmitry Osipenko "); -MODULE_DESCRIPTION("NVIDIA Tegra20 devfreq driver"); -MODULE_LICENSE("GPL v2"); From d23e95c09067618eabd6d0e8cff372f0ce517c84 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 10 Nov 2020 18:36:17 -0800 Subject: [PATCH 37/86] pm-graph v5.8 - if wakeups occur in s2idle: "freeze time: N (-x ms waking y times) ms" - change FREEZELOOP and FREEZEWAKE to S2LOOP and S2WAKE for brevity - returns all sysfs vals to their initial state after testing - use the dmesg log for debugging until the test is completed, instrument the executeSuspend process to have a full trace, if test completes, formal dmesg log overwrites the debug log - fix CPU_ON and CPU_OFF devices in the timeline, should include [n] Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/README | 4 +- tools/power/pm-graph/sleepgraph.py | 387 +++++++++++++++++------------ 2 files changed, 227 insertions(+), 164 deletions(-) diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README index 89d0a7dab4bc..da468bd510ca 100644 --- a/tools/power/pm-graph/README +++ b/tools/power/pm-graph/README @@ -6,7 +6,7 @@ |_| |___/ |_| pm-graph: suspend/resume/boot timing analysis tools - Version: 5.7 + Version: 5.8 Author: Todd Brandt Home Page: https://01.org/pm-graph @@ -61,7 +61,7 @@ - runs with python2 or python3, choice is made by /usr/bin/python link - python - python-configparser (for python2 sleepgraph) - - python-requests (for googlesheet.py) + - python-requests (for stresstester.py) - linux-tools-common (for turbostat usage in sleepgraph) Ubuntu: diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 1bc36a1db14f..81f4b8abbdf7 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -81,7 +81,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.7' + version = '5.8' ansi = False rs = 0 display = '' @@ -92,8 +92,9 @@ class SystemValues: testlog = True dmesglog = True ftracelog = False + acpidebug = True tstat = True - mindevlen = 0.0 + mindevlen = 0.0001 mincglen = 0.0 cgphase = '' cgtest = -1 @@ -115,6 +116,7 @@ class SystemValues: fpdtpath = '/sys/firmware/acpi/tables/FPDT' epath = '/sys/kernel/debug/tracing/events/power/' pmdpath = '/sys/power/pm_debug_messages' + acpipath='/sys/module/acpi/parameters/debug_level' traceevents = [ 'suspend_resume', 'wakeup_source_activate', @@ -162,16 +164,16 @@ class SystemValues: devdump = False mixedphaseheight = True devprops = dict() + cfgdef = dict() platinfo = [] predelay = 0 postdelay = 0 - pmdebug = '' tmstart = 'SUSPEND START %Y%m%d-%H:%M:%S.%f' tmend = 'RESUME COMPLETE %Y%m%d-%H:%M:%S.%f' tracefuncs = { 'sys_sync': {}, 'ksys_sync': {}, - 'pm_notifier_call_chain_robust': {}, + '__pm_notifier_call_chain': {}, 'pm_prepare_console': {}, 'pm_notifier_call_chain': {}, 'freeze_processes': {}, @@ -490,9 +492,9 @@ class SystemValues: call('echo 0 > %s/wakealarm' % self.rtcpath, shell=True) def initdmesg(self): # get the latest time stamp from the dmesg log - fp = Popen('dmesg', stdout=PIPE).stdout + lines = Popen('dmesg', stdout=PIPE).stdout.readlines() ktime = '0' - for line in fp: + for line in reversed(lines): line = ascii(line).replace('\r\n', '') idx = line.find('[') if idx > 1: @@ -500,7 +502,7 @@ class SystemValues: m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) if(m): ktime = m.group('ktime') - fp.close() + break self.dmesgstart = float(ktime) def getdmesg(self, testdata): op = self.writeDatafileHeader(self.dmesgfile, testdata) @@ -715,8 +717,6 @@ class SystemValues: self.fsetVal('0', 'events/kprobes/enable') self.fsetVal('', 'kprobe_events') self.fsetVal('1024', 'buffer_size_kb') - if self.pmdebug: - self.setVal(self.pmdebug, self.pmdpath) def setupAllKprobes(self): for name in self.tracefuncs: self.defaultKprobe(name, self.tracefuncs[name]) @@ -740,11 +740,7 @@ class SystemValues: # turn trace off self.fsetVal('0', 'tracing_on') self.cleanupFtrace() - # pm debug messages - pv = self.getVal(self.pmdpath) - if pv != '1': - self.setVal('1', self.pmdpath) - self.pmdebug = pv + self.testVal(self.pmdpath, 'basic', '1') # set the trace clock to global self.fsetVal('global', 'trace_clock') self.fsetVal('nop', 'current_tracer') @@ -900,6 +896,14 @@ class SystemValues: if isgz: return gzip.open(filename, mode+'t') return open(filename, mode) + def putlog(self, filename, text): + with self.openlog(filename, 'a') as fp: + fp.write(text) + fp.close() + def dlog(self, text): + self.putlog(self.dmesgfile, '# %s\n' % text) + def flog(self, text): + self.putlog(self.ftracefile, text) def b64unzip(self, data): try: out = codecs.decode(base64.b64decode(data), 'zlib').decode() @@ -992,9 +996,7 @@ class SystemValues: # add a line for each of these commands with their outputs for name, cmdline, info in cmdafter: footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info)) - - with self.openlog(self.ftracefile, 'a') as fp: - fp.write(footer) + self.flog(footer) return True def commonPrefix(self, list): if len(list) < 2: @@ -1034,6 +1036,7 @@ class SystemValues: cmdline, cmdpath = ' '.join(cargs[2:]), self.getExec(cargs[2]) if not cmdpath or (begin and not delta): continue + self.dlog('[%s]' % cmdline) try: fp = Popen([cmdpath]+cargs[3:], stdout=PIPE, stderr=PIPE).stdout info = ascii(fp.read()).strip() @@ -1060,6 +1063,29 @@ class SystemValues: else: out.append((name, cmdline, '\tnothing' if not info else info)) return out + def testVal(self, file, fmt='basic', value=''): + if file == 'restoreall': + for f in self.cfgdef: + if os.path.exists(f): + fp = open(f, 'w') + fp.write(self.cfgdef[f]) + fp.close() + self.cfgdef = dict() + elif value and os.path.exists(file): + fp = open(file, 'r+') + if fmt == 'radio': + m = re.match('.*\[(?P.*)\].*', fp.read()) + if m: + self.cfgdef[file] = m.group('v') + elif fmt == 'acpi': + line = fp.read().strip().split('\n')[-1] + m = re.match('.* (?P[0-9A-Fx]*) .*', line) + if m: + self.cfgdef[file] = m.group('v') + else: + self.cfgdef[file] = fp.read().strip() + fp.write(value) + fp.close() def haveTurbostat(self): if not self.tstat: return False @@ -1201,6 +1227,57 @@ class SystemValues: self.multitest[sz] *= 1440 elif unit == 'h': self.multitest[sz] *= 60 + def displayControl(self, cmd): + xset, ret = 'timeout 10 xset -d :0.0 {0}', 0 + if self.sudouser: + xset = 'sudo -u %s %s' % (self.sudouser, xset) + if cmd == 'init': + ret = call(xset.format('dpms 0 0 0'), shell=True) + if not ret: + ret = call(xset.format('s off'), shell=True) + elif cmd == 'reset': + ret = call(xset.format('s reset'), shell=True) + elif cmd in ['on', 'off', 'standby', 'suspend']: + b4 = self.displayControl('stat') + ret = call(xset.format('dpms force %s' % cmd), shell=True) + if not ret: + curr = self.displayControl('stat') + self.vprint('Display Switched: %s -> %s' % (b4, curr)) + if curr != cmd: + self.vprint('WARNING: Display failed to change to %s' % cmd) + if ret: + self.vprint('WARNING: Display failed to change to %s with xset' % cmd) + return ret + elif cmd == 'stat': + fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout + ret = 'unknown' + for line in fp: + m = re.match('[\s]*Monitor is (?P.*)', ascii(line)) + if(m and len(m.group('m')) >= 2): + out = m.group('m').lower() + ret = out[3:] if out[0:2] == 'in' else out + break + fp.close() + return ret + def setRuntimeSuspend(self, before=True): + if before: + # runtime suspend disable or enable + if self.rs > 0: + self.rstgt, self.rsval, self.rsdir = 'on', 'auto', 'enabled' + else: + self.rstgt, self.rsval, self.rsdir = 'auto', 'on', 'disabled' + pprint('CONFIGURING RUNTIME SUSPEND...') + self.rslist = deviceInfo(self.rstgt) + for i in self.rslist: + self.setVal(self.rsval, i) + pprint('runtime suspend %s on all devices (%d changed)' % (self.rsdir, len(self.rslist))) + pprint('waiting 5 seconds...') + time.sleep(5) + else: + # runtime suspend re-enable or re-disable + for i in self.rslist: + self.setVal(self.rstgt, i) + pprint('runtime suspend settings restored on %d devices' % len(self.rslist)) sysvals = SystemValues() switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0'] @@ -1640,15 +1717,20 @@ class Data: if 'resume_machine' in phase and 'suspend_machine' in lp: tS, tR = self.dmesg[lp]['end'], self.dmesg[phase]['start'] tL = tR - tS - if tL > 0: - left = True if tR > tZero else False - self.trimTime(tS, tL, left) - if 'trying' in self.dmesg[lp] and self.dmesg[lp]['trying'] >= 0.001: - tTry = round(self.dmesg[lp]['trying'] * 1000) - text = '%.0f (-%.0f waking)' % (tL * 1000, tTry) + if tL <= 0: + continue + left = True if tR > tZero else False + self.trimTime(tS, tL, left) + if 'waking' in self.dmesg[lp]: + tCnt = self.dmesg[lp]['waking'][0] + if self.dmesg[lp]['waking'][1] >= 0.001: + tTry = '-%.0f' % (round(self.dmesg[lp]['waking'][1] * 1000)) else: - text = '%.0f' % (tL * 1000) - self.tLow.append(text) + tTry = '-%.3f' % (self.dmesg[lp]['waking'][1] * 1000) + text = '%.0f (%s ms waking %d times)' % (tL * 1000, tTry, tCnt) + else: + text = '%.0f' % (tL * 1000) + self.tLow.append(text) lp = phase def getMemTime(self): if not self.hwstart or not self.hwend: @@ -1921,7 +2003,7 @@ class Data: for dev in list: length = (list[dev]['end'] - list[dev]['start']) * 1000 width = widfmt % (((list[dev]['end']-list[dev]['start'])*100)/tTotal) - if width != '0.000000' and length >= mindevlen: + if length >= mindevlen: devlist.append(dev) self.tdevlist[phase] = devlist def addHorizontalDivider(self, devname, devend): @@ -3316,9 +3398,10 @@ def parseTraceLog(live=False): # trim out s2idle loops, track time trying to freeze llp = data.lastPhase(2) if llp.startswith('suspend_machine'): - if 'trying' not in data.dmesg[llp]: - data.dmesg[llp]['trying'] = 0 - data.dmesg[llp]['trying'] += \ + if 'waking' not in data.dmesg[llp]: + data.dmesg[llp]['waking'] = [0, 0.0] + data.dmesg[llp]['waking'][0] += 1 + data.dmesg[llp]['waking'][1] += \ t.time - data.dmesg[lp]['start'] data.currphase = '' del data.dmesg[lp] @@ -4555,7 +4638,7 @@ def createHTML(testruns, testfail): # draw the devices for this phase phaselist = data.dmesg[b]['list'] for d in sorted(data.tdevlist[b]): - dname = d if '[' not in d else d.split('[')[0] + dname = d if ('[' not in d or 'CPU' in d) else d.split('[')[0] name, dev = dname, phaselist[d] drv = xtraclass = xtrainfo = xtrastyle = '' if 'htmlclass' in dev: @@ -5194,156 +5277,146 @@ def addScriptCode(hf, testruns): '\n' hf.write(script_code); -def setRuntimeSuspend(before=True): - global sysvals - sv = sysvals - if sv.rs == 0: - return - if before: - # runtime suspend disable or enable - if sv.rs > 0: - sv.rstgt, sv.rsval, sv.rsdir = 'on', 'auto', 'enabled' - else: - sv.rstgt, sv.rsval, sv.rsdir = 'auto', 'on', 'disabled' - pprint('CONFIGURING RUNTIME SUSPEND...') - sv.rslist = deviceInfo(sv.rstgt) - for i in sv.rslist: - sv.setVal(sv.rsval, i) - pprint('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist))) - pprint('waiting 5 seconds...') - time.sleep(5) - else: - # runtime suspend re-enable or re-disable - for i in sv.rslist: - sv.setVal(sv.rstgt, i) - pprint('runtime suspend settings restored on %d devices' % len(sv.rslist)) - # Function: executeSuspend # Description: # Execute system suspend through the sysfs interface, then copy the output # dmesg and ftrace files to the test output directory. def executeSuspend(quiet=False): - pm = ProcessMonitor() - tp = sysvals.tpath - if sysvals.wifi: - wifi = sysvals.checkWifi() + sv, tp, pm = sysvals, sysvals.tpath, ProcessMonitor() + if sv.wifi: + wifi = sv.checkWifi() + sv.dlog('wifi check, connected device is "%s"' % wifi) testdata = [] # run these commands to prepare the system for suspend - if sysvals.display: + if sv.display: if not quiet: - pprint('SET DISPLAY TO %s' % sysvals.display.upper()) - displayControl(sysvals.display) + pprint('SET DISPLAY TO %s' % sv.display.upper()) + ret = sv.displayControl(sv.display) + sv.dlog('xset display %s, ret = %d' % (sv.display, ret)) time.sleep(1) - if sysvals.sync: + if sv.sync: if not quiet: pprint('SYNCING FILESYSTEMS') + sv.dlog('syncing filesystems') call('sync', shell=True) - # mark the start point in the kernel ring buffer just as we start - sysvals.initdmesg() + sv.dlog('read dmesg') + sv.initdmesg() # start ftrace - if(sysvals.usecallgraph or sysvals.usetraceevents): + if(sv.usecallgraph or sv.usetraceevents): if not quiet: pprint('START TRACING') - sysvals.fsetVal('1', 'tracing_on') - if sysvals.useprocmon: + sv.dlog('start ftrace tracing') + sv.fsetVal('1', 'tracing_on') + if sv.useprocmon: + sv.dlog('start the process monitor') pm.start() - sysvals.cmdinfo(True) + sv.dlog('run the cmdinfo list before') + sv.cmdinfo(True) # execute however many s/r runs requested - for count in range(1,sysvals.execcount+1): + for count in range(1,sv.execcount+1): # x2delay in between test runs - if(count > 1 and sysvals.x2delay > 0): - sysvals.fsetVal('WAIT %d' % sysvals.x2delay, 'trace_marker') - time.sleep(sysvals.x2delay/1000.0) - sysvals.fsetVal('WAIT END', 'trace_marker') + if(count > 1 and sv.x2delay > 0): + sv.fsetVal('WAIT %d' % sv.x2delay, 'trace_marker') + time.sleep(sv.x2delay/1000.0) + sv.fsetVal('WAIT END', 'trace_marker') # start message - if sysvals.testcommand != '': + if sv.testcommand != '': pprint('COMMAND START') else: - if(sysvals.rtcwake): + if(sv.rtcwake): pprint('SUSPEND START') else: pprint('SUSPEND START (press a key to resume)') # set rtcwake - if(sysvals.rtcwake): + if(sv.rtcwake): if not quiet: - pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime) - sysvals.rtcWakeAlarmOn() + pprint('will issue an rtcwake in %d seconds' % sv.rtcwaketime) + sv.dlog('enable RTC wake alarm') + sv.rtcWakeAlarmOn() # start of suspend trace marker - if(sysvals.usecallgraph or sysvals.usetraceevents): - sysvals.fsetVal(datetime.now().strftime(sysvals.tmstart), 'trace_marker') + if(sv.usecallgraph or sv.usetraceevents): + sv.fsetVal(datetime.now().strftime(sv.tmstart), 'trace_marker') # predelay delay - if(count == 1 and sysvals.predelay > 0): - sysvals.fsetVal('WAIT %d' % sysvals.predelay, 'trace_marker') - time.sleep(sysvals.predelay/1000.0) - sysvals.fsetVal('WAIT END', 'trace_marker') + if(count == 1 and sv.predelay > 0): + sv.fsetVal('WAIT %d' % sv.predelay, 'trace_marker') + time.sleep(sv.predelay/1000.0) + sv.fsetVal('WAIT END', 'trace_marker') # initiate suspend or command + sv.dlog('system executing a suspend') tdata = {'error': ''} - if sysvals.testcommand != '': - res = call(sysvals.testcommand+' 2>&1', shell=True); + if sv.testcommand != '': + res = call(sv.testcommand+' 2>&1', shell=True); if res != 0: tdata['error'] = 'cmd returned %d' % res else: - mode = sysvals.suspendmode - if sysvals.memmode and os.path.exists(sysvals.mempowerfile): + mode = sv.suspendmode + if sv.memmode and os.path.exists(sv.mempowerfile): mode = 'mem' - pf = open(sysvals.mempowerfile, 'w') - pf.write(sysvals.memmode) - pf.close() - if sysvals.diskmode and os.path.exists(sysvals.diskpowerfile): + sv.testVal(sv.mempowerfile, 'radio', sv.memmode) + if sv.diskmode and os.path.exists(sv.diskpowerfile): mode = 'disk' - pf = open(sysvals.diskpowerfile, 'w') - pf.write(sysvals.diskmode) - pf.close() - if mode == 'freeze' and sysvals.haveTurbostat(): + sv.testVal(sv.diskpowerfile, 'radio', sv.diskmode) + if sv.acpidebug: + sv.testVal(sv.acpipath, 'acpi', '0xe') + if mode == 'freeze' and sv.haveTurbostat(): # execution will pause here - turbo = sysvals.turbostat() + turbo = sv.turbostat() if turbo: tdata['turbo'] = turbo else: - pf = open(sysvals.powerfile, 'w') + pf = open(sv.powerfile, 'w') pf.write(mode) # execution will pause here try: pf.close() except Exception as e: tdata['error'] = str(e) - if(sysvals.rtcwake): - sysvals.rtcWakeAlarmOff() + sv.dlog('system returned from resume') + # reset everything + sv.testVal('restoreall') + if(sv.rtcwake): + sv.dlog('disable RTC wake alarm') + sv.rtcWakeAlarmOff() # postdelay delay - if(count == sysvals.execcount and sysvals.postdelay > 0): - sysvals.fsetVal('WAIT %d' % sysvals.postdelay, 'trace_marker') - time.sleep(sysvals.postdelay/1000.0) - sysvals.fsetVal('WAIT END', 'trace_marker') + if(count == sv.execcount and sv.postdelay > 0): + sv.fsetVal('WAIT %d' % sv.postdelay, 'trace_marker') + time.sleep(sv.postdelay/1000.0) + sv.fsetVal('WAIT END', 'trace_marker') # return from suspend pprint('RESUME COMPLETE') - if(sysvals.usecallgraph or sysvals.usetraceevents): - sysvals.fsetVal(datetime.now().strftime(sysvals.tmend), 'trace_marker') - if sysvals.wifi and wifi: - tdata['wifi'] = sysvals.pollWifi(wifi) - if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'): + if(sv.usecallgraph or sv.usetraceevents): + sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker') + if sv.wifi and wifi: + tdata['wifi'] = sv.pollWifi(wifi) + sv.dlog('wifi check, %s' % tdata['wifi']) + if(sv.suspendmode == 'mem' or sv.suspendmode == 'command'): + sv.dlog('read the ACPI FPDT') tdata['fw'] = getFPDT(False) testdata.append(tdata) - cmdafter = sysvals.cmdinfo(False) + sv.dlog('run the cmdinfo list after') + cmdafter = sv.cmdinfo(False) # stop ftrace - if(sysvals.usecallgraph or sysvals.usetraceevents): - if sysvals.useprocmon: + if(sv.usecallgraph or sv.usetraceevents): + if sv.useprocmon: + sv.dlog('stop the process monitor') pm.stop() - sysvals.fsetVal('0', 'tracing_on') + sv.fsetVal('0', 'tracing_on') # grab a copy of the dmesg output if not quiet: pprint('CAPTURING DMESG') - sysvals.getdmesg(testdata) + sysvals.dlog('EXECUTION TRACE END') + sv.getdmesg(testdata) # grab a copy of the ftrace output - if(sysvals.usecallgraph or sysvals.usetraceevents): + if(sv.usecallgraph or sv.usetraceevents): if not quiet: pprint('CAPTURING TRACE') - op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata) + op = sv.writeDatafileHeader(sv.ftracefile, testdata) fp = open(tp+'trace', 'r') for line in fp: op.write(line) op.close() - sysvals.fsetVal('', 'trace') - sysvals.platforminfo(cmdafter) + sv.fsetVal('', 'trace') + sv.platforminfo(cmdafter) def readFile(file): if os.path.islink(file): @@ -5586,39 +5659,6 @@ def dmidecode(mempath, fatal=False): count += 1 return out -def displayControl(cmd): - xset, ret = 'timeout 10 xset -d :0.0 {0}', 0 - if sysvals.sudouser: - xset = 'sudo -u %s %s' % (sysvals.sudouser, xset) - if cmd == 'init': - ret = call(xset.format('dpms 0 0 0'), shell=True) - if not ret: - ret = call(xset.format('s off'), shell=True) - elif cmd == 'reset': - ret = call(xset.format('s reset'), shell=True) - elif cmd in ['on', 'off', 'standby', 'suspend']: - b4 = displayControl('stat') - ret = call(xset.format('dpms force %s' % cmd), shell=True) - if not ret: - curr = displayControl('stat') - sysvals.vprint('Display Switched: %s -> %s' % (b4, curr)) - if curr != cmd: - sysvals.vprint('WARNING: Display failed to change to %s' % cmd) - if ret: - sysvals.vprint('WARNING: Display failed to change to %s with xset' % cmd) - return ret - elif cmd == 'stat': - fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout - ret = 'unknown' - for line in fp: - m = re.match('[\s]*Monitor is (?P.*)', ascii(line)) - if(m and len(m.group('m')) >= 2): - out = m.group('m').lower() - ret = out[3:] if out[0:2] == 'in' else out - break - fp.close() - return ret - # Function: getFPDT # Description: # Read the acpi bios tables and pull out FPDT, the firmware data @@ -6001,8 +6041,19 @@ def rerunTest(htmlfile=''): # execute a suspend/resume, gather the logs, and generate the output def runTest(n=0, quiet=False): # prepare for the test - sysvals.initFtrace(quiet) sysvals.initTestOutput('suspend') + op = sysvals.writeDatafileHeader(sysvals.dmesgfile, []) + op.write('# EXECUTION TRACE START\n') + op.close() + if n <= 1: + if sysvals.rs != 0: + sysvals.dlog('%sabling runtime suspend' % ('en' if sysvals.rs > 0 else 'dis')) + sysvals.setRuntimeSuspend(True) + if sysvals.display: + ret = sysvals.displayControl('init') + sysvals.dlog('xset display init, ret = %d' % ret) + sysvals.dlog('initialize ftrace') + sysvals.initFtrace(quiet) # execute the test executeSuspend(quiet) @@ -6098,8 +6149,16 @@ def data_from_html(file, outpath, issues, fulldetail=False): if wifi: extra['wifi'] = wifi low = find_in_html(html, 'freeze time: ', ' ms') - if low and 'waking' in low: - issue = 'FREEZEWAKE' + for lowstr in ['waking', '+']: + if not low: + break + if lowstr not in low: + continue + if lowstr == '+': + issue = 'S2LOOPx%d' % len(low.split('+')) + else: + m = re.match('.*waking *(?P[0-9]*) *times.*', low) + issue = 'S2WAKEx%s' % m.group('n') if m else 'S2WAKExNaN' match = [i for i in issues if i['match'] == issue] if len(match) > 0: match[0]['count'] += 1 @@ -6605,6 +6664,11 @@ if __name__ == '__main__': val = next(args) except: doError('-info requires one string argument', True) + elif(arg == '-desc'): + try: + val = next(args) + except: + doError('-desc requires one string argument', True) elif(arg == '-rs'): try: val = next(args) @@ -6814,9 +6878,9 @@ if __name__ == '__main__': runSummary(sysvals.outdir, True, genhtml) elif(cmd in ['xon', 'xoff', 'xstandby', 'xsuspend', 'xinit', 'xreset']): sysvals.verbose = True - ret = displayControl(cmd[1:]) + ret = sysvals.displayControl(cmd[1:]) elif(cmd == 'xstat'): - pprint('Display Status: %s' % displayControl('stat').upper()) + pprint('Display Status: %s' % sysvals.displayControl('stat').upper()) elif(cmd == 'wificheck'): dev = sysvals.checkWifi() if dev: @@ -6854,12 +6918,8 @@ if __name__ == '__main__': if mode.startswith('disk-'): sysvals.diskmode = mode.split('-', 1)[-1] sysvals.suspendmode = 'disk' - sysvals.systemInfo(dmidecode(sysvals.mempath)) - setRuntimeSuspend(True) - if sysvals.display: - displayControl('init') failcnt, ret = 0, 0 if sysvals.multitest['run']: # run multiple tests in a separate subdirectory @@ -6900,7 +6960,10 @@ if __name__ == '__main__': sysvals.testdir = sysvals.outdir # run the test in the current directory ret = runTest() + + # reset to default values after testing if sysvals.display: - displayControl('reset') - setRuntimeSuspend(False) + sysvals.displayControl('reset') + if sysvals.rs != 0: + sysvals.setRuntimeSuspend(False) sys.exit(ret) From ab150c3f80dcce670926ab3ca412be5047011d22 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 13 Nov 2020 16:58:10 +0800 Subject: [PATCH 38/86] PM / suspend: fix kernel-doc markup Add parameter explanation to fix kernel-doc marks: kernel/power/suspend.c:233: warning: Function parameter or member 'state' not described in 'suspend_valid_only_mem' kernel/power/suspend.c:344: warning: Function parameter or member 'state' not described in 'suspend_prepare' Signed-off-by: Alex Shi [ rjw: Change the proposed parameter descriptions. ] Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 32391acc806b..d8cae434f9eb 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -224,6 +224,7 @@ EXPORT_SYMBOL_GPL(suspend_set_ops); /** * suspend_valid_only_mem - Generic memory-only valid callback. + * @state: Target system sleep state. * * Platform drivers that implement mem suspend only and only need to check for * that in their .valid() callback can use this instead of rolling their own @@ -335,6 +336,7 @@ static int suspend_test(int level) /** * suspend_prepare - Prepare for entering system sleep state. + * @state: Target system sleep state. * * Common code run for every system sleep state that can be entered (except for * hibernation). Run suspend notifiers, allocate the "suspend" console and From b4ba76fb1c999384c1b0840d216d42abcd611024 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 17 Nov 2020 13:47:59 +0000 Subject: [PATCH 39/86] powercap: Adjust printing the constraint name with new line The constrain name has limit of size 30, which sometimes might be hit. When this happens the new line might get lost. Prevent this and set the max limit for name string length equal 29. This would result is proper string clamping (when needed) and storing '\n' at index 29 and '\0' at 30, so similarly as desired originally. Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- drivers/powercap/powercap_sys.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index f808c5fa9838..602c55287e7d 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -170,9 +170,8 @@ static ssize_t show_constraint_name(struct device *dev, if (pconst && pconst->ops && pconst->ops->get_name) { name = pconst->ops->get_name(power_zone, id); if (name) { - snprintf(buf, POWERCAP_CONSTRAINT_NAME_LEN, - "%s\n", name); - buf[POWERCAP_CONSTRAINT_NAME_LEN] = '\0'; + sprintf(buf, "%.*s\n", POWERCAP_CONSTRAINT_NAME_LEN - 1, + name); len = strlen(buf); } } From 4e1d9a737d00f2cc811dc5654f82c92c7d80e98c Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 19 Nov 2020 08:25:39 +0100 Subject: [PATCH 40/86] PM: sleep: Add dev_wakeup_path() helper Add dev_wakeup_path() helper to avoid to spread dev->power.wakeup_path test in drivers. Signed-off-by: Patrice Chotard Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 4 ++-- drivers/base/power/main.c | 4 ++-- drivers/i2c/busses/i2c-stm32f7.c | 4 ++-- include/linux/pm_wakeup.h | 10 ++++++++++ 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 743268996336..e0894ef8457c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1142,7 +1142,7 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff) if (ret) return ret; - if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd)) + if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd)) return 0; if (genpd->dev_ops.stop && genpd->dev_ops.start && @@ -1196,7 +1196,7 @@ static int genpd_resume_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd)) + if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd)) return pm_generic_resume_noirq(dev); genpd_lock(genpd); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2b4255dc101e..46793276598d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1359,7 +1359,7 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev) spin_lock_irq(&parent->power.lock); - if (dev->power.wakeup_path && !parent->power.ignore_children) + if (device_wakeup_path(dev) && !parent->power.ignore_children) parent->power.wakeup_path = true; spin_unlock_irq(&parent->power.lock); @@ -1627,7 +1627,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) goto Complete; /* Avoid direct_complete to let wakeup_path propagate. */ - if (device_may_wakeup(dev) || dev->power.wakeup_path) + if (device_may_wakeup(dev) || device_wakeup_path(dev)) dev->power.direct_complete = false; if (dev->power.direct_complete) { diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index f41f51a176a1..9aa8e65b511e 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -2322,7 +2322,7 @@ static int stm32f7_i2c_suspend(struct device *dev) i2c_mark_adapter_suspended(&i2c_dev->adap); - if (!device_may_wakeup(dev) && !dev->power.wakeup_path) { + if (!device_may_wakeup(dev) && !device_wakeup_path(dev)) { ret = stm32f7_i2c_regs_backup(i2c_dev); if (ret < 0) { i2c_mark_adapter_resumed(&i2c_dev->adap); @@ -2341,7 +2341,7 @@ static int stm32f7_i2c_resume(struct device *dev) struct stm32f7_i2c_dev *i2c_dev = dev_get_drvdata(dev); int ret; - if (!device_may_wakeup(dev) && !dev->power.wakeup_path) { + if (!device_may_wakeup(dev) && !device_wakeup_path(dev)) { ret = pm_runtime_force_resume(dev); if (ret < 0) return ret; diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index aa3da6611533..196a157456aa 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -84,6 +84,11 @@ static inline bool device_may_wakeup(struct device *dev) return dev->power.can_wakeup && !!dev->power.wakeup; } +static inline bool device_wakeup_path(struct device *dev) +{ + return dev->power.wakeup_path; +} + static inline void device_set_wakeup_path(struct device *dev) { dev->power.wakeup_path = true; @@ -174,6 +179,11 @@ static inline bool device_may_wakeup(struct device *dev) return dev->power.can_wakeup && dev->power.should_wakeup; } +static inline bool device_wakeup_path(struct device *dev) +{ + return false; +} + static inline void device_set_wakeup_path(struct device *dev) {} static inline void __pm_stay_awake(struct wakeup_source *ws) {} From a94ef811f7c3748736b85db0406da8e4ea391ac6 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Thu, 19 Nov 2020 09:43:25 -0700 Subject: [PATCH 41/86] PM: domains: replace -ENOTSUPP with -EOPNOTSUPP While submitting a patch to add next_wakeup, checkpatch reported this - WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP + return -ENOTSUPP; Address the above warning in other functions in pm_domain.h. Reviewed-by: Ulf Hansson Signed-off-by: Lina Iyer Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ad0ec481416..5fd20d117cbe 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -255,24 +255,24 @@ static inline int pm_genpd_init(struct generic_pm_domain *genpd, } static inline int pm_genpd_remove(struct generic_pm_domain *genpd) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_remove_notifier(struct device *dev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) @@ -325,13 +325,13 @@ struct device *genpd_dev_pm_attach_by_name(struct device *dev, static inline int of_genpd_add_provider_simple(struct device_node *np, struct generic_pm_domain *genpd) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline void of_genpd_del_provider(struct device_node *np) {} @@ -387,7 +387,7 @@ static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev, static inline struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ From 7a25759eaa04b8c0ecb3db134922d6641ab2e6d1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 30 Nov 2020 22:32:02 +0000 Subject: [PATCH 42/86] cpuidle: Select polling interval based on a c-state with a longer target residency It was noted that a few workloads that idle rapidly regressed when commit 36fcb4292473 ("cpuidle: use first valid target residency as poll time") was merged. The workloads in question were heavy communicators that idle rapidly and were impacted by the c-state exit latency as the active CPUs were not polling at the time of wakeup. As they were not particularly realistic workloads, it was not considered to be a major problem. Unfortunately, a bug was reported for a real workload in a production environment that relied on large numbers of threads operating in a worker pool pattern. These threads would idle for periods of time longer than the C1 target residency and so incurred the c-state exit latency penalty. The application is very sensitive to wakeup latency and indirectly relying on behaviour prior to commit on a37b969a61c1 ("cpuidle: poll_state: Add time limit to poll_idle()") to poll for long enough to avoid the exit latency cost. The target residency of C1 is typically very short. On some x86 machines, it can be as low as 2 microseconds. In poll_idle(), the clock is checked every POLL_IDLE_RELAX_COUNT interations of cpu_relax() and even one iteration of that loop can be over 1 microsecond so the polling interval is very close to the granularity of what poll_idle() can detect. Furthermore, a basic ping pong workload like perf bench pipe has a longer round-trip time than the 2 microseconds meaning that the CPU will almost certainly not be polling when the ping-pong completes. This patch selects a polling interval based on an enabled c-state that has an target residency longer than 10usec. If there is no enabled-cstate then polling will be up to a TICK_NSEC/16 similar to what it was up until kernel 4.20. Polling for a full tick is unlikely (rescheduling event) and is much longer than the existing target residencies for a deep c-state. As an example, consider a CPU with the following c-state information from an Intel CPU; residency exit_latency C1 2 2 C1E 20 10 C3 100 33 C6 400 133 The polling interval selected is 20usec. If booted with intel_idle.max_cstate=1 then the polling interval is 250usec as the deeper c-states were not available. On an AMD EPYC machine, the c-state information is more limited and looks like residency exit_latency C1 2 1 C2 800 400 The polling interval selected is 250usec. While C2 was considered, the polling interval was clamped by CPUIDLE_POLL_MAX. Note that it is not expected that polling will be a universal win. As well as potentially trading power for performance, the performance is not guaranteed if the extra polling prevented a turbo state being reached. Making it a tunable was considered but it's driver-specific, may be overridden by a governor and is not a guaranteed polling interval making it difficult to describe without knowledge of the implementation. tbench4 vanilla polling Hmean 1 497.89 ( 0.00%) 543.15 * 9.09%* Hmean 2 975.88 ( 0.00%) 1059.73 * 8.59%* Hmean 4 1953.97 ( 0.00%) 2081.37 * 6.52%* Hmean 8 3645.76 ( 0.00%) 4052.95 * 11.17%* Hmean 16 6882.21 ( 0.00%) 6995.93 * 1.65%* Hmean 32 10752.20 ( 0.00%) 10731.53 * -0.19%* Hmean 64 12875.08 ( 0.00%) 12478.13 * -3.08%* Hmean 128 21500.54 ( 0.00%) 21098.60 * -1.87%* Hmean 256 21253.70 ( 0.00%) 21027.18 * -1.07%* Hmean 320 20813.50 ( 0.00%) 20580.64 * -1.12%* Signed-off-by: Mel Gorman Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 83af15f77f66..ef2ea1b12cd8 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -368,6 +368,19 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index) cpuidle_curr_governor->reflect(dev, index); } +/* + * Min polling interval of 10usec is a guess. It is assuming that + * for most users, the time for a single ping-pong workload like + * perf bench pipe would generally complete within 10usec but + * this is hardware dependant. Actual time can be estimated with + * + * perf bench sched pipe -l 10000 + * + * Run multiple times to avoid cpufreq effects. + */ +#define CPUIDLE_POLL_MIN 10000 +#define CPUIDLE_POLL_MAX (TICK_NSEC / 16) + /** * cpuidle_poll_time - return amount of time to poll for, * governors can override dev->poll_limit_ns if necessary @@ -382,15 +395,23 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv, int i; u64 limit_ns; + BUILD_BUG_ON(CPUIDLE_POLL_MIN > CPUIDLE_POLL_MAX); + if (dev->poll_limit_ns) return dev->poll_limit_ns; - limit_ns = TICK_NSEC; + limit_ns = CPUIDLE_POLL_MAX; for (i = 1; i < drv->state_count; i++) { + u64 state_limit; + if (dev->states_usage[i].disable) continue; - limit_ns = drv->states[i].target_residency_ns; + state_limit = drv->states[i].target_residency_ns; + if (state_limit < CPUIDLE_POLL_MIN) + continue; + + limit_ns = min_t(u64, state_limit, CPUIDLE_POLL_MAX); break; } From 16e8b2a7cb886bcc3dd89ad28948d374a2319bbc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 3 Dec 2020 22:24:38 +0300 Subject: [PATCH 43/86] PM / devfreq: tegra30: Support interconnect and OPPs from device-tree This patch moves ACTMON driver away from generating OPP table by itself, transitioning it to use the table which comes from device-tree. This change breaks compatibility with older device-trees and brings support for the interconnect framework to the driver. This is a mandatory change which needs to be done in order to implement interconnect-based memory DVFS, i.e. device-trees need to be updated. Now ACTMON issues a memory bandwidth requests using dev_pm_opp_set_bw() instead of driving EMC clock rate directly. Tested-by: Peter Geis Tested-by: Nicolas Chauvet Acked-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/tegra30-devfreq.c | 79 +++++++++++++++---------------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 38cc0d014738..145ef91ae092 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -19,6 +19,8 @@ #include #include +#include + #include "governor.h" #define ACTMON_GLB_STATUS 0x0 @@ -155,6 +157,7 @@ struct tegra_devfreq_device { struct tegra_devfreq { struct devfreq *devfreq; + struct opp_table *opp_table; struct reset_control *reset; struct clk *clock; @@ -612,34 +615,19 @@ static void tegra_actmon_stop(struct tegra_devfreq *tegra) static int tegra_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { - struct tegra_devfreq *tegra = dev_get_drvdata(dev); - struct devfreq *devfreq = tegra->devfreq; struct dev_pm_opp *opp; - unsigned long rate; - int err; + int ret; opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(opp)) { dev_err(dev, "Failed to find opp for %lu Hz\n", *freq); return PTR_ERR(opp); } - rate = dev_pm_opp_get_freq(opp); + + ret = dev_pm_opp_set_bw(dev, opp); dev_pm_opp_put(opp); - err = clk_set_min_rate(tegra->emc_clock, rate * KHZ); - if (err) - return err; - - err = clk_set_rate(tegra->emc_clock, 0); - if (err) - goto restore_min_rate; - - return 0; - -restore_min_rate: - clk_set_min_rate(tegra->emc_clock, devfreq->previous_freq); - - return err; + return ret; } static int tegra_devfreq_get_dev_status(struct device *dev, @@ -655,7 +643,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev, stat->private_data = tegra; /* The below are to be used by the other governors */ - stat->current_frequency = cur_freq; + stat->current_frequency = cur_freq * KHZ; actmon_dev = &tegra->devices[MCALL]; @@ -705,7 +693,12 @@ static int tegra_governor_get_target(struct devfreq *devfreq, target_freq = max(target_freq, dev->target_freq); } - *freq = target_freq; + /* + * tegra-devfreq driver operates with KHz units, while OPP table + * entries use Hz units. Hence we need to convert the units for the + * devfreq core. + */ + *freq = target_freq * KHZ; return 0; } @@ -774,6 +767,7 @@ static struct devfreq_governor tegra_devfreq_governor = { static int tegra_devfreq_probe(struct platform_device *pdev) { + u32 hw_version = BIT(tegra_sku_info.soc_speedo_id); struct tegra_devfreq_device *dev; struct tegra_devfreq *tegra; struct devfreq *devfreq; @@ -822,11 +816,25 @@ static int tegra_devfreq_probe(struct platform_device *pdev) return err; } + tegra->opp_table = dev_pm_opp_set_supported_hw(&pdev->dev, + &hw_version, 1); + err = PTR_ERR_OR_ZERO(tegra->opp_table); + if (err) { + dev_err(&pdev->dev, "Failed to set supported HW: %d\n", err); + return err; + } + + err = dev_pm_opp_of_add_table(&pdev->dev); + if (err) { + dev_err(&pdev->dev, "Failed to add OPP table: %d\n", err); + goto put_hw; + } + err = clk_prepare_enable(tegra->clock); if (err) { dev_err(&pdev->dev, "Failed to prepare and enable ACTMON clock\n"); - return err; + goto remove_table; } err = reset_control_reset(tegra->reset); @@ -850,23 +858,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev) dev->regs = tegra->regs + dev->config->offset; } - for (rate = 0; rate <= tegra->max_freq * KHZ; rate++) { - rate = clk_round_rate(tegra->emc_clock, rate); - - if (rate < 0) { - dev_err(&pdev->dev, - "Failed to round clock rate: %ld\n", rate); - err = rate; - goto remove_opps; - } - - err = dev_pm_opp_add(&pdev->dev, rate / KHZ, 0); - if (err) { - dev_err(&pdev->dev, "Failed to add OPP: %d\n", err); - goto remove_opps; - } - } - platform_set_drvdata(pdev, tegra); tegra->clk_rate_change_nb.notifier_call = tegra_actmon_clk_notify_cb; @@ -882,7 +873,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev) } tegra_devfreq_profile.initial_freq = clk_get_rate(tegra->emc_clock); - tegra_devfreq_profile.initial_freq /= KHZ; devfreq = devfreq_add_device(&pdev->dev, &tegra_devfreq_profile, "tegra_actmon", NULL); @@ -902,6 +892,10 @@ remove_opps: reset_control_reset(tegra->reset); disable_clk: clk_disable_unprepare(tegra->clock); +remove_table: + dev_pm_opp_of_remove_table(&pdev->dev); +put_hw: + dev_pm_opp_put_supported_hw(tegra->opp_table); return err; } @@ -913,11 +907,12 @@ static int tegra_devfreq_remove(struct platform_device *pdev) devfreq_remove_device(tegra->devfreq); devfreq_remove_governor(&tegra_devfreq_governor); - dev_pm_opp_remove_all_dynamic(&pdev->dev); - reset_control_reset(tegra->reset); clk_disable_unprepare(tegra->clock); + dev_pm_opp_of_remove_table(&pdev->dev); + dev_pm_opp_put_supported_hw(tegra->opp_table); + return 0; } From 6a575e84f11e15078629f0d16bff2bc354a6bfc0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 3 Dec 2020 22:24:39 +0300 Subject: [PATCH 44/86] PM / devfreq: tegra30: Separate configurations per-SoC generation Previously we were using count-weight of the T124 for T30 in order to get EMC clock rate that was reasonable for T30. In fact the count-weight should be x2 times smaller on T30, but then devfreq was producing a bit too low EMC clock rate for ISO memory clients, like display controller for example. Now both Tegra ACTMON and Tegra DRM display drivers support interconnect framework and display driver tells to ICC what a minimum memory bandwidth is needed, preventing FIFO underflows. Thus, now we can use a proper count-weight value for Tegra30 and MC_ALL device config needs a bit more aggressive boosting. Add a separate ACTMON driver configuration that is specific to Tegra30. Tested-by: Peter Geis Tested-by: Nicolas Chauvet Acked-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Signed-off-by: Chanwoo Choi --- drivers/devfreq/tegra30-devfreq.c | 68 ++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 145ef91ae092..117cad7968ab 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -57,13 +57,6 @@ #define ACTMON_BELOW_WMARK_WINDOW 3 #define ACTMON_BOOST_FREQ_STEP 16000 -/* - * Activity counter is incremented every 256 memory transactions, and each - * transaction takes 4 EMC clocks for Tegra124; So the COUNT_WEIGHT is - * 4 * 256 = 1024. - */ -#define ACTMON_COUNT_WEIGHT 0x400 - /* * ACTMON_AVERAGE_WINDOW_LOG2: default value for @DEV_CTRL_K_VAL, which * translates to 2 ^ (K_VAL + 1). ex: 2 ^ (6 + 1) = 128 @@ -111,7 +104,7 @@ enum tegra_actmon_device { MCCPU, }; -static const struct tegra_devfreq_device_config actmon_device_configs[] = { +static const struct tegra_devfreq_device_config tegra124_device_configs[] = { { /* MCALL: All memory accesses (including from the CPUs) */ .offset = 0x1c0, @@ -133,6 +126,28 @@ static const struct tegra_devfreq_device_config actmon_device_configs[] = { }, }; +static const struct tegra_devfreq_device_config tegra30_device_configs[] = { + { + /* MCALL: All memory accesses (including from the CPUs) */ + .offset = 0x1c0, + .irq_mask = 1 << 26, + .boost_up_coeff = 200, + .boost_down_coeff = 50, + .boost_up_threshold = 20, + .boost_down_threshold = 10, + }, + { + /* MCCPU: memory accesses from the CPUs */ + .offset = 0x200, + .irq_mask = 1 << 25, + .boost_up_coeff = 800, + .boost_down_coeff = 40, + .boost_up_threshold = 27, + .boost_down_threshold = 10, + .avg_dependency_threshold = 16000, /* 16MHz in kHz units */ + }, +}; + /** * struct tegra_devfreq_device - state specific to an ACTMON device * @@ -155,6 +170,12 @@ struct tegra_devfreq_device { unsigned long target_freq; }; +struct tegra_devfreq_soc_data { + const struct tegra_devfreq_device_config *configs; + /* Weight value for count measurements */ + unsigned int count_weight; +}; + struct tegra_devfreq { struct devfreq *devfreq; struct opp_table *opp_table; @@ -171,11 +192,13 @@ struct tegra_devfreq { struct delayed_work cpufreq_update_work; struct notifier_block cpu_rate_change_nb; - struct tegra_devfreq_device devices[ARRAY_SIZE(actmon_device_configs)]; + struct tegra_devfreq_device devices[2]; unsigned int irq; bool started; + + const struct tegra_devfreq_soc_data *soc; }; struct tegra_actmon_emc_ratio { @@ -488,7 +511,7 @@ static void tegra_actmon_configure_device(struct tegra_devfreq *tegra, tegra_devfreq_update_avg_wmark(tegra, dev); tegra_devfreq_update_wmark(tegra, dev); - device_writel(dev, ACTMON_COUNT_WEIGHT, ACTMON_DEV_COUNT_WEIGHT); + device_writel(dev, tegra->soc->count_weight, ACTMON_DEV_COUNT_WEIGHT); device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS); val |= ACTMON_DEV_CTRL_ENB_PERIODIC; @@ -779,6 +802,8 @@ static int tegra_devfreq_probe(struct platform_device *pdev) if (!tegra) return -ENOMEM; + tegra->soc = of_device_get_match_data(&pdev->dev); + tegra->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(tegra->regs)) return PTR_ERR(tegra->regs); @@ -852,9 +877,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev) tegra->max_freq = rate / KHZ; - for (i = 0; i < ARRAY_SIZE(actmon_device_configs); i++) { + for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { dev = tegra->devices + i; - dev->config = actmon_device_configs + i; + dev->config = tegra->soc->configs + i; dev->regs = tegra->regs + dev->config->offset; } @@ -916,9 +941,24 @@ static int tegra_devfreq_remove(struct platform_device *pdev) return 0; } +static const struct tegra_devfreq_soc_data tegra124_soc = { + .configs = tegra124_device_configs, + + /* + * Activity counter is incremented every 256 memory transactions, + * and each transaction takes 4 EMC clocks. + */ + .count_weight = 4 * 256, +}; + +static const struct tegra_devfreq_soc_data tegra30_soc = { + .configs = tegra30_device_configs, + .count_weight = 2 * 256, +}; + static const struct of_device_id tegra_devfreq_of_match[] = { - { .compatible = "nvidia,tegra30-actmon" }, - { .compatible = "nvidia,tegra124-actmon" }, + { .compatible = "nvidia,tegra30-actmon", .data = &tegra30_soc, }, + { .compatible = "nvidia,tegra124-actmon", .data = &tegra124_soc, }, { }, }; From 3a5e6732a74c44d7c78a764b9a7701135565df8f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 12 Oct 2020 22:01:41 +0800 Subject: [PATCH 45/86] cpufreq: sti-cpufreq: fix mem leak in sti_cpufreq_set_opp_info() Use dev_pm_opp_put_prop_name() to avoid mem leak, which free opp_table. Signed-off-by: Yangtao Li Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/sti-cpufreq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 4ac6fb23792a..aab565edc4d3 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -223,7 +223,8 @@ use_defaults: opp_table = dev_pm_opp_set_supported_hw(dev, version, VERSION_ELEMENTS); if (IS_ERR(opp_table)) { dev_err(dev, "Failed to set supported hardware\n"); - return PTR_ERR(opp_table); + ret = PTR_ERR(opp_table); + goto err_put_prop_name; } dev_dbg(dev, "pcode: %d major: %d minor: %d substrate: %d\n", @@ -232,6 +233,10 @@ use_defaults: version[0], version[1], version[2]); return 0; + +err_put_prop_name: + dev_pm_opp_put_prop_name(opp_table); + return ret; } static int sti_cpufreq_fetch_syscon_registers(void) From de4ca30958676f922cd7610d96342b054c05c86b Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Tue, 13 Oct 2020 11:27:08 +0200 Subject: [PATCH 46/86] cpufreq: mediatek: Add support for mt8167 Add compatible string for mediatek mt8167 Signed-off-by: Fabien Parent Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + drivers/cpufreq/mediatek-cpufreq.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 3776d960f405..744bda961d54 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -119,6 +119,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "mediatek,mt2712", }, { .compatible = "mediatek,mt7622", }, { .compatible = "mediatek,mt7623", }, + { .compatible = "mediatek,mt8167", }, { .compatible = "mediatek,mt817x", }, { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 7d1212c9b7c8..c09bff86bb9b 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -532,6 +532,7 @@ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt2712", }, { .compatible = "mediatek,mt7622", }, { .compatible = "mediatek,mt7623", }, + { .compatible = "mediatek,mt8167", }, { .compatible = "mediatek,mt817x", }, { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, From 75118c8ef9d16ecbb56e37547061515c75bb91b4 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Tue, 13 Oct 2020 11:27:09 +0200 Subject: [PATCH 47/86] cpufreq: blacklist mt8516 in cpufreq-dt-platdev Add MT8516 to cpufreq-dt-platdev blacklist since the actual scaling is handled by the 'mediatek-cpufreq' driver. Signed-off-by: Fabien Parent Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 744bda961d54..bd2db0188cbb 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -124,6 +124,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, { .compatible = "mediatek,mt8183", }, + { .compatible = "mediatek,mt8516", }, { .compatible = "nvidia,tegra20", }, { .compatible = "nvidia,tegra30", }, From 68b9cd7270f0191bd51b635f8f4778951ee3811a Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Wed, 14 Oct 2020 15:06:11 +0530 Subject: [PATCH 48/86] cpufreq: tegra194: get consistent cpuinfo_cur_freq Frequency returned by 'cpuinfo_cur_freq' using counters is not fixed and keeps changing slightly. This change returns a consistent value from freq_table. If the reconstructed frequency has acceptable delta from the last written value, then return the frequency corresponding to the last written ndiv value from freq_table. Otherwise, print a warning and return the reconstructed freq. Signed-off-by: Sumit Gupta Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra194-cpufreq.c | 62 +++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index e1d931c457a7..79015875f346 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -180,9 +180,61 @@ static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay) return (rate_mhz * KHZ); /* in KHz */ } +static void get_cpu_ndiv(void *ndiv) +{ + u64 ndiv_val; + + asm volatile("mrs %0, s3_0_c15_c0_4" : "=r" (ndiv_val) : ); + + *(u64 *)ndiv = ndiv_val; +} + +static void set_cpu_ndiv(void *data) +{ + struct cpufreq_frequency_table *tbl = data; + u64 ndiv_val = (u64)tbl->driver_data; + + asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv_val)); +} + static unsigned int tegra194_get_speed(u32 cpu) { - return tegra194_get_speed_common(cpu, US_DELAY); + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + struct cpufreq_frequency_table *pos; + unsigned int rate; + u64 ndiv; + int ret; + u32 cl; + + smp_call_function_single(cpu, get_cpu_cluster, &cl, true); + + /* reconstruct actual cpu freq using counters */ + rate = tegra194_get_speed_common(cpu, US_DELAY); + + /* get last written ndiv value */ + ret = smp_call_function_single(cpu, get_cpu_ndiv, &ndiv, true); + if (WARN_ON_ONCE(ret)) + return rate; + + /* + * If the reconstructed frequency has acceptable delta from + * the last written value, then return freq corresponding + * to the last written ndiv value from freq_table. This is + * done to return consistent value. + */ + cpufreq_for_each_valid_entry(pos, data->tables[cl]) { + if (pos->driver_data != ndiv) + continue; + + if (abs(pos->frequency - rate) > 115200) { + pr_warn("cpufreq: cpu%d,cur:%u,set:%u,set ndiv:%llu\n", + cpu, rate, pos->frequency, ndiv); + } else { + rate = pos->frequency; + } + break; + } + return rate; } static int tegra194_cpufreq_init(struct cpufreq_policy *policy) @@ -209,14 +261,6 @@ static int tegra194_cpufreq_init(struct cpufreq_policy *policy) return 0; } -static void set_cpu_ndiv(void *data) -{ - struct cpufreq_frequency_table *tbl = data; - u64 ndiv_val = (u64)tbl->driver_data; - - asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv_val)); -} - static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { From 2f05c19d9ef4f5a42634f83bdb0db596ffc0dd30 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Sat, 31 Oct 2020 09:18:54 +0800 Subject: [PATCH 49/86] cpufreq: mediatek: add missing platform_driver_unregister() on error in mtk_cpufreq_driver_init Add the missing platform_driver_unregister() before return from mtk_cpufreq_driver_init in the error handling case when failed to register mtk-cpufreq platform device Signed-off-by: Qinglang Miao Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index c09bff86bb9b..1aa512054a96 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -573,6 +573,7 @@ static int __init mtk_cpufreq_driver_init(void) pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0); if (IS_ERR(pdev)) { pr_err("failed to register mtk-cpufreq platform device\n"); + platform_driver_unregister(&mtk_cpufreq_platdrv); return PTR_ERR(pdev); } From 925a5bcefe105f2790ecbdc252eb2315573f309d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:31 +0100 Subject: [PATCH 50/86] cpufreq: ap806: Add missing MODULE_DEVICE_TABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: f525a670533d9 ("cpufreq: ap806: add cpufreq driver for Armada 8K") Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-8k-cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index 39e34f5066d3..b0fc5e84f857 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -204,6 +204,12 @@ static void __exit armada_8k_cpufreq_exit(void) } module_exit(armada_8k_cpufreq_exit); +static const struct of_device_id __maybe_unused armada_8k_cpufreq_of_match[] = { + { .compatible = "marvell,ap806-cpu-clock" }, + { }, +}; +MODULE_DEVICE_TABLE(of, armada_8k_cpufreq_of_match); + MODULE_AUTHOR("Gregory Clement "); MODULE_DESCRIPTION("Armada 8K cpufreq driver"); MODULE_LICENSE("GPL"); From 9433777a6e0aae27468d3434b75cd51bb88ff711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:32 +0100 Subject: [PATCH 51/86] cpufreq: highbank: Add missing MODULE_DEVICE_TABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: 6754f556103be ("cpufreq / highbank: add support for highbank cpufreq") Signed-off-by: Viresh Kumar --- drivers/cpufreq/highbank-cpufreq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index 5a7f6dafcddb..ac57cddc5f2f 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -101,6 +101,13 @@ out_put_node: } module_init(hb_cpufreq_driver_init); +static const struct of_device_id __maybe_unused hb_cpufreq_of_match[] = { + { .compatible = "calxeda,highbank" }, + { .compatible = "calxeda,ecx-2000" }, + { }, +}; +MODULE_DEVICE_TABLE(of, hb_cpufreq_of_match); + MODULE_AUTHOR("Mark Langsdorf "); MODULE_DESCRIPTION("Calxeda Highbank cpufreq driver"); MODULE_LICENSE("GPL"); From af6eca06501118af3e2ad46eee8edab20624b74e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:33 +0100 Subject: [PATCH 52/86] cpufreq: mediatek: Add missing MODULE_DEVICE_TABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: 501c574f4e3a5 ("cpufreq: mediatek: Add support of cpufreq to MT2701/MT7623 SoC") Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 1aa512054a96..022e3e966e71 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -541,6 +541,7 @@ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { } }; +MODULE_DEVICE_TABLE(of, mtk_cpufreq_machines); static int __init mtk_cpufreq_driver_init(void) { From a5a6031663bc1dd0a10babd49d1bcb3153a8327f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:34 +0100 Subject: [PATCH 53/86] cpufreq: qcom: Add missing MODULE_DEVICE_TABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: 46e2856b8e188 ("cpufreq: Add Kryo CPU scaling driver") Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index d06b37822c3d..fba9937a406b 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -464,6 +464,7 @@ static const struct of_device_id qcom_cpufreq_match_list[] __initconst = { { .compatible = "qcom,msm8960", .data = &match_data_krait }, {}, }; +MODULE_DEVICE_TABLE(of, qcom_cpufreq_match_list); /* * Since the driver depends on smem and nvmem drivers, which may From 183747ab52654eb406fc6b5bfb40806b75d31811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:35 +0100 Subject: [PATCH 54/86] cpufreq: st: Add missing MODULE_DEVICE_TABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: ab0ea257fc58d ("cpufreq: st: Provide runtime initialised driver for ST's platforms") Signed-off-by: Viresh Kumar --- drivers/cpufreq/sti-cpufreq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index aab565edc4d3..fdb0a722d881 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -297,6 +297,13 @@ register_cpufreq_dt: } module_init(sti_cpufreq_init); +static const struct of_device_id __maybe_unused sti_cpufreq_of_match[] = { + { .compatible = "st,stih407" }, + { .compatible = "st,stih410" }, + { }, +}; +MODULE_DEVICE_TABLE(of, sti_cpufreq_of_match); + MODULE_DESCRIPTION("STMicroelectronics CPUFreq/OPP driver"); MODULE_AUTHOR("Ajitpal Singh "); MODULE_AUTHOR("Lee Jones "); From af2096f285077e3339eb835ad06c50bdd59f01b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:36 +0100 Subject: [PATCH 55/86] cpufreq: sun50i: Add missing MODULE_DEVICE_TABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: f328584f7bff8 ("cpufreq: Add sun50i nvmem based CPU scaling driver") Reviewed-by: Yangtao Li Signed-off-by: Viresh Kumar --- drivers/cpufreq/sun50i-cpufreq-nvmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 9907a165135b..2deed8d8773f 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -167,6 +167,7 @@ static const struct of_device_id sun50i_cpufreq_match_list[] = { { .compatible = "allwinner,sun50i-h6" }, {} }; +MODULE_DEVICE_TABLE(of, sun50i_cpufreq_match_list); static const struct of_device_id *sun50i_cpufreq_match_node(void) { From b9acab091842ca8b288882798bb809f7abf5408a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:37 +0100 Subject: [PATCH 56/86] cpufreq: loongson1: Add missing MODULE_ALIAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_ALIAS for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: a0a22cf14472f ("cpufreq: Loongson1: Add cpufreq driver for Loongson1B") Signed-off-by: Viresh Kumar --- drivers/cpufreq/loongson1-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c index 0ea88778882a..86f612593e49 100644 --- a/drivers/cpufreq/loongson1-cpufreq.c +++ b/drivers/cpufreq/loongson1-cpufreq.c @@ -216,6 +216,7 @@ static struct platform_driver ls1x_cpufreq_platdrv = { module_platform_driver(ls1x_cpufreq_platdrv); +MODULE_ALIAS("platform:ls1x-cpufreq"); MODULE_AUTHOR("Kelvin Cheung "); MODULE_DESCRIPTION("Loongson1 CPUFreq driver"); MODULE_LICENSE("GPL"); From c0382d049d2def37b81e907a8b22661a4a4a6eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:38 +0100 Subject: [PATCH 57/86] cpufreq: scpi: Add missing MODULE_ALIAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_ALIAS for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: 8def31034d033 ("cpufreq: arm_big_little: add SCPI interface driver") Signed-off-by: Viresh Kumar --- drivers/cpufreq/scpi-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 43db05b949d9..e5140ad63db8 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -233,6 +233,7 @@ static struct platform_driver scpi_cpufreq_platdrv = { }; module_platform_driver(scpi_cpufreq_platdrv); +MODULE_ALIAS("platform:scpi-cpufreq"); MODULE_AUTHOR("Sudeep Holla "); MODULE_DESCRIPTION("ARM SCPI CPUFreq interface driver"); MODULE_LICENSE("GPL v2"); From d15183991c2d53d7cecf27a1555c91b702cef1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 3 Nov 2020 16:11:39 +0100 Subject: [PATCH 58/86] cpufreq: vexpress-spc: Add missing MODULE_ALIAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_ALIAS for automatic loading of this cpufreq driver when it is compiled as an external module. Signed-off-by: Pali Rohár Fixes: 47ac9aa165540 ("cpufreq: arm_big_little: add vexpress SPC interface driver") Signed-off-by: Viresh Kumar --- drivers/cpufreq/vexpress-spc-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index e89b905754d2..f711d8eaea6a 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -591,6 +591,7 @@ static struct platform_driver ve_spc_cpufreq_platdrv = { }; module_platform_driver(ve_spc_cpufreq_platdrv); +MODULE_ALIAS("platform:vexpress-spc-cpufreq"); MODULE_AUTHOR("Viresh Kumar "); MODULE_AUTHOR("Sudeep Holla "); MODULE_DESCRIPTION("Vexpress SPC ARM big LITTLE cpufreq driver"); From fc928b901dc68481ba3e524860a641fe13e25dfe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Dec 2020 23:53:32 +0100 Subject: [PATCH 59/86] cpufreq: imx: fix NVMEM_IMX_OCOTP dependency A driver should not 'select' drivers from another subsystem. If NVMEM is disabled, this one results in a warning: WARNING: unmet direct dependencies detected for NVMEM_IMX_OCOTP Depends on [n]: NVMEM [=n] && (ARCH_MXC [=y] || COMPILE_TEST [=y]) && HAS_IOMEM [=y] Selected by [y]: - ARM_IMX6Q_CPUFREQ [=y] && CPU_FREQ [=y] && (ARM || ARM64 [=y]) && ARCH_MXC [=y] && REGULATOR_ANATOP [=y] Change the 'select' to 'depends on' to prevent it from going wrong, and allow compile-testing without that driver, since it is only a runtime dependency. Fixes: 2782ef34ed23 ("cpufreq: imx: Select NVMEM_IMX_OCOTP") Signed-off-by: Arnd Bergmann Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 015ec0c02835..1f73fa75b1a0 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -94,7 +94,7 @@ config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6 cpufreq support" depends on ARCH_MXC depends on REGULATOR_ANATOP - select NVMEM_IMX_OCOTP + depends on NVMEM_IMX_OCOTP || COMPILE_TEST select PM_OPP help This adds cpufreq driver support for Freescale i.MX6 series SoCs. From b7b4e785520ff87383ab5131f903544a261e83a1 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 2 Dec 2020 09:14:16 +0000 Subject: [PATCH 60/86] cpufreq: tegra186: Fix sparse 'incorrect type in assignment' warning Sparse warns that the incorrect type is being assigned to the CPUFREQ driver_data variable in the Tegra186 CPUFREQ driver. The Tegra186 CPUFREQ driver is assigned a type of 'void __iomem *' to a pointer of type 'void *' ... drivers/cpufreq/tegra186-cpufreq.c:72:37: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void *driver_data @@ got void [noderef] __iomem * @@ ... drivers/cpufreq/tegra186-cpufreq.c:87:40: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void [noderef] __iomem *edvd_reg @@ got void *driver_data @@ The Tegra186 CPUFREQ driver is using the policy->driver_data variable to store and iomem pointer to a Tegra186 CPU register that is used to set the clock speed for the CPU. This is not necessary because the register base address is already stored in the driver data and the offset of the register for each CPU is static. Therefore, fix this by adding a new structure with the register offsets for each CPU and store this in the main driver data structure along with the register base address. Please note that a new structure has been added for storing the register offsets rather than a simple array, because this will permit further clean-ups and simplification of the driver. Signed-off-by: Jon Hunter Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra186-cpufreq.c | 61 ++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 7eb2c56c65de..98b5f32eb0be 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -12,13 +12,45 @@ #include #include -#define EDVD_CORE_VOLT_FREQ(core) (0x20 + (core) * 0x4) -#define EDVD_CORE_VOLT_FREQ_F_SHIFT 0 -#define EDVD_CORE_VOLT_FREQ_F_MASK 0xffff -#define EDVD_CORE_VOLT_FREQ_V_SHIFT 16 +#define EDVD_OFFSET_A57(core) ((SZ_64K * 6) + (0x20 + (core) * 0x4)) +#define EDVD_OFFSET_DENVER(core) ((SZ_64K * 7) + (0x20 + (core) * 0x4)) +#define EDVD_CORE_VOLT_FREQ_F_SHIFT 0 +#define EDVD_CORE_VOLT_FREQ_F_MASK 0xffff +#define EDVD_CORE_VOLT_FREQ_V_SHIFT 16 + +struct tegra186_cpufreq_cpu { + unsigned int edvd_offset; +}; + +static const struct tegra186_cpufreq_cpu tegra186_cpus[] = { + /* CPU0 - A57 Cluster */ + { + .edvd_offset = EDVD_OFFSET_A57(0) + }, + /* CPU1 - Denver Cluster */ + { + .edvd_offset = EDVD_OFFSET_DENVER(0) + }, + /* CPU2 - Denver Cluster */ + { + .edvd_offset = EDVD_OFFSET_DENVER(1) + }, + /* CPU3 - A57 Cluster */ + { + .edvd_offset = EDVD_OFFSET_A57(1) + }, + /* CPU4 - A57 Cluster */ + { + .edvd_offset = EDVD_OFFSET_A57(2) + }, + /* CPU5 - A57 Cluster */ + { + .edvd_offset = EDVD_OFFSET_A57(3) + }, + +}; struct tegra186_cpufreq_cluster_info { - unsigned long offset; int cpus[4]; unsigned int bpmp_cluster_id; }; @@ -27,13 +59,11 @@ struct tegra186_cpufreq_cluster_info { static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = { /* Denver cluster */ { - .offset = SZ_64K * 7, .cpus = { 1, 2, NO_CPU, NO_CPU }, .bpmp_cluster_id = 0, }, /* A57 cluster */ { - .offset = SZ_64K * 6, .cpus = { 0, 3, 4, 5 }, .bpmp_cluster_id = 1, }, @@ -51,6 +81,7 @@ struct tegra186_cpufreq_data { size_t num_clusters; struct tegra186_cpufreq_cluster *clusters; + const struct tegra186_cpufreq_cpu *cpus; }; static int tegra186_cpufreq_init(struct cpufreq_policy *policy) @@ -71,13 +102,12 @@ static int tegra186_cpufreq_init(struct cpufreq_policy *policy) if (core == ARRAY_SIZE(info->cpus)) continue; - policy->driver_data = - data->regs + info->offset + EDVD_CORE_VOLT_FREQ(core); policy->freq_table = cluster->table; break; } policy->cpuinfo.transition_latency = 300 * 1000; + policy->driver_data = NULL; return 0; } @@ -85,11 +115,12 @@ static int tegra186_cpufreq_init(struct cpufreq_policy *policy) static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { + struct tegra186_cpufreq_data *data = cpufreq_get_driver_data(); struct cpufreq_frequency_table *tbl = policy->freq_table + index; - void __iomem *edvd_reg = policy->driver_data; + unsigned int edvd_offset = data->cpus[policy->cpu].edvd_offset; u32 edvd_val = tbl->driver_data; - writel(edvd_val, edvd_reg); + writel(edvd_val, data->regs + edvd_offset); return 0; } @@ -98,16 +129,15 @@ static unsigned int tegra186_cpufreq_get(unsigned int cpu) { struct tegra186_cpufreq_data *data = cpufreq_get_driver_data(); struct cpufreq_policy *policy; - void __iomem *edvd_reg; - unsigned int i, freq = 0; + unsigned int i, edvd_offset, freq = 0; u32 ndiv; policy = cpufreq_cpu_get(cpu); if (!policy) return 0; - edvd_reg = policy->driver_data; - ndiv = readl(edvd_reg) & EDVD_CORE_VOLT_FREQ_F_MASK; + edvd_offset = data->cpus[policy->cpu].edvd_offset; + ndiv = readl(data->regs + edvd_offset) & EDVD_CORE_VOLT_FREQ_F_MASK; for (i = 0; i < data->num_clusters; i++) { struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; @@ -240,6 +270,7 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev) return -ENOMEM; data->num_clusters = ARRAY_SIZE(tegra186_clusters); + data->cpus = tegra186_cpus; bpmp = tegra_bpmp_get(&pdev->dev); if (IS_ERR(bpmp)) From cfef4bcaccf35f0b80acc5c79967996b2eb88ba6 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 2 Dec 2020 09:14:17 +0000 Subject: [PATCH 61/86] cpufreq: tegra186: Simplify cluster information lookup The CPUFREQ driver framework references each individual CPUs when getting and setting the speed. Tegra186 has 3 clusters of A57 CPUs and 1 cluster of Denver CPUs. Hence, the Tegra186 CPUFREQ driver need to know which cluster a given CPU belongs to. The logic in the Tegra186 driver can be greatly simplified by storing the cluster ID associated with each CPU in the tegra186_cpufreq_cpu structure. This allow us to completely remove the Tegra cluster info structure from the driver and simplifiy the code. Signed-off-by: Jon Hunter Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra186-cpufreq.c | 85 +++++++----------------------- 1 file changed, 20 insertions(+), 65 deletions(-) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 98b5f32eb0be..e566ea298b59 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -12,6 +12,7 @@ #include #include +#define TEGRA186_NUM_CLUSTERS 2 #define EDVD_OFFSET_A57(core) ((SZ_64K * 6) + (0x20 + (core) * 0x4)) #define EDVD_OFFSET_DENVER(core) ((SZ_64K * 7) + (0x20 + (core) * 0x4)) #define EDVD_CORE_VOLT_FREQ_F_SHIFT 0 @@ -19,58 +20,44 @@ #define EDVD_CORE_VOLT_FREQ_V_SHIFT 16 struct tegra186_cpufreq_cpu { + unsigned int bpmp_cluster_id; unsigned int edvd_offset; }; static const struct tegra186_cpufreq_cpu tegra186_cpus[] = { /* CPU0 - A57 Cluster */ { + .bpmp_cluster_id = 1, .edvd_offset = EDVD_OFFSET_A57(0) }, /* CPU1 - Denver Cluster */ { + .bpmp_cluster_id = 0, .edvd_offset = EDVD_OFFSET_DENVER(0) }, /* CPU2 - Denver Cluster */ { + .bpmp_cluster_id = 0, .edvd_offset = EDVD_OFFSET_DENVER(1) }, /* CPU3 - A57 Cluster */ { + .bpmp_cluster_id = 1, .edvd_offset = EDVD_OFFSET_A57(1) }, /* CPU4 - A57 Cluster */ { + .bpmp_cluster_id = 1, .edvd_offset = EDVD_OFFSET_A57(2) }, /* CPU5 - A57 Cluster */ { - .edvd_offset = EDVD_OFFSET_A57(3) - }, - -}; - -struct tegra186_cpufreq_cluster_info { - int cpus[4]; - unsigned int bpmp_cluster_id; -}; - -#define NO_CPU -1 -static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = { - /* Denver cluster */ - { - .cpus = { 1, 2, NO_CPU, NO_CPU }, - .bpmp_cluster_id = 0, - }, - /* A57 cluster */ - { - .cpus = { 0, 3, 4, 5 }, .bpmp_cluster_id = 1, + .edvd_offset = EDVD_OFFSET_A57(3) }, }; struct tegra186_cpufreq_cluster { - const struct tegra186_cpufreq_cluster_info *info; struct cpufreq_frequency_table *table; u32 ref_clk_khz; u32 div; @@ -78,8 +65,6 @@ struct tegra186_cpufreq_cluster { struct tegra186_cpufreq_data { void __iomem *regs; - - size_t num_clusters; struct tegra186_cpufreq_cluster *clusters; const struct tegra186_cpufreq_cpu *cpus; }; @@ -87,25 +72,9 @@ struct tegra186_cpufreq_data { static int tegra186_cpufreq_init(struct cpufreq_policy *policy) { struct tegra186_cpufreq_data *data = cpufreq_get_driver_data(); - unsigned int i; - - for (i = 0; i < data->num_clusters; i++) { - struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; - const struct tegra186_cpufreq_cluster_info *info = - cluster->info; - int core; - - for (core = 0; core < ARRAY_SIZE(info->cpus); core++) { - if (info->cpus[core] == policy->cpu) - break; - } - if (core == ARRAY_SIZE(info->cpus)) - continue; - - policy->freq_table = cluster->table; - break; - } + unsigned int cluster = data->cpus[policy->cpu].bpmp_cluster_id; + policy->freq_table = data->clusters[cluster].table; policy->cpuinfo.transition_latency = 300 * 1000; policy->driver_data = NULL; @@ -128,8 +97,9 @@ static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy, static unsigned int tegra186_cpufreq_get(unsigned int cpu) { struct tegra186_cpufreq_data *data = cpufreq_get_driver_data(); + struct tegra186_cpufreq_cluster *cluster; struct cpufreq_policy *policy; - unsigned int i, edvd_offset, freq = 0; + unsigned int edvd_offset, cluster_id; u32 ndiv; policy = cpufreq_cpu_get(cpu); @@ -138,24 +108,11 @@ static unsigned int tegra186_cpufreq_get(unsigned int cpu) edvd_offset = data->cpus[policy->cpu].edvd_offset; ndiv = readl(data->regs + edvd_offset) & EDVD_CORE_VOLT_FREQ_F_MASK; - - for (i = 0; i < data->num_clusters; i++) { - struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; - int core; - - for (core = 0; core < ARRAY_SIZE(cluster->info->cpus); core++) { - if (cluster->info->cpus[core] != policy->cpu) - continue; - - freq = (cluster->ref_clk_khz * ndiv) / cluster->div; - goto out; - } - } - -out: + cluster_id = data->cpus[policy->cpu].bpmp_cluster_id; + cluster = &data->clusters[cluster_id]; cpufreq_cpu_put(policy); - return freq; + return (cluster->ref_clk_khz * ndiv) / cluster->div; } static struct cpufreq_driver tegra186_cpufreq_driver = { @@ -171,7 +128,7 @@ static struct cpufreq_driver tegra186_cpufreq_driver = { static struct cpufreq_frequency_table *init_vhint_table( struct platform_device *pdev, struct tegra_bpmp *bpmp, - struct tegra186_cpufreq_cluster *cluster) + struct tegra186_cpufreq_cluster *cluster, unsigned int cluster_id) { struct cpufreq_frequency_table *table; struct mrq_cpu_vhint_request req; @@ -190,7 +147,7 @@ static struct cpufreq_frequency_table *init_vhint_table( memset(&req, 0, sizeof(req)); req.addr = phys; - req.cluster_id = cluster->info->bpmp_cluster_id; + req.cluster_id = cluster_id; memset(&msg, 0, sizeof(msg)); msg.mrq = MRQ_CPU_VHINT; @@ -264,12 +221,11 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev) if (!data) return -ENOMEM; - data->clusters = devm_kcalloc(&pdev->dev, ARRAY_SIZE(tegra186_clusters), + data->clusters = devm_kcalloc(&pdev->dev, TEGRA186_NUM_CLUSTERS, sizeof(*data->clusters), GFP_KERNEL); if (!data->clusters) return -ENOMEM; - data->num_clusters = ARRAY_SIZE(tegra186_clusters); data->cpus = tegra186_cpus; bpmp = tegra_bpmp_get(&pdev->dev); @@ -282,11 +238,10 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev) goto put_bpmp; } - for (i = 0; i < data->num_clusters; i++) { + for (i = 0; i < TEGRA186_NUM_CLUSTERS; i++) { struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; - cluster->info = &tegra186_clusters[i]; - cluster->table = init_vhint_table(pdev, bpmp, cluster); + cluster->table = init_vhint_table(pdev, bpmp, cluster, i); if (IS_ERR(cluster->table)) { err = PTR_ERR(cluster->table); goto put_bpmp; From 93549516d44681261d2d209186449c6125beccc1 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 2 Dec 2020 09:14:18 +0000 Subject: [PATCH 62/86] cpufreq: tegra194: Remove unnecessary frequency calculation The Tegra194 CPUFREQ driver sets the CPUFREQ_NEED_INITIAL_FREQ_CHECK flag which means that the CPUFREQ framework will call the 'get' callback on boot to determine the current frequency of the CPUs. Therefore, it is not necessary for the Tegra194 CPUFREQ driver to internally call the tegra194_get_speed_common() during initialisation to query the current frequency as well. Fix this by removing the call to the tegra194_get_speed_common() during initialisation and simplify the code. Signed-off-by: Jon Hunter Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra194-cpufreq.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index 79015875f346..a706ba929424 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -21,7 +21,6 @@ #define KHZ 1000 #define REF_CLK_MHZ 408 /* 408 MHz */ #define US_DELAY 500 -#define US_DELAY_MIN 2 #define CPUFREQ_TBL_STEP_HZ (50 * KHZ * KHZ) #define MAX_CNT ~0U @@ -44,7 +43,6 @@ struct tegra194_cpufreq_data { struct tegra_cpu_ctr { u32 cpu; - u32 delay; u32 coreclk_cnt, last_coreclk_cnt; u32 refclk_cnt, last_refclk_cnt; }; @@ -112,7 +110,7 @@ static void tegra_read_counters(struct work_struct *work) val = read_freq_feedback(); c->last_refclk_cnt = lower_32_bits(val); c->last_coreclk_cnt = upper_32_bits(val); - udelay(c->delay); + udelay(US_DELAY); val = read_freq_feedback(); c->refclk_cnt = lower_32_bits(val); c->coreclk_cnt = upper_32_bits(val); @@ -139,7 +137,7 @@ static void tegra_read_counters(struct work_struct *work) * @cpu - logical cpu whose freq to be updated * Returns freq in KHz on success, 0 if cpu is offline */ -static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay) +static unsigned int tegra194_get_speed_common(u32 cpu) { struct read_counters_work read_counters_work; struct tegra_cpu_ctr c; @@ -153,7 +151,6 @@ static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay) * interrupts enabled. */ read_counters_work.c.cpu = cpu; - read_counters_work.c.delay = delay; INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters); queue_work_on(cpu, read_counters_wq, &read_counters_work.work); flush_work(&read_counters_work.work); @@ -209,7 +206,7 @@ static unsigned int tegra194_get_speed(u32 cpu) smp_call_function_single(cpu, get_cpu_cluster, &cl, true); /* reconstruct actual cpu freq using counters */ - rate = tegra194_get_speed_common(cpu, US_DELAY); + rate = tegra194_get_speed_common(cpu); /* get last written ndiv value */ ret = smp_call_function_single(cpu, get_cpu_ndiv, &ndiv, true); @@ -248,9 +245,6 @@ static int tegra194_cpufreq_init(struct cpufreq_policy *policy) if (cl >= data->num_clusters) return -EINVAL; - /* boot freq */ - policy->cur = tegra194_get_speed_common(policy->cpu, US_DELAY_MIN); - /* set same policy for all cpus in a cluster */ for (cpu = (cl * 2); cpu < ((cl + 1) * 2); cpu++) cpumask_set_cpu(cpu, policy->cpus); From f45f89a778e8a61d9c79405e8c716058b6ba12f2 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 2 Dec 2020 09:14:19 +0000 Subject: [PATCH 63/86] cpufreq: tegra194: Rename tegra194_get_speed_common function The function tegra194_get_speed_common() uses hardware timers to calculate the current CPUFREQ and so rename this function to be tegra194_calculate_speed() to reflect what it does. Signed-off-by: Jon Hunter Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra194-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index a706ba929424..6a67f36f3b80 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -137,7 +137,7 @@ static void tegra_read_counters(struct work_struct *work) * @cpu - logical cpu whose freq to be updated * Returns freq in KHz on success, 0 if cpu is offline */ -static unsigned int tegra194_get_speed_common(u32 cpu) +static unsigned int tegra194_calculate_speed(u32 cpu) { struct read_counters_work read_counters_work; struct tegra_cpu_ctr c; @@ -206,7 +206,7 @@ static unsigned int tegra194_get_speed(u32 cpu) smp_call_function_single(cpu, get_cpu_cluster, &cl, true); /* reconstruct actual cpu freq using counters */ - rate = tegra194_get_speed_common(cpu); + rate = tegra194_calculate_speed(cpu); /* get last written ndiv value */ ret = smp_call_function_single(cpu, get_cpu_ndiv, &ndiv, true); From 7482c5cb90e5a7f9e9e12dd154d405e0219656e3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 24 Nov 2020 20:44:00 +0100 Subject: [PATCH 64/86] PM: ACPI: PCI: Drop acpi_pm_set_bridge_wakeup() The idea behind acpi_pm_set_bridge_wakeup() was to allow bridges to be reference counted for wakeup enabling, because they may be enabled to signal wakeup on behalf of their subordinate devices and that may happen for multiple times in a row, whereas for the other devices it only makes sense to enable wakeup signaling once. However, this becomes problematic if the bridge itself is suspended, because it is treated as a "regular" device in that case and the reference counting doesn't work. For instance, suppose that there are two devices below a bridge and they both can signal wakeup. Every time one of them is suspended, wakeup signaling is enabled for the bridge, so when they both have been suspended, the bridge's wakeup reference counter value is 2. Say that the bridge is suspended subsequently and acpi_pci_wakeup() is called for it. Because the bridge can signal wakeup, that function will invoke acpi_pm_set_device_wakeup() to configure it and __acpi_pm_set_device_wakeup() will be called with the last argument equal to 1. This causes __acpi_device_wakeup_enable() invoked by it to omit the reference counting, because the reference counter of the target device (the bridge) is 2 at that time. Now say that the bridge resumes and one of the device below it resumes too, so the bridge's reference counter becomes 0 and wakeup signaling is disabled for it, but there is still the other suspended device which may need the bridge to signal wakeup on its behalf and that is not going to work. To address this scenario, use wakeup enable reference counting for all devices, not just for bridges, so drop the last argument from __acpi_device_wakeup_enable() and __acpi_pm_set_device_wakeup(), which causes acpi_pm_set_device_wakeup() and acpi_pm_set_bridge_wakeup() to become identical, so drop the latter and use the former instead of it everywhere. Fixes: 1ba51a7c1496 ("ACPI / PCI / PM: Rework acpi_pci_propagate_wakeup()") Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Acked-by: Bjorn Helgaas Cc: 4.14+ # 4.14+ --- drivers/acpi/device_pm.c | 41 ++++++++++++---------------------------- drivers/pci/pci-acpi.c | 4 ++-- include/acpi/acpi_bus.h | 5 ----- 3 files changed, 14 insertions(+), 36 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 94d91c67aeae..ef77dbcaf58f 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -749,7 +749,7 @@ static void acpi_pm_notify_work_func(struct acpi_device_wakeup_context *context) static DEFINE_MUTEX(acpi_wakeup_lock); static int __acpi_device_wakeup_enable(struct acpi_device *adev, - u32 target_state, int max_count) + u32 target_state) { struct acpi_device_wakeup *wakeup = &adev->wakeup; acpi_status status; @@ -757,9 +757,10 @@ static int __acpi_device_wakeup_enable(struct acpi_device *adev, mutex_lock(&acpi_wakeup_lock); - if (wakeup->enable_count >= max_count) + if (wakeup->enable_count >= INT_MAX) { + acpi_handle_info(adev->handle, "Wakeup enable count out of bounds!\n"); goto out; - + } if (wakeup->enable_count > 0) goto inc; @@ -799,7 +800,7 @@ out: */ static int acpi_device_wakeup_enable(struct acpi_device *adev, u32 target_state) { - return __acpi_device_wakeup_enable(adev, target_state, 1); + return __acpi_device_wakeup_enable(adev, target_state); } /** @@ -829,8 +830,12 @@ out: mutex_unlock(&acpi_wakeup_lock); } -static int __acpi_pm_set_device_wakeup(struct device *dev, bool enable, - int max_count) +/** + * acpi_pm_set_device_wakeup - Enable/disable remote wakeup for given device. + * @dev: Device to enable/disable to generate wakeup events. + * @enable: Whether to enable or disable the wakeup functionality. + */ +int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { struct acpi_device *adev; int error; @@ -850,36 +855,14 @@ static int __acpi_pm_set_device_wakeup(struct device *dev, bool enable, return 0; } - error = __acpi_device_wakeup_enable(adev, acpi_target_system_state(), - max_count); + error = __acpi_device_wakeup_enable(adev, acpi_target_system_state()); if (!error) dev_dbg(dev, "Wakeup enabled by ACPI\n"); return error; } - -/** - * acpi_pm_set_device_wakeup - Enable/disable remote wakeup for given device. - * @dev: Device to enable/disable to generate wakeup events. - * @enable: Whether to enable or disable the wakeup functionality. - */ -int acpi_pm_set_device_wakeup(struct device *dev, bool enable) -{ - return __acpi_pm_set_device_wakeup(dev, enable, 1); -} EXPORT_SYMBOL_GPL(acpi_pm_set_device_wakeup); -/** - * acpi_pm_set_bridge_wakeup - Enable/disable remote wakeup for given bridge. - * @dev: Bridge device to enable/disable to generate wakeup events. - * @enable: Whether to enable or disable the wakeup functionality. - */ -int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable) -{ - return __acpi_pm_set_device_wakeup(dev, enable, INT_MAX); -} -EXPORT_SYMBOL_GPL(acpi_pm_set_bridge_wakeup); - /** * acpi_dev_pm_low_power - Put ACPI device into a low-power state. * @dev: Device to put into a low-power state. diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index bf03648c2072..745a4e0c4994 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1060,7 +1060,7 @@ static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable) { while (bus->parent) { if (acpi_pm_device_can_wakeup(&bus->self->dev)) - return acpi_pm_set_bridge_wakeup(&bus->self->dev, enable); + return acpi_pm_set_device_wakeup(&bus->self->dev, enable); bus = bus->parent; } @@ -1068,7 +1068,7 @@ static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable) /* We have reached the root bus. */ if (bus->bridge) { if (acpi_pm_device_can_wakeup(bus->bridge)) - return acpi_pm_set_bridge_wakeup(bus->bridge, enable); + return acpi_pm_set_device_wakeup(bus->bridge, enable); } return 0; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index a3abcc4b7d9f..6d1879bf9440 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -620,7 +620,6 @@ acpi_status acpi_remove_pm_notifier(struct acpi_device *adev); bool acpi_pm_device_can_wakeup(struct device *dev); int acpi_pm_device_sleep_state(struct device *, int *, int); int acpi_pm_set_device_wakeup(struct device *dev, bool enable); -int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable); #else static inline void acpi_pm_wakeup_event(struct device *dev) { @@ -651,10 +650,6 @@ static inline int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { return -ENODEV; } -static inline int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable) -{ - return -ENODEV; -} #endif #ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT From b93b7ef61764819b6060f69e35ea9d6563b9b5d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 24 Nov 2020 20:46:38 +0100 Subject: [PATCH 65/86] PM: ACPI: Refresh wakeup device power configuration every time When wakeup signaling is enabled for a bridge for the second (or every next) time in a row, its existing device wakeup power configuration may not match the new conditions. For example, some devices below it may have been put into low-power states and that changes the device wakeup power conditions or similar. This causes functional problems to appear on some systems (for example, because of it the Thunderbolt port on Dell Precision 5550 cannot detect devices plugged in after it has been suspended). For this reason, modify __acpi_device_wakeup_enable() to refresh the device wakeup power configuration of the target device on every invocation, not just when it is called for that device first time in a row. Signed-off-by: Rafael J. Wysocki Reported-by: Kai-Heng Feng Tested-by: Kai-Heng Feng Reviewed-by: Mika Westerberg --- drivers/acpi/device_pm.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index ef77dbcaf58f..3586434d0ded 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -757,16 +757,26 @@ static int __acpi_device_wakeup_enable(struct acpi_device *adev, mutex_lock(&acpi_wakeup_lock); - if (wakeup->enable_count >= INT_MAX) { - acpi_handle_info(adev->handle, "Wakeup enable count out of bounds!\n"); - goto out; - } + /* + * If the device wakeup power is already enabled, disable it and enable + * it again in case it depends on the configuration of subordinate + * devices and the conditions have changed since it was enabled last + * time. + */ if (wakeup->enable_count > 0) - goto inc; + acpi_disable_wakeup_device_power(adev); error = acpi_enable_wakeup_device_power(adev, target_state); - if (error) + if (error) { + if (wakeup->enable_count > 0) { + acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number); + wakeup->enable_count = 0; + } goto out; + } + + if (wakeup->enable_count > 0) + goto inc; status = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number); if (ACPI_FAILURE(status)) { @@ -779,7 +789,10 @@ static int __acpi_device_wakeup_enable(struct acpi_device *adev, (unsigned int)wakeup->gpe_number); inc: - wakeup->enable_count++; + if (wakeup->enable_count < INT_MAX) + wakeup->enable_count++; + else + acpi_handle_info(adev->handle, "Wakeup enable count out of bounds!\n"); out: mutex_unlock(&acpi_wakeup_lock); From 76ea4d8eeefbfdd37e47c6fd579d0d5852457618 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 24 Nov 2020 10:43:45 +0000 Subject: [PATCH 66/86] firmware: arm_scmi: Add power_scale_mw_get() interface Add a new interface to the existing perf_ops and export the information about the power values scale. This would be used by the cpufreq driver and Energy Model framework to set the performance domains scale: milli-Watts or abstract scale. Suggested-by: Morten Rasmussen Reviewed-by: Cristian Marussi Signed-off-by: Lukasz Luba Acked-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/firmware/arm_scmi/perf.c | 8 ++++++++ include/linux/scmi_protocol.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 82fb3babff72..e374b1125fca 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -750,6 +750,13 @@ static bool scmi_fast_switch_possible(const struct scmi_handle *handle, return dom->fc_info && dom->fc_info->level_set_addr; } +static bool scmi_power_scale_mw_get(const struct scmi_handle *handle) +{ + struct scmi_perf_info *pi = handle->perf_priv; + + return pi->power_scale_mw; +} + static const struct scmi_perf_ops perf_ops = { .limits_set = scmi_perf_limits_set, .limits_get = scmi_perf_limits_get, @@ -762,6 +769,7 @@ static const struct scmi_perf_ops perf_ops = { .freq_get = scmi_dvfs_freq_get, .est_power_get = scmi_dvfs_est_power_get, .fast_switch_possible = scmi_fast_switch_possible, + .power_scale_mw_get = scmi_power_scale_mw_get, }; static int scmi_perf_set_notify_enabled(const struct scmi_handle *handle, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9cd312a1ff92..c77e4e11e788 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -121,6 +121,7 @@ struct scmi_perf_ops { unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_handle *handle, struct device *dev); + bool (*power_scale_mw_get)(const struct scmi_handle *handle); }; /** From f9b0498d29404f230894490d622e57e481c7d45a Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 24 Nov 2020 10:43:46 +0000 Subject: [PATCH 67/86] cpufreq: arm_scmi: Discover the power scale in performance protocol Add mechanism to discover the power scale present in the performance protocol for all domains. Provide this information to Energy Model, which then can be checked in other frameworks, e.g. thermal. Suggested-by: Morten Rasmussen Signed-off-by: Lukasz Luba Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 3714a4cd07fa..144afd1265c1 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -125,6 +125,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) struct scmi_data *priv; struct cpufreq_frequency_table *freq_table; struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power); + bool power_scale_mw; cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { @@ -188,8 +189,9 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = handle->perf_ops->fast_switch_possible(handle, cpu_dev); + power_scale_mw = handle->perf_ops->power_scale_mw_get(handle); em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, - false); + power_scale_mw); return 0; From 1080399542075bb0e9d46ea80418d76784d1ece8 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Sat, 28 Nov 2020 07:09:23 +0530 Subject: [PATCH 68/86] PM / EM: Micro optimization in em_cpu_energy When the sum of the utilization of CPUs in a power domain is zero, return the energy as 0 without doing any computations. Acked-by: Quentin Perret Reviewed-by: Dietmar Eggemann Signed-off-by: Pavankumar Kondeti Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 9618c0a46ef4..757fc60658fa 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -106,6 +106,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, struct em_perf_state *ps; int i, cpu; + if (!sum_util) + return 0; + /* * In order to predict the performance state, map the utilization of * the most utilized CPU of the performance domain to a requested From cf1fac943c6341dfed1db1293864c9fcad47bac3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Nov 2020 11:24:32 +0530 Subject: [PATCH 69/86] opp: Reduce the size of critical section in _opp_kref_release() There is a lot of stuff here which can be done outside of the opp_table->lock, do that. This helps avoiding a circular dependency lockdeps around debugfs. Reported-by: Rob Clark Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 94 +++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 9915e8487f0b..27a0e49b24ab 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1252,9 +1252,14 @@ void _opp_free(struct dev_pm_opp *opp) kfree(opp); } -static void _opp_kref_release(struct dev_pm_opp *opp, - struct opp_table *opp_table) +static void _opp_kref_release(struct kref *kref) { + struct dev_pm_opp *opp = container_of(kref, struct dev_pm_opp, kref); + struct opp_table *opp_table = opp->opp_table; + + list_del(&opp->node); + mutex_unlock(&opp_table->lock); + /* * Notify the changes in the availability of the operable * frequency/voltage list. @@ -1262,27 +1267,9 @@ static void _opp_kref_release(struct dev_pm_opp *opp, blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_REMOVE, opp); _of_opp_free_required_opps(opp_table, opp); opp_debug_remove_one(opp); - list_del(&opp->node); kfree(opp); } -static void _opp_kref_release_unlocked(struct kref *kref) -{ - struct dev_pm_opp *opp = container_of(kref, struct dev_pm_opp, kref); - struct opp_table *opp_table = opp->opp_table; - - _opp_kref_release(opp, opp_table); -} - -static void _opp_kref_release_locked(struct kref *kref) -{ - struct dev_pm_opp *opp = container_of(kref, struct dev_pm_opp, kref); - struct opp_table *opp_table = opp->opp_table; - - _opp_kref_release(opp, opp_table); - mutex_unlock(&opp_table->lock); -} - void dev_pm_opp_get(struct dev_pm_opp *opp) { kref_get(&opp->kref); @@ -1290,16 +1277,10 @@ void dev_pm_opp_get(struct dev_pm_opp *opp) void dev_pm_opp_put(struct dev_pm_opp *opp) { - kref_put_mutex(&opp->kref, _opp_kref_release_locked, - &opp->opp_table->lock); + kref_put_mutex(&opp->kref, _opp_kref_release, &opp->opp_table->lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_put); -static void dev_pm_opp_put_unlocked(struct dev_pm_opp *opp) -{ - kref_put(&opp->kref, _opp_kref_release_unlocked); -} - /** * dev_pm_opp_remove() - Remove an OPP from OPP table * @dev: device for which we do this operation @@ -1343,30 +1324,49 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq) } EXPORT_SYMBOL_GPL(dev_pm_opp_remove); +static struct dev_pm_opp *_opp_get_next(struct opp_table *opp_table, + bool dynamic) +{ + struct dev_pm_opp *opp = NULL, *temp; + + mutex_lock(&opp_table->lock); + list_for_each_entry(temp, &opp_table->opp_list, node) { + if (dynamic == temp->dynamic) { + opp = temp; + break; + } + } + + mutex_unlock(&opp_table->lock); + return opp; +} + bool _opp_remove_all_static(struct opp_table *opp_table) { - struct dev_pm_opp *opp, *tmp; - bool ret = true; + struct dev_pm_opp *opp; mutex_lock(&opp_table->lock); if (!opp_table->parsed_static_opps) { - ret = false; - goto unlock; + mutex_unlock(&opp_table->lock); + return false; } - if (--opp_table->parsed_static_opps) - goto unlock; - - list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) { - if (!opp->dynamic) - dev_pm_opp_put_unlocked(opp); + if (--opp_table->parsed_static_opps) { + mutex_unlock(&opp_table->lock); + return true; } -unlock: mutex_unlock(&opp_table->lock); - return ret; + /* + * Can't remove the OPP from under the lock, debugfs removal needs to + * happen lock less to avoid circular dependency issues. + */ + while ((opp = _opp_get_next(opp_table, false))) + dev_pm_opp_put(opp); + + return true; } /** @@ -1378,21 +1378,21 @@ unlock: void dev_pm_opp_remove_all_dynamic(struct device *dev) { struct opp_table *opp_table; - struct dev_pm_opp *opp, *temp; + struct dev_pm_opp *opp; int count = 0; opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) return; - mutex_lock(&opp_table->lock); - list_for_each_entry_safe(opp, temp, &opp_table->opp_list, node) { - if (opp->dynamic) { - dev_pm_opp_put_unlocked(opp); - count++; - } + /* + * Can't remove the OPP from under the lock, debugfs removal needs to + * happen lock less to avoid circular dependency issues. + */ + while ((opp = _opp_get_next(opp_table, true))) { + dev_pm_opp_put(opp); + count++; } - mutex_unlock(&opp_table->lock); /* Drop the references taken by dev_pm_opp_add() */ while (count--) From 873c9851eb54b78c27a0d753f6dd7e377572a0aa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 10:28:43 +0530 Subject: [PATCH 70/86] cpufreq: dt: Don't (ab)use dev_pm_opp_get_opp_table() to create OPP table Initially, the helper dev_pm_opp_get_opp_table() was supposed to be used only for the OPP core's internal use (it tries to find an existing OPP table and if it doesn't find one, then it allocates the OPP table). Sometime back, the cpufreq-dt driver started using it to make sure all the relevant resources required by the OPP core are available earlier during initialization process to properly propagate -EPROBE_DEFER. It worked but it also abused the API to create an OPP table, which should be created with the help of other helpers provided by the OPP core. The OPP core will be updated in a later commit to limit the scope of dev_pm_opp_get_opp_table() to only finding an existing OPP table and not create one. This commit updates the cpufreq-dt driver before that happens. Now the cpufreq-dt driver creates the OPP and cpufreq tables for all the CPUs from driver's init callback itself. Tested-by: Marek Szyprowski Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt.c | 155 +++++++++++++++-------------------- 1 file changed, 67 insertions(+), 88 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index e363ae04aac6..5aa3d4e3140d 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -30,7 +30,7 @@ struct private_data { cpumask_var_t cpus; struct device *cpu_dev; struct opp_table *opp_table; - struct opp_table *reg_opp_table; + struct cpufreq_frequency_table *freq_table; bool have_static_opps; }; @@ -102,7 +102,6 @@ node_put: static int cpufreq_init(struct cpufreq_policy *policy) { - struct cpufreq_frequency_table *freq_table; struct private_data *priv; struct device *cpu_dev; struct clk *cpu_clk; @@ -114,9 +113,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) pr_err("failed to find data for cpu%d\n", policy->cpu); return -ENODEV; } - cpu_dev = priv->cpu_dev; - cpumask_copy(policy->cpus, priv->cpus); cpu_clk = clk_get(cpu_dev, NULL); if (IS_ERR(cpu_clk)) { @@ -125,67 +122,32 @@ static int cpufreq_init(struct cpufreq_policy *policy) return ret; } - /* - * Initialize OPP tables for all policy->cpus. They will be shared by - * all CPUs which have marked their CPUs shared with OPP bindings. - * - * For platforms not using operating-points-v2 bindings, we do this - * before updating policy->cpus. Otherwise, we will end up creating - * duplicate OPPs for policy->cpus. - * - * OPPs might be populated at runtime, don't check for error here - */ - if (!dev_pm_opp_of_cpumask_add_table(policy->cpus)) - priv->have_static_opps = true; - - /* - * But we need OPP table to function so if it is not there let's - * give platform code chance to provide it for us. - */ - ret = dev_pm_opp_get_opp_count(cpu_dev); - if (ret <= 0) { - dev_err(cpu_dev, "OPP table can't be empty\n"); - ret = -ENODEV; - goto out_free_opp; - } - - ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); - if (ret) { - dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); - goto out_free_opp; - } + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); + if (!transition_latency) + transition_latency = CPUFREQ_ETERNAL; + cpumask_copy(policy->cpus, priv->cpus); policy->driver_data = priv; policy->clk = cpu_clk; - policy->freq_table = freq_table; - + policy->freq_table = priv->freq_table; policy->suspend_freq = dev_pm_opp_get_suspend_opp_freq(cpu_dev) / 1000; + policy->cpuinfo.transition_latency = transition_latency; + policy->dvfs_possible_from_any_cpu = true; /* Support turbo/boost mode */ if (policy_has_boost_freq(policy)) { /* This gets disabled by core on driver unregister */ ret = cpufreq_enable_boost_support(); if (ret) - goto out_free_cpufreq_table; + goto out_clk_put; cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; } - transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); - if (!transition_latency) - transition_latency = CPUFREQ_ETERNAL; - - policy->cpuinfo.transition_latency = transition_latency; - policy->dvfs_possible_from_any_cpu = true; - dev_pm_opp_of_register_em(cpu_dev, policy->cpus); return 0; -out_free_cpufreq_table: - dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); -out_free_opp: - if (priv->have_static_opps) - dev_pm_opp_of_cpumask_remove_table(policy->cpus); +out_clk_put: clk_put(cpu_clk); return ret; @@ -208,11 +170,6 @@ static int cpufreq_offline(struct cpufreq_policy *policy) static int cpufreq_exit(struct cpufreq_policy *policy) { - struct private_data *priv = policy->driver_data; - - dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - if (priv->have_static_opps) - dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); clk_put(policy->clk); return 0; } @@ -236,6 +193,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) { struct private_data *priv; struct device *cpu_dev; + bool fallback = false; const char *reg_name; int ret; @@ -254,68 +212,87 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) if (!alloc_cpumask_var(&priv->cpus, GFP_KERNEL)) return -ENOMEM; + cpumask_set_cpu(cpu, priv->cpus); priv->cpu_dev = cpu_dev; - /* Try to get OPP table early to ensure resources are available */ - priv->opp_table = dev_pm_opp_get_opp_table(cpu_dev); - if (IS_ERR(priv->opp_table)) { - ret = PTR_ERR(priv->opp_table); - if (ret != -EPROBE_DEFER) - dev_err(cpu_dev, "failed to get OPP table: %d\n", ret); - goto free_cpumask; - } - /* * OPP layer will be taking care of regulators now, but it needs to know * the name of the regulator first. */ reg_name = find_supply_name(cpu_dev); if (reg_name) { - priv->reg_opp_table = dev_pm_opp_set_regulators(cpu_dev, - ®_name, 1); - if (IS_ERR(priv->reg_opp_table)) { - ret = PTR_ERR(priv->reg_opp_table); + priv->opp_table = dev_pm_opp_set_regulators(cpu_dev, ®_name, + 1); + if (IS_ERR(priv->opp_table)) { + ret = PTR_ERR(priv->opp_table); if (ret != -EPROBE_DEFER) dev_err(cpu_dev, "failed to set regulators: %d\n", ret); - goto put_table; + goto free_cpumask; } } - /* Find OPP sharing information so we can fill pri->cpus here */ /* Get OPP-sharing information from "operating-points-v2" bindings */ ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->cpus); if (ret) { if (ret != -ENOENT) - goto put_reg; + goto out; /* * operating-points-v2 not supported, fallback to all CPUs share * OPP for backward compatibility if the platform hasn't set * sharing CPUs. */ - if (dev_pm_opp_get_sharing_cpus(cpu_dev, priv->cpus)) { - cpumask_setall(priv->cpus); + if (dev_pm_opp_get_sharing_cpus(cpu_dev, priv->cpus)) + fallback = true; + } - /* - * OPP tables are initialized only for cpu, do it for - * others as well. - */ - ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->cpus); - if (ret) - dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", - __func__, ret); - } + /* + * Initialize OPP tables for all priv->cpus. They will be shared by + * all CPUs which have marked their CPUs shared with OPP bindings. + * + * For platforms not using operating-points-v2 bindings, we do this + * before updating priv->cpus. Otherwise, we will end up creating + * duplicate OPPs for the CPUs. + * + * OPPs might be populated at runtime, don't check for error here. + */ + if (!dev_pm_opp_of_cpumask_add_table(priv->cpus)) + priv->have_static_opps = true; + + /* + * The OPP table must be initialized, statically or dynamically, by this + * point. + */ + ret = dev_pm_opp_get_opp_count(cpu_dev); + if (ret <= 0) { + dev_err(cpu_dev, "OPP table can't be empty\n"); + ret = -ENODEV; + goto out; + } + + if (fallback) { + cpumask_setall(priv->cpus); + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->cpus); + if (ret) + dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", + __func__, ret); + } + + ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &priv->freq_table); + if (ret) { + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); + goto out; } list_add(&priv->node, &priv_list); return 0; -put_reg: - if (priv->reg_opp_table) - dev_pm_opp_put_regulators(priv->reg_opp_table); -put_table: - dev_pm_opp_put_opp_table(priv->opp_table); +out: + if (priv->have_static_opps) + dev_pm_opp_of_cpumask_remove_table(priv->cpus); + if (priv->opp_table) + dev_pm_opp_put_regulators(priv->opp_table); free_cpumask: free_cpumask_var(priv->cpus); return ret; @@ -326,9 +303,11 @@ static void dt_cpufreq_release(void) struct private_data *priv, *tmp; list_for_each_entry_safe(priv, tmp, &priv_list, node) { - if (priv->reg_opp_table) - dev_pm_opp_put_regulators(priv->reg_opp_table); - dev_pm_opp_put_opp_table(priv->opp_table); + dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &priv->freq_table); + if (priv->have_static_opps) + dev_pm_opp_of_cpumask_remove_table(priv->cpus); + if (priv->opp_table) + dev_pm_opp_put_regulators(priv->opp_table); free_cpumask_var(priv->cpus); list_del(&priv->node); } From e77dcb0b732dd355ca594909f6c2085dfc46cde2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 10:37:16 +0530 Subject: [PATCH 71/86] opp: Don't create an OPP table from dev_pm_opp_get_opp_table() It has been found that some users (like cpufreq-dt and others on LKML) have abused the helper dev_pm_opp_get_opp_table() to create the OPP table instead of just finding it, which is the wrong thing to do. This routine was meant for OPP core's internal working and exposed the whole functionality by mistake. Change the scope of dev_pm_opp_get_opp_table() to only finding the table. The internal helpers _opp_get_opp_table*() are thus renamed to _add_opp_table*(), dev_pm_opp_get_opp_table_indexed() is removed (as we don't need the index field for finding the OPP table) and so the only user, genpd, is updated. Note that the prototype of _add_opp_table() was already left in opp.h by mistake when it was removed earlier and so we weren't required to add it now. Acked-by: Ulf Hansson Signed-off-by: Viresh Kumar --- drivers/base/power/domain.c | 2 +- drivers/opp/core.c | 29 ++++++++++++++--------------- drivers/opp/of.c | 4 ++-- drivers/opp/opp.h | 1 + include/linux/pm_opp.h | 1 - 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 743268996336..92b750b865d5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2249,7 +2249,7 @@ int of_genpd_add_provider_onecell(struct device_node *np, * Save table for faster processing while setting * performance state. */ - genpd->opp_table = dev_pm_opp_get_opp_table_indexed(&genpd->dev, i); + genpd->opp_table = dev_pm_opp_get_opp_table(&genpd->dev); WARN_ON(IS_ERR(genpd->opp_table)); } diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 27a0e49b24ab..8f53c1b48911 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1138,7 +1138,7 @@ void _get_opp_table_kref(struct opp_table *opp_table) * uses the opp_tables_busy flag to indicate if another creator is in the middle * of adding an OPP table and others should wait for it to finish. */ -static struct opp_table *_opp_get_opp_table(struct device *dev, int index) +struct opp_table *_add_opp_table_indexed(struct device *dev, int index) { struct opp_table *opp_table; @@ -1188,18 +1188,17 @@ unlock: return opp_table; } +struct opp_table *_add_opp_table(struct device *dev) +{ + return _add_opp_table_indexed(dev, 0); +} + struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) { - return _opp_get_opp_table(dev, 0); + return _find_opp_table(dev); } EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_table); -struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, - int index) -{ - return _opp_get_opp_table(dev, index); -} - static void _opp_table_kref_release(struct kref *kref) { struct opp_table *opp_table = container_of(kref, struct opp_table, kref); @@ -1627,7 +1626,7 @@ struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, { struct opp_table *opp_table; - opp_table = dev_pm_opp_get_opp_table(dev); + opp_table = _add_opp_table(dev); if (IS_ERR(opp_table)) return opp_table; @@ -1686,7 +1685,7 @@ struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { struct opp_table *opp_table; - opp_table = dev_pm_opp_get_opp_table(dev); + opp_table = _add_opp_table(dev); if (IS_ERR(opp_table)) return opp_table; @@ -1779,7 +1778,7 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, struct regulator *reg; int ret, i; - opp_table = dev_pm_opp_get_opp_table(dev); + opp_table = _add_opp_table(dev); if (IS_ERR(opp_table)) return opp_table; @@ -1887,7 +1886,7 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) struct opp_table *opp_table; int ret; - opp_table = dev_pm_opp_get_opp_table(dev); + opp_table = _add_opp_table(dev); if (IS_ERR(opp_table)) return opp_table; @@ -1955,7 +1954,7 @@ struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, if (!set_opp) return ERR_PTR(-EINVAL); - opp_table = dev_pm_opp_get_opp_table(dev); + opp_table = _add_opp_table(dev); if (IS_ERR(opp_table)) return opp_table; @@ -2039,7 +2038,7 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, int index = 0, ret = -EINVAL; const char **name = names; - opp_table = dev_pm_opp_get_opp_table(dev); + opp_table = _add_opp_table(dev); if (IS_ERR(opp_table)) return opp_table; @@ -2204,7 +2203,7 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) struct opp_table *opp_table; int ret; - opp_table = dev_pm_opp_get_opp_table(dev); + opp_table = _add_opp_table(dev); if (IS_ERR(opp_table)) return PTR_ERR(opp_table); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index aa0ac5d4e479..6b7f0066942d 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -975,7 +975,7 @@ int dev_pm_opp_of_add_table(struct device *dev) struct opp_table *opp_table; int ret; - opp_table = dev_pm_opp_get_opp_table_indexed(dev, 0); + opp_table = _add_opp_table_indexed(dev, 0); if (IS_ERR(opp_table)) return PTR_ERR(opp_table); @@ -1030,7 +1030,7 @@ int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) index = 0; } - opp_table = dev_pm_opp_get_opp_table_indexed(dev, index); + opp_table = _add_opp_table_indexed(dev, index); if (IS_ERR(opp_table)) return PTR_ERR(opp_table); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index ebd930e0b3ca..4ced7ffa8158 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -224,6 +224,7 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *o int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic); void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, int last_cpu); struct opp_table *_add_opp_table(struct device *dev); +struct opp_table *_add_opp_table_indexed(struct device *dev, int index); void _put_opp_list_kref(struct opp_table *opp_table); #ifdef CONFIG_OF diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index dbb484524f82..1435c054016a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -90,7 +90,6 @@ struct dev_pm_set_opp_data { #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); -struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); From c7bf8758c955e6272c0f4b2411d7a85abce8fafe Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 12:16:52 +0530 Subject: [PATCH 72/86] opp: Allow dev_pm_opp_put_*() APIs to accept NULL opp_table This allows the callers to drop the unnecessary checks. Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 8f53c1b48911..4268eb359915 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1660,6 +1660,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); */ void dev_pm_opp_put_supported_hw(struct opp_table *opp_table) { + if (unlikely(!opp_table)) + return; + /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); @@ -1716,6 +1719,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); */ void dev_pm_opp_put_prop_name(struct opp_table *opp_table) { + if (unlikely(!opp_table)) + return; + /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); @@ -1844,6 +1850,9 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) { int i; + if (unlikely(!opp_table)) + return; + if (!opp_table->regulators) goto put_opp_table; @@ -1926,6 +1935,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_clkname); */ void dev_pm_opp_put_clkname(struct opp_table *opp_table) { + if (unlikely(!opp_table)) + return; + /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); @@ -1981,6 +1993,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_register_set_opp_helper); */ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) { + if (unlikely(!opp_table)) + return; + /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); @@ -2109,6 +2124,9 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_attach_genpd); */ void dev_pm_opp_detach_genpd(struct opp_table *opp_table) { + if (unlikely(!opp_table)) + return; + /* * Acquire genpd_virt_dev_lock to make sure virt_dev isn't getting * used in parallel. From 5f6ffb8d8f8fdf672cbc4f27888ce075df13d49c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 12:18:39 +0530 Subject: [PATCH 73/86] cpufreq: dt: dev_pm_opp_put_regulators() accepts NULL argument The dev_pm_opp_put_*() APIs now accepts a NULL opp_table pointer and so there is no need for us to carry the extra checks. Drop them. Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 5aa3d4e3140d..ad4234518ef6 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -291,8 +291,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) out: if (priv->have_static_opps) dev_pm_opp_of_cpumask_remove_table(priv->cpus); - if (priv->opp_table) - dev_pm_opp_put_regulators(priv->opp_table); + dev_pm_opp_put_regulators(priv->opp_table); free_cpumask: free_cpumask_var(priv->cpus); return ret; @@ -306,8 +305,7 @@ static void dt_cpufreq_release(void) dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &priv->freq_table); if (priv->have_static_opps) dev_pm_opp_of_cpumask_remove_table(priv->cpus); - if (priv->opp_table) - dev_pm_opp_put_regulators(priv->opp_table); + dev_pm_opp_put_regulators(priv->opp_table); free_cpumask_var(priv->cpus); list_del(&priv->node); } From 2ff8fe13ac6da7a7c45d610cc3237c8556610f07 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 12:18:39 +0530 Subject: [PATCH 74/86] cpufreq: qcom-cpufreq-nvmem: dev_pm_opp_put_*() accepts NULL argument The dev_pm_opp_put_*() APIs now accepts a NULL opp_table pointer and so there is no need for us to carry the extra checks. Drop them. Reviewed-by: Ilia Lin Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index d06b37822c3d..747d602f221e 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -397,19 +397,19 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) free_genpd_opp: for_each_possible_cpu(cpu) { - if (IS_ERR_OR_NULL(drv->genpd_opp_tables[cpu])) + if (IS_ERR(drv->genpd_opp_tables[cpu])) break; dev_pm_opp_detach_genpd(drv->genpd_opp_tables[cpu]); } kfree(drv->genpd_opp_tables); free_opp: for_each_possible_cpu(cpu) { - if (IS_ERR_OR_NULL(drv->names_opp_tables[cpu])) + if (IS_ERR(drv->names_opp_tables[cpu])) break; dev_pm_opp_put_prop_name(drv->names_opp_tables[cpu]); } for_each_possible_cpu(cpu) { - if (IS_ERR_OR_NULL(drv->hw_opp_tables[cpu])) + if (IS_ERR(drv->hw_opp_tables[cpu])) break; dev_pm_opp_put_supported_hw(drv->hw_opp_tables[cpu]); } @@ -430,12 +430,9 @@ static int qcom_cpufreq_remove(struct platform_device *pdev) platform_device_unregister(cpufreq_dt_pdev); for_each_possible_cpu(cpu) { - if (drv->names_opp_tables[cpu]) - dev_pm_opp_put_supported_hw(drv->names_opp_tables[cpu]); - if (drv->hw_opp_tables[cpu]) - dev_pm_opp_put_supported_hw(drv->hw_opp_tables[cpu]); - if (drv->genpd_opp_tables[cpu]) - dev_pm_opp_detach_genpd(drv->genpd_opp_tables[cpu]); + dev_pm_opp_put_supported_hw(drv->names_opp_tables[cpu]); + dev_pm_opp_put_supported_hw(drv->hw_opp_tables[cpu]); + dev_pm_opp_detach_genpd(drv->genpd_opp_tables[cpu]); } kfree(drv->names_opp_tables); From 814568728373699907971f897b89d95736b0d880 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 12:18:39 +0530 Subject: [PATCH 75/86] PM / devfreq: exynos: dev_pm_opp_put_*() accepts NULL argument The dev_pm_opp_put_*() APIs now accepts a NULL opp_table pointer and so there is no need for us to carry the extra check. Drop them. Acked-by: Chanwoo Choi Signed-off-by: Viresh Kumar --- drivers/devfreq/exynos-bus.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 1e684a448c9e..143fd58ec3dc 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -158,10 +158,8 @@ static void exynos_bus_exit(struct device *dev) dev_pm_opp_of_remove_table(dev); clk_disable_unprepare(bus->clk); - if (bus->opp_table) { - dev_pm_opp_put_regulators(bus->opp_table); - bus->opp_table = NULL; - } + dev_pm_opp_put_regulators(bus->opp_table); + bus->opp_table = NULL; } static void exynos_bus_passive_exit(struct device *dev) @@ -444,10 +442,8 @@ err: dev_pm_opp_of_remove_table(dev); clk_disable_unprepare(bus->clk); err_reg: - if (!passive) { - dev_pm_opp_put_regulators(bus->opp_table); - bus->opp_table = NULL; - } + dev_pm_opp_put_regulators(bus->opp_table); + bus->opp_table = NULL; return ret; } From 72ba9e226fac8a9958b5201428a387c348515289 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 12:18:39 +0530 Subject: [PATCH 76/86] drm/lima: dev_pm_opp_put_*() accepts NULL argument The dev_pm_opp_put_*() APIs now accepts a NULL opp_table pointer and so there is no need for us to carry the extra check. Drop them. Reviewed-by: Qiang Yu Signed-off-by: Viresh Kumar --- drivers/gpu/drm/lima/lima_devfreq.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c index bbe02817721b..e7b7b8dfd792 100644 --- a/drivers/gpu/drm/lima/lima_devfreq.c +++ b/drivers/gpu/drm/lima/lima_devfreq.c @@ -110,15 +110,10 @@ void lima_devfreq_fini(struct lima_device *ldev) devfreq->opp_of_table_added = false; } - if (devfreq->regulators_opp_table) { - dev_pm_opp_put_regulators(devfreq->regulators_opp_table); - devfreq->regulators_opp_table = NULL; - } - - if (devfreq->clkname_opp_table) { - dev_pm_opp_put_clkname(devfreq->clkname_opp_table); - devfreq->clkname_opp_table = NULL; - } + dev_pm_opp_put_regulators(devfreq->regulators_opp_table); + dev_pm_opp_put_clkname(devfreq->clkname_opp_table); + devfreq->regulators_opp_table = NULL; + devfreq->clkname_opp_table = NULL; } int lima_devfreq_init(struct lima_device *ldev) From b66ba5b5938f8a51d4cb97d1392065d09551bc75 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 12:18:39 +0530 Subject: [PATCH 77/86] drm/panfrost: dev_pm_opp_put_*() accepts NULL argument The dev_pm_opp_put_*() APIs now accepts a NULL opp_table pointer and so there is no need for us to carry the extra check. Drop them. Reviewed-by: Steven Price Signed-off-by: Viresh Kumar --- drivers/gpu/drm/panfrost/panfrost_devfreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index 8ab025d0035f..97b5abc7c188 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -170,10 +170,8 @@ void panfrost_devfreq_fini(struct panfrost_device *pfdev) pfdevfreq->opp_of_table_added = false; } - if (pfdevfreq->regulators_opp_table) { - dev_pm_opp_put_regulators(pfdevfreq->regulators_opp_table); - pfdevfreq->regulators_opp_table = NULL; - } + dev_pm_opp_put_regulators(pfdevfreq->regulators_opp_table); + pfdevfreq->regulators_opp_table = NULL; } void panfrost_devfreq_resume(struct panfrost_device *pfdev) From e91e3d902b76c3f2a238873a17958080af018f08 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 12:18:39 +0530 Subject: [PATCH 78/86] media: venus: dev_pm_opp_put_*() accepts NULL argument The dev_pm_opp_put_*() APIs now accepts a NULL opp_table pointer and so there is no need for us to carry the extra check. Drop them. Signed-off-by: Viresh Kumar --- drivers/media/platform/qcom/venus/pm_helpers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c index 57877eacecf0..e1e9130288ad 100644 --- a/drivers/media/platform/qcom/venus/pm_helpers.c +++ b/drivers/media/platform/qcom/venus/pm_helpers.c @@ -898,8 +898,7 @@ static void core_put_v4(struct device *dev) if (core->has_opp_table) dev_pm_opp_of_remove_table(dev); - if (core->opp_table) - dev_pm_opp_put_clkname(core->opp_table); + dev_pm_opp_put_clkname(core->opp_table); } From 24b3c963c0108f3da6d978d74a745c824ab551dc Mon Sep 17 00:00:00 2001 From: Nicola Mazzucato Date: Tue, 8 Dec 2020 17:42:26 +0000 Subject: [PATCH 79/86] dt-bindings: opp: Allow empty OPP tables Currently the optional property opp-shared is used within an opp table to tell that a set of devices share their clock/voltage lines (and the OPP points). It is therefore possible to use an empty OPP table to convey only that information, useful in situations where the opp points are provided via other means (hardware. firmware, etc). Update the documentation to remark this additional case and provide an example. Signed-off-by: Nicola Mazzucato Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp.txt | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 9847dfeeffcb..08b3da4736cf 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -65,7 +65,9 @@ Required properties: - OPP nodes: One or more OPP nodes describing voltage-current-frequency combinations. Their name isn't significant but their phandle can be used to - reference an OPP. + reference an OPP. These are mandatory except for the case where the OPP table + is present only to indicate dependency between devices using the opp-shared + property. Optional properties: - opp-shared: Indicates that device nodes using this OPP Table Node's phandle @@ -568,3 +570,53 @@ Example 6: opp-microvolt-, opp-microamp-: }; }; }; + +Example 7: Single cluster Quad-core ARM cortex A53, OPP points from firmware, +distinct clock controls but two sets of clock/voltage/current lines. + +/ { + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a53"; + reg = <0x0 0x100>; + next-level-cache = <&A53_L2>; + clocks = <&dvfs_controller 0>; + operating-points-v2 = <&cpu_opp0_table>; + }; + cpu@1 { + compatible = "arm,cortex-a53"; + reg = <0x0 0x101>; + next-level-cache = <&A53_L2>; + clocks = <&dvfs_controller 1>; + operating-points-v2 = <&cpu_opp0_table>; + }; + cpu@2 { + compatible = "arm,cortex-a53"; + reg = <0x0 0x102>; + next-level-cache = <&A53_L2>; + clocks = <&dvfs_controller 2>; + operating-points-v2 = <&cpu_opp1_table>; + }; + cpu@3 { + compatible = "arm,cortex-a53"; + reg = <0x0 0x103>; + next-level-cache = <&A53_L2>; + clocks = <&dvfs_controller 3>; + operating-points-v2 = <&cpu_opp1_table>; + }; + + }; + + cpu_opp0_table: opp0_table { + compatible = "operating-points-v2"; + opp-shared; + }; + + cpu_opp1_table: opp1_table { + compatible = "operating-points-v2"; + opp-shared; + }; +}; From 6ee70e8c34e37a34f4dc2c8bc06febffd375fac4 Mon Sep 17 00:00:00 2001 From: Nicola Mazzucato Date: Tue, 8 Dec 2020 17:42:27 +0000 Subject: [PATCH 80/86] opp: of: Allow empty opp-table with opp-shared The opp binding now allows to have an empty opp table and shared-opp to still describe that devices share v/f lines. When initialising an empty opp table, allow such case by: - treating such conditions with warnings in place of errors - don't fail on empty table Signed-off-by: Nicola Mazzucato Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 874b58756220..96113fc0e18c 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -169,7 +169,8 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, /* Traversing the first OPP node is all we need */ np = of_get_next_available_child(opp_np, NULL); if (!np) { - dev_err(dev, "Empty OPP table\n"); + dev_warn(dev, "Empty OPP table\n"); + return; } @@ -377,7 +378,9 @@ int dev_pm_opp_of_find_icc_paths(struct device *dev, struct icc_path **paths; ret = _bandwidth_supported(dev, opp_table); - if (ret <= 0) + if (ret == -EINVAL) + return 0; /* Empty OPP table is a valid corner-case, let's not fail */ + else if (ret <= 0) return ret; ret = 0; From 718072ceb211833f3c71724f49d733d636067191 Mon Sep 17 00:00:00 2001 From: Thierry Strudel Date: Tue, 8 Dec 2020 11:19:55 -0800 Subject: [PATCH 81/86] PM: domains: create debugfs nodes when adding power domains debugfs nodes were created in genpd_debug_init alled in late_initcall preventing power domains registered though loadable modules to have a debugfs entry. Create/remove debugfs nodes when the power domain is added/removed to/from the internal gpd_list. Signed-off-by: Thierry Strudel Reviewed-by: Greg Kroah-Hartman Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 73 +++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 743268996336..bd4e0051e846 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "power.h" @@ -210,6 +211,18 @@ static void genpd_sd_counter_inc(struct generic_pm_domain *genpd) } #ifdef CONFIG_DEBUG_FS +static struct dentry *genpd_debugfs_dir; + +static void genpd_debug_add(struct generic_pm_domain *genpd); + +static void genpd_debug_remove(struct generic_pm_domain *genpd) +{ + struct dentry *d; + + d = debugfs_lookup(genpd->name, genpd_debugfs_dir); + debugfs_remove(d); +} + static void genpd_update_accounting(struct generic_pm_domain *genpd) { ktime_t delta, now; @@ -234,6 +247,8 @@ static void genpd_update_accounting(struct generic_pm_domain *genpd) genpd->accounting_time = now; } #else +static inline void genpd_debug_add(struct generic_pm_domain *genpd) {} +static inline void genpd_debug_remove(struct generic_pm_domain *genpd) {} static inline void genpd_update_accounting(struct generic_pm_domain *genpd) {} #endif @@ -1954,6 +1969,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd, mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); + genpd_debug_add(genpd); mutex_unlock(&gpd_list_lock); return 0; @@ -1987,6 +2003,7 @@ static int genpd_remove(struct generic_pm_domain *genpd) kfree(link); } + genpd_debug_remove(genpd); list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); @@ -2893,14 +2910,6 @@ core_initcall(genpd_bus_init); /*** debugfs support ***/ #ifdef CONFIG_DEBUG_FS -#include -#include -#include -#include -#include -#include -static struct dentry *genpd_debugfs_dir; - /* * TODO: This function is a slightly modified version of rtpm_status_show * from sysfs.c, so generalize it. @@ -3177,9 +3186,34 @@ DEFINE_SHOW_ATTRIBUTE(total_idle_time); DEFINE_SHOW_ATTRIBUTE(devices); DEFINE_SHOW_ATTRIBUTE(perf_state); -static int __init genpd_debug_init(void) +static void genpd_debug_add(struct generic_pm_domain *genpd) { struct dentry *d; + + if (!genpd_debugfs_dir) + return; + + d = debugfs_create_dir(genpd->name, genpd_debugfs_dir); + + debugfs_create_file("current_state", 0444, + d, genpd, &status_fops); + debugfs_create_file("sub_domains", 0444, + d, genpd, &sub_domains_fops); + debugfs_create_file("idle_states", 0444, + d, genpd, &idle_states_fops); + debugfs_create_file("active_time", 0444, + d, genpd, &active_time_fops); + debugfs_create_file("total_idle_time", 0444, + d, genpd, &total_idle_time_fops); + debugfs_create_file("devices", 0444, + d, genpd, &devices_fops); + if (genpd->set_performance_state) + debugfs_create_file("perf_state", 0444, + d, genpd, &perf_state_fops); +} + +static int __init genpd_debug_init(void) +{ struct generic_pm_domain *genpd; genpd_debugfs_dir = debugfs_create_dir("pm_genpd", NULL); @@ -3187,25 +3221,8 @@ static int __init genpd_debug_init(void) debugfs_create_file("pm_genpd_summary", S_IRUGO, genpd_debugfs_dir, NULL, &summary_fops); - list_for_each_entry(genpd, &gpd_list, gpd_list_node) { - d = debugfs_create_dir(genpd->name, genpd_debugfs_dir); - - debugfs_create_file("current_state", 0444, - d, genpd, &status_fops); - debugfs_create_file("sub_domains", 0444, - d, genpd, &sub_domains_fops); - debugfs_create_file("idle_states", 0444, - d, genpd, &idle_states_fops); - debugfs_create_file("active_time", 0444, - d, genpd, &active_time_fops); - debugfs_create_file("total_idle_time", 0444, - d, genpd, &total_idle_time_fops); - debugfs_create_file("devices", 0444, - d, genpd, &devices_fops); - if (genpd->set_performance_state) - debugfs_create_file("perf_state", 0444, - d, genpd, &perf_state_fops); - } + list_for_each_entry(genpd, &gpd_list, gpd_list_node) + genpd_debug_add(genpd); return 0; } From 2554c32f0b84df1f506546125c9d15c8044a2ec2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 12 Nov 2020 20:25:15 +0100 Subject: [PATCH 82/86] cpufreq: intel_pstate: Simplify intel_cpufreq_update_pstate() Avoid doing the same assignment in both branches of a conditional, do it after the whole conditional instead. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 36a3ccfe6d3d..2a4db856222f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2569,14 +2569,13 @@ static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy, int old_pstate = cpu->pstate.current_pstate; target_pstate = intel_pstate_prepare_request(cpu, target_pstate); - if (hwp_active) { + if (hwp_active) intel_cpufreq_adjust_hwp(cpu, target_pstate, policy->strict_target, fast_switch); - cpu->pstate.current_pstate = target_pstate; - } else if (target_pstate != old_pstate) { + else if (target_pstate != old_pstate) intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch); - cpu->pstate.current_pstate = target_pstate; - } + + cpu->pstate.current_pstate = target_pstate; intel_cpufreq_trace(cpu, fast_switch ? INTEL_PSTATE_TRACE_FAST_SWITCH : INTEL_PSTATE_TRACE_TARGET, old_pstate); From 90ac908a418b836427d6eaf84fbc5062881747fd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 12 Nov 2020 20:26:42 +0100 Subject: [PATCH 83/86] cpufreq: schedutil: Simplify sugov_update_next_freq() Rearrange a conditional to make it more straightforward. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- kernel/sched/cpufreq_schedutil.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 97d318b0cd0c..77736058d8e4 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -102,12 +102,10 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { - if (!sg_policy->need_freq_update) { - if (sg_policy->next_freq == next_freq) - return false; - } else { + if (sg_policy->need_freq_update) sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); - } + else if (sg_policy->next_freq == next_freq) + return false; sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; From 7854c7520b86f0c14f7fcfea6fd1785617844341 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 17 Nov 2020 17:02:10 +0530 Subject: [PATCH 84/86] cpufreq: stats: Use local_clock() instead of jiffies local_clock() has better precision and accuracy as compared to jiffies, lets use it for time management in cpufreq stats. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 6cd5c8ab5d49..da717f7cd9a9 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -9,9 +9,9 @@ #include #include #include +#include #include - struct cpufreq_stats { unsigned int total_trans; unsigned long long last_time; @@ -30,7 +30,7 @@ struct cpufreq_stats { static void cpufreq_stats_update(struct cpufreq_stats *stats, unsigned long long time) { - unsigned long long cur_time = get_jiffies_64(); + unsigned long long cur_time = local_clock(); stats->time_in_state[stats->last_index] += cur_time - time; stats->last_time = cur_time; @@ -42,7 +42,7 @@ static void cpufreq_stats_reset_table(struct cpufreq_stats *stats) memset(stats->time_in_state, 0, count * sizeof(u64)); memset(stats->trans_table, 0, count * count * sizeof(int)); - stats->last_time = get_jiffies_64(); + stats->last_time = local_clock(); stats->total_trans = 0; /* Adjust for the time elapsed since reset was requested */ @@ -82,18 +82,18 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) * before the reset_pending read above. */ smp_rmb(); - time = get_jiffies_64() - READ_ONCE(stats->reset_time); + time = local_clock() - READ_ONCE(stats->reset_time); } else { time = 0; } } else { time = stats->time_in_state[i]; if (i == stats->last_index) - time += get_jiffies_64() - stats->last_time; + time += local_clock() - stats->last_time; } len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], - jiffies_64_to_clock_t(time)); + nsec_to_clock_t(time)); } return len; } @@ -109,7 +109,7 @@ static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf, * Defer resetting of stats to cpufreq_stats_record_transition() to * avoid races. */ - WRITE_ONCE(stats->reset_time, get_jiffies_64()); + WRITE_ONCE(stats->reset_time, local_clock()); /* * The memory barrier below is to prevent the readers of reset_time from * seeing a stale or partially updated value. @@ -249,7 +249,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) stats->freq_table[i++] = pos->frequency; stats->state_num = i; - stats->last_time = get_jiffies_64(); + stats->last_time = local_clock(); stats->last_index = freq_table_get_index(stats, policy->cur); policy->stats = stats; From ec06e586ab921f8eca86d6c3ed32bffefd3ef50f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 18 Nov 2020 20:02:42 +0100 Subject: [PATCH 85/86] cpufreq: Fix up several kerneldoc comments Fix up the remaining kerneldoc comments that don't adhere to the expected format and clarify some of them a bit. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 70 +++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d66d0df37072..95d25557fb52 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -298,8 +298,10 @@ struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ -/* - * adjust_jiffies - adjust the system "loops_per_jiffy" +/** + * adjust_jiffies - Adjust the system "loops_per_jiffy". + * @val: CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE. + * @ci: Frequency change information. * * This function alters the system "loops_per_jiffy" for the clock * speed change. Note that loops_per_jiffy cannot be updated on SMP @@ -331,14 +333,14 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) } /** - * cpufreq_notify_transition - Notify frequency transition and adjust_jiffies. + * cpufreq_notify_transition - Notify frequency transition and adjust jiffies. * @policy: cpufreq policy to enable fast frequency switching for. * @freqs: contain details of the frequency update. * @state: set to CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE. * - * This function calls the transition notifiers and the "adjust_jiffies" - * function. It is called twice on all CPU frequency changes that have - * external effects. + * This function calls the transition notifiers and adjust_jiffies(). + * + * It is called twice on all CPU frequency changes that have external effects. */ static void cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, @@ -1646,13 +1648,12 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) } /** - * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're - * in deep trouble. - * @policy: policy managing CPUs - * @new_freq: CPU frequency the CPU actually runs at + * cpufreq_out_of_sync - Fix up actual and saved CPU frequency difference. + * @policy: Policy managing CPUs. + * @new_freq: New CPU frequency. * - * We adjust to current frequency first, and need to clean up later. - * So either call to cpufreq_update_policy() or schedule handle_update()). + * Adjust to the current frequency first and clean up later by either calling + * cpufreq_update_policy(), or scheduling handle_update(). */ static void cpufreq_out_of_sync(struct cpufreq_policy *policy, unsigned int new_freq) @@ -1832,7 +1833,7 @@ int cpufreq_generic_suspend(struct cpufreq_policy *policy) EXPORT_SYMBOL(cpufreq_generic_suspend); /** - * cpufreq_suspend() - Suspend CPUFreq governors + * cpufreq_suspend() - Suspend CPUFreq governors. * * Called during system wide Suspend/Hibernate cycles for suspending governors * as some platforms can't change frequency after this point in suspend cycle. @@ -1868,7 +1869,7 @@ suspend: } /** - * cpufreq_resume() - Resume CPUFreq governors + * cpufreq_resume() - Resume CPUFreq governors. * * Called during system wide Suspend/Hibernate cycle for resuming governors that * are suspended with cpufreq_suspend(). @@ -1920,10 +1921,10 @@ bool cpufreq_driver_test_flags(u16 flags) } /** - * cpufreq_get_current_driver - return current driver's name + * cpufreq_get_current_driver - Return the current driver's name. * - * Return the name string of the currently loaded cpufreq driver - * or NULL, if none. + * Return the name string of the currently registered cpufreq driver or NULL if + * none. */ const char *cpufreq_get_current_driver(void) { @@ -1935,10 +1936,10 @@ const char *cpufreq_get_current_driver(void) EXPORT_SYMBOL_GPL(cpufreq_get_current_driver); /** - * cpufreq_get_driver_data - return current driver data + * cpufreq_get_driver_data - Return current driver data. * - * Return the private data of the currently loaded cpufreq - * driver, or NULL if no cpufreq driver is loaded. + * Return the private data of the currently registered cpufreq driver, or NULL + * if no cpufreq driver has been registered. */ void *cpufreq_get_driver_data(void) { @@ -1954,17 +1955,16 @@ EXPORT_SYMBOL_GPL(cpufreq_get_driver_data); *********************************************************************/ /** - * cpufreq_register_notifier - register a driver with cpufreq - * @nb: notifier function to register - * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER + * cpufreq_register_notifier - Register a notifier with cpufreq. + * @nb: notifier function to register. + * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER. * - * Add a driver to one of two lists: either a list of drivers that - * are notified about clock rate changes (once before and once after - * the transition), or a list of drivers that are notified about - * changes in cpufreq policy. + * Add a notifier to one of two lists: either a list of notifiers that run on + * clock rate changes (once before and once after every transition), or a list + * of notifiers that ron on cpufreq policy changes. * - * This function may sleep, and has the same return conditions as - * blocking_notifier_chain_register. + * This function may sleep and it has the same return values as + * blocking_notifier_chain_register(). */ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) { @@ -2001,14 +2001,14 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) EXPORT_SYMBOL(cpufreq_register_notifier); /** - * cpufreq_unregister_notifier - unregister a driver with cpufreq - * @nb: notifier block to be unregistered - * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER + * cpufreq_unregister_notifier - Unregister a notifier from cpufreq. + * @nb: notifier block to be unregistered. + * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER. * - * Remove a driver from the CPU frequency notifier list. + * Remove a notifier from one of the cpufreq notifier lists. * - * This function may sleep, and has the same return conditions as - * blocking_notifier_chain_unregister. + * This function may sleep and it has the same return values as + * blocking_notifier_chain_unregister(). */ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) { From b96f038432362a20b96d4c52cefeb2936e2cfd2f Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Thu, 26 Nov 2020 09:12:39 +0800 Subject: [PATCH 86/86] cpufreq: Fix cpufreq_online() return value on errors Make cpufreq_online() return negative error codes on all errors that cause the policy to be destroyed, as appropriate. Signed-off-by: Wang ShaoBo [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 95d25557fb52..c17aa2973c44 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1393,8 +1393,10 @@ static int cpufreq_online(unsigned int cpu) policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req), GFP_KERNEL); - if (!policy->min_freq_req) + if (!policy->min_freq_req) { + ret = -ENOMEM; goto out_destroy_policy; + } ret = freq_qos_add_request(&policy->constraints, policy->min_freq_req, FREQ_QOS_MIN, @@ -1431,6 +1433,7 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->get && has_target()) { policy->cur = cpufreq_driver->get(policy->cpu); if (!policy->cur) { + ret = -EIO; pr_err("%s: ->get() failed\n", __func__); goto out_destroy_policy; }