Merge branches 'powercap', 'pm-clk', 'pm-config' and 'pm-opp'

* powercap:
  powercap / RAPL: fix build dependency on iosf_mbi
  powercap / RAPL: add new model ids
  powercap / RAPL: handle atom and core differences
  powercap / RAPL: abstract per cpu type functions

* pm-clk:
  PM / clock_ops: make __pm_clk_enable more generic
  PM / clock_ops: Add pm_clk_add_clk()

* pm-config:
  PM: Kconfig: fix unmet dependency for CPU_PM

* pm-opp:
  PM / OPP replace kfree_rcu() with call_srcu() in opp_set_availability()
  PM / OPP Introduce APIs to remove OPPs
  PM / OPP mark OPPs as 'static' or 'dynamic'
  PM / OPP don't match for existing OPPs when list is empty
  PM / OPP rename 'head' as 'rcu_head' or 'srcu_head' based on its type
This commit is contained in:
Rafael J. Wysocki 2014-12-08 19:57:41 +01:00
7 changed files with 405 additions and 163 deletions

View File

@ -12,6 +12,7 @@
#include <linux/pm.h> #include <linux/pm.h>
#include <linux/pm_clock.h> #include <linux/pm_clock.h>
#include <linux/clk.h> #include <linux/clk.h>
#include <linux/clkdev.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/err.h> #include <linux/err.h>
@ -34,14 +35,20 @@ struct pm_clock_entry {
/** /**
* pm_clk_enable - Enable a clock, reporting any errors * pm_clk_enable - Enable a clock, reporting any errors
* @dev: The device for the given clock * @dev: The device for the given clock
* @clk: The clock being enabled. * @ce: PM clock entry corresponding to the clock.
*/ */
static inline int __pm_clk_enable(struct device *dev, struct clk *clk) static inline int __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce)
{ {
int ret = clk_enable(clk); int ret;
if (ret)
dev_err(dev, "%s: failed to enable clk %p, error %d\n", if (ce->status < PCE_STATUS_ERROR) {
__func__, clk, ret); ret = clk_enable(ce->clk);
if (!ret)
ce->status = PCE_STATUS_ENABLED;
else
dev_err(dev, "%s: failed to enable clk %p, error %d\n",
__func__, ce->clk, ret);
}
return ret; return ret;
} }
@ -53,7 +60,8 @@ static inline int __pm_clk_enable(struct device *dev, struct clk *clk)
*/ */
static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce) static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce)
{ {
ce->clk = clk_get(dev, ce->con_id); if (!ce->clk)
ce->clk = clk_get(dev, ce->con_id);
if (IS_ERR(ce->clk)) { if (IS_ERR(ce->clk)) {
ce->status = PCE_STATUS_ERROR; ce->status = PCE_STATUS_ERROR;
} else { } else {
@ -63,15 +71,8 @@ static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce)
} }
} }
/** static int __pm_clk_add(struct device *dev, const char *con_id,
* pm_clk_add - Start using a device clock for power management. struct clk *clk)
* @dev: Device whose clock is going to be used for power management.
* @con_id: Connection ID of the clock.
*
* Add the clock represented by @con_id to the list of clocks used for
* the power management of @dev.
*/
int pm_clk_add(struct device *dev, const char *con_id)
{ {
struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_subsys_data *psd = dev_to_psd(dev);
struct pm_clock_entry *ce; struct pm_clock_entry *ce;
@ -93,6 +94,12 @@ int pm_clk_add(struct device *dev, const char *con_id)
kfree(ce); kfree(ce);
return -ENOMEM; return -ENOMEM;
} }
} else {
if (IS_ERR(ce->clk) || !__clk_get(clk)) {
kfree(ce);
return -ENOENT;
}
ce->clk = clk;
} }
pm_clk_acquire(dev, ce); pm_clk_acquire(dev, ce);
@ -103,6 +110,32 @@ int pm_clk_add(struct device *dev, const char *con_id)
return 0; return 0;
} }
/**
 * pm_clk_add - Start using a device clock for power management.
 * @dev: Device whose clock is going to be used for power management.
 * @con_id: Connection ID of the clock.
 *
 * Add the clock represented by @con_id to the list of clocks used for
 * the power management of @dev.
 *
 * This is a thin wrapper: it forwards @dev and @con_id to __pm_clk_add()
 * with a NULL clock pointer and returns that function's result unchanged.
 */
int pm_clk_add(struct device *dev, const char *con_id)
{
return __pm_clk_add(dev, con_id, NULL);
}
/**
 * pm_clk_add_clk - Start using a device clock for power management.
 * @dev: Device whose clock is going to be used for power management.
 * @clk: Clock pointer
 *
 * Add the clock to the list of clocks used for the power management of @dev.
 * A reference is taken on @clk; the caller keeps its own reference and should
 * release it with clk_put() when it is done with the clock.
 *
 * This is a thin wrapper: it forwards @dev and @clk to __pm_clk_add() with a
 * NULL connection ID and returns that function's result unchanged.
 */
int pm_clk_add_clk(struct device *dev, struct clk *clk)
{
return __pm_clk_add(dev, NULL, clk);
}
/** /**
* __pm_clk_remove - Destroy PM clock entry. * __pm_clk_remove - Destroy PM clock entry.
* @ce: PM clock entry to destroy. * @ce: PM clock entry to destroy.
@ -266,7 +299,6 @@ int pm_clk_resume(struct device *dev)
struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_subsys_data *psd = dev_to_psd(dev);
struct pm_clock_entry *ce; struct pm_clock_entry *ce;
unsigned long flags; unsigned long flags;
int ret;
dev_dbg(dev, "%s()\n", __func__); dev_dbg(dev, "%s()\n", __func__);
@ -275,13 +307,8 @@ int pm_clk_resume(struct device *dev)
spin_lock_irqsave(&psd->lock, flags); spin_lock_irqsave(&psd->lock, flags);
list_for_each_entry(ce, &psd->clock_list, node) { list_for_each_entry(ce, &psd->clock_list, node)
if (ce->status < PCE_STATUS_ERROR) { __pm_clk_enable(dev, ce);
ret = __pm_clk_enable(dev, ce->clk);
if (!ret)
ce->status = PCE_STATUS_ENABLED;
}
}
spin_unlock_irqrestore(&psd->lock, flags); spin_unlock_irqrestore(&psd->lock, flags);
@ -390,7 +417,6 @@ int pm_clk_resume(struct device *dev)
struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_subsys_data *psd = dev_to_psd(dev);
struct pm_clock_entry *ce; struct pm_clock_entry *ce;
unsigned long flags; unsigned long flags;
int ret;
dev_dbg(dev, "%s()\n", __func__); dev_dbg(dev, "%s()\n", __func__);
@ -400,13 +426,8 @@ int pm_clk_resume(struct device *dev)
spin_lock_irqsave(&psd->lock, flags); spin_lock_irqsave(&psd->lock, flags);
list_for_each_entry(ce, &psd->clock_list, node) { list_for_each_entry(ce, &psd->clock_list, node)
if (ce->status < PCE_STATUS_ERROR) { __pm_clk_enable(dev, ce);
ret = __pm_clk_enable(dev, ce->clk);
if (!ret)
ce->status = PCE_STATUS_ENABLED;
}
}
spin_unlock_irqrestore(&psd->lock, flags); spin_unlock_irqrestore(&psd->lock, flags);

View File

@ -49,11 +49,12 @@
* are protected by the dev_opp_list_lock for integrity. * are protected by the dev_opp_list_lock for integrity.
* IMPORTANT: the opp nodes should be maintained in increasing * IMPORTANT: the opp nodes should be maintained in increasing
* order. * order.
* @dynamic: true if this OPP was added at run time, i.e. not created from static DT entries.
* @available: true/false - marks if this OPP as available or not * @available: true/false - marks if this OPP as available or not
* @rate: Frequency in hertz * @rate: Frequency in hertz
* @u_volt: Nominal voltage in microvolts corresponding to this OPP * @u_volt: Nominal voltage in microvolts corresponding to this OPP
* @dev_opp: points back to the device_opp struct this opp belongs to * @dev_opp: points back to the device_opp struct this opp belongs to
* @head: RCU callback head used for deferred freeing * @rcu_head: RCU callback head used for deferred freeing
* *
* This structure stores the OPP information for a given device. * This structure stores the OPP information for a given device.
*/ */
@ -61,11 +62,12 @@ struct dev_pm_opp {
struct list_head node; struct list_head node;
bool available; bool available;
bool dynamic;
unsigned long rate; unsigned long rate;
unsigned long u_volt; unsigned long u_volt;
struct device_opp *dev_opp; struct device_opp *dev_opp;
struct rcu_head head; struct rcu_head rcu_head;
}; };
/** /**
@ -76,7 +78,8 @@ struct dev_pm_opp {
* RCU usage: nodes are not modified in the list of device_opp, * RCU usage: nodes are not modified in the list of device_opp,
* however addition is possible and is secured by dev_opp_list_lock * however addition is possible and is secured by dev_opp_list_lock
* @dev: device pointer * @dev: device pointer
* @head: notifier head to notify the OPP availability changes. * @srcu_head: notifier head to notify the OPP availability changes.
* @rcu_head: RCU callback head used for deferred freeing
* @opp_list: list of opps * @opp_list: list of opps
* *
* This is an internal data structure maintaining the link to opps attached to * This is an internal data structure maintaining the link to opps attached to
@ -87,7 +90,8 @@ struct device_opp {
struct list_head node; struct list_head node;
struct device *dev; struct device *dev;
struct srcu_notifier_head head; struct srcu_notifier_head srcu_head;
struct rcu_head rcu_head;
struct list_head opp_list; struct list_head opp_list;
}; };
@ -378,30 +382,8 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
} }
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
/** static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
* dev_pm_opp_add() - Add an OPP table from a table definitions unsigned long u_volt, bool dynamic)
* @dev: device for which we do this operation
* @freq: Frequency in Hz for this OPP
* @u_volt: Voltage in uVolts for this OPP
*
* This function adds an opp definition to the opp list and returns status.
* The opp is made available by default and it can be controlled using
* dev_pm_opp_enable/disable functions.
*
* Locking: The internal device_opp and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
* mutex cannot be locked.
*
* Return:
* 0: On success OR
* Duplicate OPPs (both freq and volt are same) and opp->available
* -EEXIST: Freq are same and volt are different OR
* Duplicate OPPs (both freq and volt are same) and !opp->available
* -ENOMEM: Memory allocation failure
*/
int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
{ {
struct device_opp *dev_opp = NULL; struct device_opp *dev_opp = NULL;
struct dev_pm_opp *opp, *new_opp; struct dev_pm_opp *opp, *new_opp;
@ -417,6 +399,13 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
/* Hold our list modification lock here */ /* Hold our list modification lock here */
mutex_lock(&dev_opp_list_lock); mutex_lock(&dev_opp_list_lock);
/* populate the opp table */
new_opp->dev_opp = dev_opp;
new_opp->rate = freq;
new_opp->u_volt = u_volt;
new_opp->available = true;
new_opp->dynamic = dynamic;
/* Check for existing list for 'dev' */ /* Check for existing list for 'dev' */
dev_opp = find_device_opp(dev); dev_opp = find_device_opp(dev);
if (IS_ERR(dev_opp)) { if (IS_ERR(dev_opp)) {
@ -436,19 +425,15 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
} }
dev_opp->dev = dev; dev_opp->dev = dev;
srcu_init_notifier_head(&dev_opp->head); srcu_init_notifier_head(&dev_opp->srcu_head);
INIT_LIST_HEAD(&dev_opp->opp_list); INIT_LIST_HEAD(&dev_opp->opp_list);
/* Secure the device list modification */ /* Secure the device list modification */
list_add_rcu(&dev_opp->node, &dev_opp_list); list_add_rcu(&dev_opp->node, &dev_opp_list);
head = &dev_opp->opp_list;
goto list_add;
} }
/* populate the opp table */
new_opp->dev_opp = dev_opp;
new_opp->rate = freq;
new_opp->u_volt = u_volt;
new_opp->available = true;
/* /*
* Insert new OPP in order of increasing frequency * Insert new OPP in order of increasing frequency
* and discard if already present * and discard if already present
@ -474,6 +459,7 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
return ret; return ret;
} }
list_add:
list_add_rcu(&new_opp->node, head); list_add_rcu(&new_opp->node, head);
mutex_unlock(&dev_opp_list_lock); mutex_unlock(&dev_opp_list_lock);
@ -481,11 +467,109 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
* Notify the changes in the availability of the operable * Notify the changes in the availability of the operable
* frequency/voltage list. * frequency/voltage list.
*/ */
srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp); srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
return 0; return 0;
} }
/**
 * dev_pm_opp_add() - Add a single OPP definition for a device
 * @dev: device for which we do this operation
 * @freq: Frequency in Hz for this OPP
 * @u_volt: Voltage in uVolts for this OPP
 *
 * This function adds an opp definition to the opp list and returns status.
 * The opp is made available by default and it can be controlled using
 * dev_pm_opp_enable/disable functions.
 *
 * The work is delegated to dev_pm_opp_add_dynamic() with the 'dynamic'
 * argument set to true, so OPPs added through this entry point are marked
 * as dynamic.
 *
 * Locking: The internal device_opp and opp structures are RCU protected.
 * Hence this function internally uses RCU updater strategy with mutex locks
 * to keep the integrity of the internal data structures. Callers should ensure
 * that this function is *NOT* called under RCU protection or in contexts where
 * mutex cannot be locked.
 *
 * Return:
 * 0:		On success OR
 *		Duplicate OPPs (both freq and volt are same) and opp->available
 * -EEXIST:	Freq are same and volt are different OR
 *		Duplicate OPPs (both freq and volt are same) and !opp->available
 * -ENOMEM:	Memory allocation failure
 */
int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
{
return dev_pm_opp_add_dynamic(dev, freq, u_volt, true);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_add); EXPORT_SYMBOL_GPL(dev_pm_opp_add);
/*
 * kfree_opp_rcu() - deferred-free callback for a struct dev_pm_opp
 * @head: the rcu_head member embedded in the OPP being released
 *
 * Used in this file as the callback handed to call_srcu(). The OPP is not
 * freed directly here: it is passed on to kfree_rcu(), which performs the
 * actual release later.
 */
static void kfree_opp_rcu(struct rcu_head *head)
{
	struct dev_pm_opp *stale_opp;

	stale_opp = container_of(head, struct dev_pm_opp, rcu_head);
	kfree_rcu(stale_opp, rcu_head);
}
/*
 * kfree_device_rcu() - deferred-free callback for a struct device_opp
 * @head: the rcu_head member embedded in the device_opp being released
 *
 * Used in this file as the callback handed to call_srcu(); recovers the
 * enclosing device_opp from @head and frees it.
 */
static void kfree_device_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct device_opp, rcu_head));
}
/*
 * __dev_pm_opp_remove() - unlink one OPP and schedule it for freeing
 * @dev_opp: per-device OPP container that @opp belongs to
 * @opp: the OPP to remove
 *
 * Sends OPP_EVENT_REMOVE to the device's notifier chain, unlinks @opp from
 * the OPP list and queues it for deferred freeing. If that leaves the
 * device's OPP list empty, @dev_opp itself is unlinked and queued for
 * deferred freeing as well.
 *
 * Both callers visible in this file invoke it with dev_opp_list_lock held.
 *
 * NOTE(review): no declaration of this helper is visible in the headers
 * shown with this change; it could probably be made static - confirm that
 * nothing outside this file uses it.
 */
void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp)
{
/*
* Notify the changes in the availability of the operable
* frequency/voltage list.
*/
srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
/* Unlink first; the memory is only released by the deferred callback. */
list_del_rcu(&opp->node);
call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, kfree_opp_rcu);
/* Last OPP gone: drop the per-device container too. */
if (list_empty(&dev_opp->opp_list)) {
list_del_rcu(&dev_opp->node);
call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head,
kfree_device_rcu);
}
}
/**
* dev_pm_opp_remove() - Remove an OPP from OPP list
* @dev: device for which we do this operation
* @freq: OPP to remove with matching 'freq'
*
* This function removes an opp from the opp list.
*/
void dev_pm_opp_remove(struct device *dev, unsigned long freq)
{
struct dev_pm_opp *opp;
struct device_opp *dev_opp;
bool found = false;
/* Hold our list modification lock here */
mutex_lock(&dev_opp_list_lock);
dev_opp = find_device_opp(dev);
if (IS_ERR(dev_opp))
goto unlock;
list_for_each_entry(opp, &dev_opp->opp_list, node) {
if (opp->rate == freq) {
found = true;
break;
}
}
if (!found) {
dev_warn(dev, "%s: Couldn't find OPP with freq: %lu\n",
__func__, freq);
goto unlock;
}
__dev_pm_opp_remove(dev_opp, opp);
unlock:
mutex_unlock(&dev_opp_list_lock);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
/** /**
* opp_set_availability() - helper to set the availability of an opp * opp_set_availability() - helper to set the availability of an opp
* @dev: device for which we do this operation * @dev: device for which we do this operation
@ -557,14 +641,14 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
list_replace_rcu(&opp->node, &new_opp->node); list_replace_rcu(&opp->node, &new_opp->node);
mutex_unlock(&dev_opp_list_lock); mutex_unlock(&dev_opp_list_lock);
kfree_rcu(opp, head); call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, kfree_opp_rcu);
/* Notify the change of the OPP availability */ /* Notify the change of the OPP availability */
if (availability_req) if (availability_req)
srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ENABLE, srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE,
new_opp); new_opp);
else else
srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_DISABLE, srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE,
new_opp); new_opp);
return 0; return 0;
@ -629,7 +713,7 @@ struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
if (IS_ERR(dev_opp)) if (IS_ERR(dev_opp))
return ERR_CAST(dev_opp); /* matching type */ return ERR_CAST(dev_opp); /* matching type */
return &dev_opp->head; return &dev_opp->srcu_head;
} }
#ifdef CONFIG_OF #ifdef CONFIG_OF
@ -666,7 +750,7 @@ int of_init_opp_table(struct device *dev)
unsigned long freq = be32_to_cpup(val++) * 1000; unsigned long freq = be32_to_cpup(val++) * 1000;
unsigned long volt = be32_to_cpup(val++); unsigned long volt = be32_to_cpup(val++);
if (dev_pm_opp_add(dev, freq, volt)) if (dev_pm_opp_add_dynamic(dev, freq, volt, false))
dev_warn(dev, "%s: Failed to add OPP %ld\n", dev_warn(dev, "%s: Failed to add OPP %ld\n",
__func__, freq); __func__, freq);
nr -= 2; nr -= 2;
@ -675,4 +759,34 @@ int of_init_opp_table(struct device *dev)
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(of_init_opp_table); EXPORT_SYMBOL_GPL(of_init_opp_table);
/**
* of_free_opp_table() - Free OPP table entries created from static DT entries
* @dev: device pointer used to lookup device OPPs.
*
* Free OPPs created using static entries present in DT.
*/
void of_free_opp_table(struct device *dev)
{
struct device_opp *dev_opp = find_device_opp(dev);
struct dev_pm_opp *opp, *tmp;
/* Check for existing list for 'dev' */
dev_opp = find_device_opp(dev);
if (WARN(IS_ERR(dev_opp), "%s: dev_opp: %ld\n", dev_name(dev),
PTR_ERR(dev_opp)))
return;
/* Hold our list modification lock here */
mutex_lock(&dev_opp_list_lock);
/* Free static OPPs */
list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) {
if (!opp->dynamic)
__dev_pm_opp_remove(dev_opp, opp);
}
mutex_unlock(&dev_opp_list_lock);
}
EXPORT_SYMBOL_GPL(of_free_opp_table);
#endif #endif

View File

@ -17,7 +17,7 @@ if POWERCAP
# Client driver configurations go here. # Client driver configurations go here.
config INTEL_RAPL config INTEL_RAPL
tristate "Intel RAPL Support" tristate "Intel RAPL Support"
depends on X86 depends on X86 && IOSF_MBI
default n default n
---help--- ---help---
This enables support for the Intel Running Average Power Limit (RAPL) This enables support for the Intel Running Average Power Limit (RAPL)

View File

@ -29,6 +29,7 @@
#include <linux/sysfs.h> #include <linux/sysfs.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/powercap.h> #include <linux/powercap.h>
#include <asm/iosf_mbi.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/cpu_device_id.h> #include <asm/cpu_device_id.h>
@ -70,11 +71,6 @@
#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
#define RAPL_PRIMITIVE_DUMMY BIT(2) #define RAPL_PRIMITIVE_DUMMY BIT(2)
/* scale RAPL units to avoid floating point math inside kernel */
#define POWER_UNIT_SCALE (1000000)
#define ENERGY_UNIT_SCALE (1000000)
#define TIME_UNIT_SCALE (1000000)
#define TIME_WINDOW_MAX_MSEC 40000 #define TIME_WINDOW_MAX_MSEC 40000
#define TIME_WINDOW_MIN_MSEC 250 #define TIME_WINDOW_MIN_MSEC 250
@ -175,9 +171,9 @@ struct rapl_package {
unsigned int id; /* physical package/socket id */ unsigned int id; /* physical package/socket id */
unsigned int nr_domains; unsigned int nr_domains;
unsigned long domain_map; /* bit map of active domains */ unsigned long domain_map; /* bit map of active domains */
unsigned int power_unit_divisor; unsigned int power_unit;
unsigned int energy_unit_divisor; unsigned int energy_unit;
unsigned int time_unit_divisor; unsigned int time_unit;
struct rapl_domain *domains; /* array of domains, sized at runtime */ struct rapl_domain *domains; /* array of domains, sized at runtime */
struct powercap_zone *power_zone; /* keep track of parent zone */ struct powercap_zone *power_zone; /* keep track of parent zone */
int nr_cpus; /* active cpus on the package, topology info is lost during int nr_cpus; /* active cpus on the package, topology info is lost during
@ -188,6 +184,18 @@ struct rapl_package {
*/ */
struct list_head plist; struct list_head plist;
}; };
/*
 * Per-CPU-type operations used by the RAPL driver.
 * @check_unit:          read the unit information for a package via @cpu and
 *                       fill in the package's unit fields; returns 0 or a
 *                       negative errno.
 * @set_floor_freq:      called from set_domain_enable() after PL1_ENABLE has
 *                       been written, with the same mode flag.
 * @compute_time_window: convert a time window between its raw register
 *                       encoding and the driver's time units; direction is
 *                       selected by @to_raw.
 */
struct rapl_defaults {
	int (*check_unit)(struct rapl_package *rp, int cpu);
	void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
	u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
				   bool to_raw);
};

/*
 * Operations selected for the running CPU. The tables this can point to are
 * defined 'static const', so the pointer is const-qualified as well to keep
 * them from being written through it.
 */
static const struct rapl_defaults *rapl_defaults;
/* Sideband MBI registers */
#define IOSF_CPU_POWER_BUDGET_CTL (0x2)
#define PACKAGE_PLN_INT_SAVED BIT(0) #define PACKAGE_PLN_INT_SAVED BIT(0)
#define MAX_PRIM_NAME (32) #define MAX_PRIM_NAME (32)
@ -339,23 +347,13 @@ static int find_nr_power_limit(struct rapl_domain *rd)
static int set_domain_enable(struct powercap_zone *power_zone, bool mode) static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{ {
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
int nr_powerlimit;
if (rd->state & DOMAIN_STATE_BIOS_LOCKED) if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES; return -EACCES;
get_online_cpus(); get_online_cpus();
nr_powerlimit = find_nr_power_limit(rd);
/* here we activate/deactivate the hardware for power limiting */
rapl_write_data_raw(rd, PL1_ENABLE, mode); rapl_write_data_raw(rd, PL1_ENABLE, mode);
/* always enable clamp such that p-state can go below OS requested rapl_defaults->set_floor_freq(rd, mode);
* range. power capping priority over guranteed frequency.
*/
rapl_write_data_raw(rd, PL1_CLAMP, mode);
/* some domains have pl2 */
if (nr_powerlimit > 1) {
rapl_write_data_raw(rd, PL2_ENABLE, mode);
rapl_write_data_raw(rd, PL2_CLAMP, mode);
}
put_online_cpus(); put_online_cpus();
return 0; return 0;
@ -653,9 +651,7 @@ static void rapl_init_domains(struct rapl_package *rp)
static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
int to_raw) int to_raw)
{ {
u64 divisor = 1; u64 units = 1;
int scale = 1; /* scale to user friendly data without floating point */
u64 f, y; /* fraction and exp. used for time unit */
struct rapl_package *rp; struct rapl_package *rp;
rp = find_package_by_id(package); rp = find_package_by_id(package);
@ -664,42 +660,24 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
switch (type) { switch (type) {
case POWER_UNIT: case POWER_UNIT:
divisor = rp->power_unit_divisor; units = rp->power_unit;
scale = POWER_UNIT_SCALE;
break; break;
case ENERGY_UNIT: case ENERGY_UNIT:
scale = ENERGY_UNIT_SCALE; units = rp->energy_unit;
divisor = rp->energy_unit_divisor;
break; break;
case TIME_UNIT: case TIME_UNIT:
divisor = rp->time_unit_divisor; return rapl_defaults->compute_time_window(rp, value, to_raw);
scale = TIME_UNIT_SCALE;
/* special processing based on 2^Y*(1+F)/4 = val/divisor, refer
* to Intel Software Developer's manual Vol. 3a, CH 14.7.4.
*/
if (!to_raw) {
f = (value & 0x60) >> 5;
y = value & 0x1f;
value = (1 << y) * (4 + f) * scale / 4;
return div64_u64(value, divisor);
} else {
do_div(value, scale);
value *= divisor;
y = ilog2(value);
f = div64_u64(4 * (value - (1 << y)), 1 << y);
value = (y & 0x1f) | ((f & 0x3) << 5);
return value;
}
break;
case ARBITRARY_UNIT: case ARBITRARY_UNIT:
default: default:
return value; return value;
}; };
if (to_raw) if (to_raw)
return div64_u64(value * divisor, scale); return div64_u64(value, units);
else
return div64_u64(value * scale, divisor); value *= units;
return value;
} }
/* in the order of enum rapl_primitives */ /* in the order of enum rapl_primitives */
@ -833,12 +811,18 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
return 0; return 0;
} }
static const struct x86_cpu_id energy_unit_quirk_ids[] = { /*
{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ * Raw RAPL data stored in MSRs are in certain scales. We need to
{} * convert them into standard units based on the units reported in
}; * the RAPL unit MSRs. This is specific to CPUs as the method to
* calculate units differ on different CPUs.
static int rapl_check_unit(struct rapl_package *rp, int cpu) * We convert the units to below format based on CPUs.
* i.e.
* energy unit: microJoules : Represented in microJoules by default
* power unit : microWatts : Represented in milliWatts by default
* time unit : microseconds: Represented in seconds by default
*/
static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
{ {
u64 msr_val; u64 msr_val;
u32 value; u32 value;
@ -849,36 +833,47 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu)
return -ENODEV; return -ENODEV;
} }
/* Raw RAPL data stored in MSRs are in certain scales. We need to
* convert them into standard units based on the divisors reported in
* the RAPL unit MSRs.
* i.e.
* energy unit: 1/enery_unit_divisor Joules
* power unit: 1/power_unit_divisor Watts
* time unit: 1/time_unit_divisor Seconds
*/
value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
/* some CPUs have different way to calculate energy unit */ rp->energy_unit = 1000000 / (1 << value);
if (x86_match_cpu(energy_unit_quirk_ids))
rp->energy_unit_divisor = 1000000 / (1 << value);
else
rp->energy_unit_divisor = 1 << value;
value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
rp->power_unit_divisor = 1 << value; rp->power_unit = 1000000 / (1 << value);
value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit_divisor = 1 << value; rp->time_unit = 1000000 / (1 << value);
pr_debug("Physical package %d units: energy=%d, time=%d, power=%d\n", pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n",
rp->id, rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
rp->energy_unit_divisor,
rp->time_unit_divisor,
rp->power_unit_divisor);
return 0; return 0;
} }
/*
 * rapl_check_unit_atom() - read and decode the RAPL unit MSR (Atom variant)
 * @rp:  package whose energy_unit, power_unit and time_unit are filled in
 * @cpu: CPU on which MSR_RAPL_POWER_UNIT is read
 *
 * With E, P and T being the energy, power and time fields extracted from the
 * MSR, the package units are set to:
 *   energy_unit = 2^E
 *   power_unit  = 2^P * 1000
 *   time_unit   = 1000000 / 2^T
 * (labelled uJ, uW and us respectively in the debug message below).
 *
 * Return: 0 on success, -ENODEV if the MSR cannot be read.
 */
static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
{
	u64 msr_val;
	u32 energy_exp, power_exp, time_exp;

	if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) {
		pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n",
		       MSR_RAPL_POWER_UNIT, cpu);
		return -ENODEV;
	}

	/* Pull the three exponent fields out of the MSR value first. */
	energy_exp = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
	power_exp = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
	time_exp = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;

	rp->energy_unit = 1 << energy_exp;
	rp->power_unit = (1 << power_exp) * 1000;
	rp->time_unit = 1000000 / (1 << time_exp);

	pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n",
		 rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);

	return 0;
}
/* REVISIT: /* REVISIT:
* When package power limit is set artificially low by RAPL, LVT * When package power limit is set artificially low by RAPL, LVT
* thermal interrupt for package power limit should be ignored * thermal interrupt for package power limit should be ignored
@ -946,16 +941,107 @@ static void package_power_limit_irq_restore(int package_id)
wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
} }
/*
 * set_floor_freq_default() - default clamp handling for a RAPL domain
 * @rd:   domain to program
 * @mode: value written to the clamp/enable primitives
 *
 * Writes @mode to PL1_CLAMP so that the p-state may drop below the range
 * requested by the OS, i.e. power capping takes priority over guaranteed
 * frequency. Domains reporting more than one power limit additionally get
 * PL2_ENABLE and PL2_CLAMP written with the same value.
 */
static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
{
	const bool has_pl2 = find_nr_power_limit(rd) > 1;

	rapl_write_data_raw(rd, PL1_CLAMP, mode);

	/* only some domains have pl2 */
	if (!has_pl2)
		return;

	rapl_write_data_raw(rd, PL2_ENABLE, mode);
	rapl_write_data_raw(rd, PL2_CLAMP, mode);
}
/*
 * set_floor_freq_atom() - program the power budget control register (Atom)
 * @rd:     RAPL domain (not used by this implementation)
 * @enable: true to write the modified value, false to write back the
 *          originally read value
 *
 * The register IOSF_CPU_POWER_BUDGET_CTL is read over the IOSF sideband and
 * cached in a function-static variable; the read is repeated on every call
 * for as long as the cached value is zero. When @enable is set, bits 8..14 of
 * the cached value are cleared and bit 8 is set before writing; otherwise the
 * cached value is written unmodified.
 *
 * NOTE(review): the meaning of the bit field at bits 8..14 is not visible
 * here - confirm against the SoC documentation.
 */
static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
{
	static u32 power_ctrl_orig_val;
	u32 mdata;

	if (power_ctrl_orig_val == 0)
		iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
			      IOSF_CPU_POWER_BUDGET_CTL, &power_ctrl_orig_val);

	if (enable)
		mdata = (power_ctrl_orig_val & ~(0x7f << 8)) | (1 << 8);
	else
		mdata = power_ctrl_orig_val;

	iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
		       IOSF_CPU_POWER_BUDGET_CTL, mdata);
}
/*
 * rapl_compute_time_window_core() - convert a time window (Core variant)
 * @rp:     package providing time_unit
 * @value:  raw register encoding (!@to_raw) or time in driver units (@to_raw)
 * @to_raw: conversion direction
 *
 * The raw encoding holds an exponent Y in bits 0..4 and a fraction F in
 * bits 5..6, describing a window of 2^Y * (1 + F/4) time units; refer to
 * Intel Software Developer's manual Vol.3B: CH 14.9.3.
 *
 * All powers of two are computed as 64-bit values: a plain 'int' shift
 * overflows for Y == 31 when decoding and for any exponent >= 31 when
 * encoding.
 *
 * Return: the window in driver time units when decoding, or the raw encoding
 * when encoding. A request shorter than one time unit encodes as 0 (Y = 0,
 * F = 0, the shortest window) rather than taking the logarithm of zero.
 */
static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
					 bool to_raw)
{
	u64 f, y; /* fraction and exp. used for time unit */

	if (!to_raw) {
		f = (value & 0x60) >> 5;
		y = value & 0x1f;
		return (1ULL << y) * (4 + f) * rp->time_unit / 4;
	}

	/* Express the requested window as a count of time units. */
	do_div(value, rp->time_unit);
	if (!value)
		return 0;	/* ilog2(0) has no meaningful result */

	y = ilog2(value);
	f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y);

	return (y & 0x1f) | ((f & 0x3) << 5);
}
/*
 * rapl_compute_time_window_atom() - convert a time window (Atom variant)
 * @rp:     package providing time_unit
 * @value:  raw register value (!@to_raw) or time in driver units (@to_raw)
 * @to_raw: conversion direction
 *
 * The Atom encoding is linear: time = raw * time_unit. A raw value of 0 is
 * reported as one time unit, so the decoded window is never 0.
 *
 * Return: the decoded time, or the raw value (time / time_unit) when
 * @to_raw is set.
 */
static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
					 bool to_raw)
{
	if (to_raw)
		return div64_u64(value, rp->time_unit);

	if (!value)
		return rp->time_unit;

	return value * rp->time_unit;
}
/*
 * Operation table for CPUs handled as "core" in rapl_ids[]: core unit
 * decoding, default clamp handling and the exponent/fraction time window
 * encoding.
 */
static const struct rapl_defaults rapl_defaults_core = {
.check_unit = rapl_check_unit_core,
.set_floor_freq = set_floor_freq_default,
.compute_time_window = rapl_compute_time_window_core,
};
/*
 * Operation table for CPUs handled as "atom" in rapl_ids[]: Atom unit
 * decoding, floor frequency control through the IOSF sideband and the
 * linear time window encoding.
 */
static const struct rapl_defaults rapl_defaults_atom = {
.check_unit = rapl_check_unit_atom,
.set_floor_freq = set_floor_freq_atom,
.compute_time_window = rapl_compute_time_window_atom,
};
/*
 * RAPL_CPU() - build one x86_cpu_id entry for an Intel family 6 CPU
 * @_model: CPU model number to match
 * @_ops:   rapl_defaults table to use for that model; its address is stored
 *          in driver_data
 *
 * Both arguments are parenthesized so that any expression passed in expands
 * as a single operand.
 */
#define RAPL_CPU(_model, _ops) {				\
		.vendor = X86_VENDOR_INTEL,			\
		.family = 6,					\
		.model = (_model),				\
		.driver_data = (kernel_ulong_t)&(_ops),		\
	}
static const struct x86_cpu_id rapl_ids[] = { static const struct x86_cpu_id rapl_ids[] = {
{ X86_VENDOR_INTEL, 6, 0x2a},/* Sandy Bridge */ RAPL_CPU(0x2a, rapl_defaults_core),/* Sandy Bridge */
{ X86_VENDOR_INTEL, 6, 0x2d},/* Sandy Bridge EP */ RAPL_CPU(0x2d, rapl_defaults_core),/* Sandy Bridge EP */
{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ RAPL_CPU(0x37, rapl_defaults_atom),/* Valleyview */
{ X86_VENDOR_INTEL, 6, 0x3a},/* Ivy Bridge */ RAPL_CPU(0x3a, rapl_defaults_core),/* Ivy Bridge */
{ X86_VENDOR_INTEL, 6, 0x3c},/* Haswell */ RAPL_CPU(0x3c, rapl_defaults_core),/* Haswell */
{ X86_VENDOR_INTEL, 6, 0x3d},/* Broadwell */ RAPL_CPU(0x3d, rapl_defaults_core),/* Broadwell */
{ X86_VENDOR_INTEL, 6, 0x3f},/* Haswell */ RAPL_CPU(0x3f, rapl_defaults_core),/* Haswell */
{ X86_VENDOR_INTEL, 6, 0x45},/* Haswell ULT */ RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */
/* TODO: Add more CPU IDs after testing */ RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */
RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */
RAPL_CPU(0x5A, rapl_defaults_atom),/* Annidale */
{} {}
}; };
MODULE_DEVICE_TABLE(x86cpu, rapl_ids); MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
@ -1241,7 +1327,7 @@ static int rapl_detect_topology(void)
/* check if the package contains valid domains */ /* check if the package contains valid domains */
if (rapl_detect_domains(new_package, i) || if (rapl_detect_domains(new_package, i) ||
rapl_check_unit(new_package, i)) { rapl_defaults->check_unit(new_package, i)) {
kfree(new_package->domains); kfree(new_package->domains);
kfree(new_package); kfree(new_package);
/* free up the packages already initialized */ /* free up the packages already initialized */
@ -1296,7 +1382,7 @@ static int rapl_add_package(int cpu)
rp->nr_cpus = 1; rp->nr_cpus = 1;
/* check if the package contains valid domains */ /* check if the package contains valid domains */
if (rapl_detect_domains(rp, cpu) || if (rapl_detect_domains(rp, cpu) ||
rapl_check_unit(rp, cpu)) { rapl_defaults->check_unit(rp, cpu)) {
ret = -ENODEV; ret = -ENODEV;
goto err_free_package; goto err_free_package;
} }
@ -1358,14 +1444,18 @@ static struct notifier_block rapl_cpu_notifier = {
static int __init rapl_init(void) static int __init rapl_init(void)
{ {
int ret = 0; int ret = 0;
const struct x86_cpu_id *id;
if (!x86_match_cpu(rapl_ids)) { id = x86_match_cpu(rapl_ids);
if (!id) {
pr_err("driver does not support CPU family %d model %d\n", pr_err("driver does not support CPU family %d model %d\n",
boot_cpu_data.x86, boot_cpu_data.x86_model); boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV; return -ENODEV;
} }
rapl_defaults = (struct rapl_defaults *)id->driver_data;
cpu_notifier_register_begin(); cpu_notifier_register_begin();
/* prevent CPU hotplug during detection */ /* prevent CPU hotplug during detection */

View File

@ -18,6 +18,8 @@ struct pm_clk_notifier_block {
char *con_ids[]; char *con_ids[];
}; };
struct clk;
#ifdef CONFIG_PM_CLK #ifdef CONFIG_PM_CLK
static inline bool pm_clk_no_clocks(struct device *dev) static inline bool pm_clk_no_clocks(struct device *dev)
{ {
@ -29,6 +31,7 @@ extern void pm_clk_init(struct device *dev);
extern int pm_clk_create(struct device *dev); extern int pm_clk_create(struct device *dev);
extern void pm_clk_destroy(struct device *dev); extern void pm_clk_destroy(struct device *dev);
extern int pm_clk_add(struct device *dev, const char *con_id); extern int pm_clk_add(struct device *dev, const char *con_id);
extern int pm_clk_add_clk(struct device *dev, struct clk *clk);
extern void pm_clk_remove(struct device *dev, const char *con_id); extern void pm_clk_remove(struct device *dev, const char *con_id);
extern int pm_clk_suspend(struct device *dev); extern int pm_clk_suspend(struct device *dev);
extern int pm_clk_resume(struct device *dev); extern int pm_clk_resume(struct device *dev);
@ -51,6 +54,11 @@ static inline int pm_clk_add(struct device *dev, const char *con_id)
{ {
return -EINVAL; return -EINVAL;
} }
/*
 * Stub variant of pm_clk_add_clk(): does nothing and reports -EINVAL.
 * NOTE(review): presumably this sits in the !CONFIG_PM_CLK branch of the
 * header, next to the pm_clk_add() stub - confirm against the full file.
 */
static inline int pm_clk_add_clk(struct device *dev, struct clk *clk)
{
return -EINVAL;
}
static inline void pm_clk_remove(struct device *dev, const char *con_id) static inline void pm_clk_remove(struct device *dev, const char *con_id)
{ {
} }

View File

@ -21,7 +21,7 @@ struct dev_pm_opp;
struct device; struct device;
enum dev_pm_opp_event { enum dev_pm_opp_event {
OPP_EVENT_ADD, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE, OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
}; };
#if defined(CONFIG_PM_OPP) #if defined(CONFIG_PM_OPP)
@ -44,6 +44,7 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
int dev_pm_opp_add(struct device *dev, unsigned long freq, int dev_pm_opp_add(struct device *dev, unsigned long freq,
unsigned long u_volt); unsigned long u_volt);
void dev_pm_opp_remove(struct device *dev, unsigned long freq);
int dev_pm_opp_enable(struct device *dev, unsigned long freq); int dev_pm_opp_enable(struct device *dev, unsigned long freq);
@ -90,6 +91,10 @@ static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
return -EINVAL; return -EINVAL;
} }
/*
 * Stub variant of dev_pm_opp_remove(): intentionally empty.
 * NOTE(review): presumably this sits in the !CONFIG_PM_OPP branch of the
 * header - confirm against the full file.
 */
static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq)
{
}
static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq) static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq)
{ {
return 0; return 0;
@ -109,11 +114,16 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
#if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
int of_init_opp_table(struct device *dev); int of_init_opp_table(struct device *dev);
void of_free_opp_table(struct device *dev);
#else #else
static inline int of_init_opp_table(struct device *dev) static inline int of_init_opp_table(struct device *dev)
{ {
return -EINVAL; return -EINVAL;
} }
/*
 * Stub variant of of_free_opp_table(), used unless both CONFIG_PM_OPP and
 * CONFIG_OF are defined: intentionally empty.
 */
static inline void of_free_opp_table(struct device *dev)
{
}
#endif #endif
#endif /* __LINUX_OPP_H__ */ #endif /* __LINUX_OPP_H__ */

View File

@ -308,4 +308,3 @@ config PM_GENERIC_DOMAINS_OF
config CPU_PM config CPU_PM
bool bool
depends on SUSPEND || CPU_IDLE