diff --git a/Documentation/admin-guide/pm/intel-speed-select.rst b/Documentation/admin-guide/pm/intel-speed-select.rst index 0a1fbdb54bfe..a2bfb971654f 100644 --- a/Documentation/admin-guide/pm/intel-speed-select.rst +++ b/Documentation/admin-guide/pm/intel-speed-select.rst @@ -262,6 +262,28 @@ Which shows that the base frequency now increased from 2600 MHz at performance level 0 to 2800 MHz at performance level 4. As a result, any workload, which can use fewer CPUs, can see a boost of 200 MHz compared to performance level 0. +Changing performance level via BMC Interface +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is possible to change the SST-PP level using an out of band (OOB) agent (via some +remote management console, through the BMC "Baseboard Management Controller" +interface). This mode is supported from the Sapphire Rapids processor +generation onward. The kernel and tool changes to support this mode were added in +Linux kernel version 5.18. To enable this feature, the kernel config +"CONFIG_INTEL_HFI_THERMAL" is required. The minimum version of the tool +that supports this feature is "v1.12", which is part of Linux kernel version 5.18. + +To support such a configuration, this tool can be run as a daemon. Add +the command line option --oob:: + + # intel-speed-select --oob + Intel(R) Speed Select Technology + Executing on CPU model:143[0x8f] + OOB mode is enabled and will run as daemon + +In this mode, the tool will online/offline CPUs based on the new performance +level. + Check presence of other Intel(R) SST features --------------------------------------------- diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml new file mode 100644 index 000000000000..8c6543b5c0dc --- /dev/null +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra-ccplex-cluster.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: NVIDIA Tegra CPU COMPLEX CLUSTER area device tree bindings + +maintainers: + - Sumit Gupta + - Mikko Perttunen + - Jon Hunter + - Thierry Reding + +description: |+ + The Tegra CPU COMPLEX CLUSTER area contains memory-mapped + registers that initiate CPU frequency/voltage transitions. + +properties: + $nodename: + pattern: "ccplex@([0-9a-f]+)$" + + compatible: + enum: + - nvidia,tegra186-ccplex-cluster + - nvidia,tegra234-ccplex-cluster + + reg: + maxItems: 1 + + nvidia,bpmp: + $ref: '/schemas/types.yaml#/definitions/phandle' + description: | + Specifies the BPMP node that needs to be queried to get + operating point data for all CPUs. + +additionalProperties: false + +required: + - compatible + - reg + - nvidia,bpmp + - status + +examples: + - | + ccplex@e000000 { + compatible = "nvidia,tegra234-ccplex-cluster"; + reg = <0x0e000000 0x5ffff>; + nvidia,bpmp = <&bpmp>; + status = "okay"; + }; diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt index b8233ec91d3d..e0a4ba599abc 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt @@ -20,6 +20,13 @@ Optional properties: Vsram to fit SoC specific needs. When absent, the voltage scaling flow is handled by hardware, hence no software "voltage tracking" is needed.
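The software "voltage tracking" just mentioned is reworked by the mediatek-cpufreq.c changes later in this patch: Vsram and Vproc are stepped toward the target in bounded increments so that Vsram always stays between min_volt_shift and max_volt_shift above Vproc. A minimal sketch of the scale-up ladder, using plain integers and hypothetical constants in place of the driver's regulator calls and per-SoC platform data::

  /*
   * Sketch of the Vsram/Vproc scale-up ladder; MIN_SHIFT_UV/MAX_SHIFT_UV
   * are illustrative stand-ins for the per-SoC platform data, and plain
   * assignments stand in for regulator_set_voltage() calls.
   */
  #include <stdio.h>

  #define MIN_SHIFT_UV 100000 /* Vsram must stay >= Vproc + 100 mV... */
  #define MAX_SHIFT_UV 200000 /* ...and <= Vproc + 200 mV */

  static void step_up(int *vsram, int *vproc, int new_vproc)
  {
          /* Raise Vsram first, never more than MAX_SHIFT_UV above Vproc. */
          *vsram = *vproc + MAX_SHIFT_UV;
          if (*vsram > new_vproc + MIN_SHIFT_UV)
                  *vsram = new_vproc + MIN_SHIFT_UV;
          /* Then raise Vproc, keeping it MIN_SHIFT_UV below Vsram. */
          *vproc = *vsram - MIN_SHIFT_UV;
          if (*vproc > new_vproc)
                  *vproc = new_vproc;
  }

  int main(void)
  {
          int vsram = 900000, vproc = 800000, target = 1100000;

          while (vproc < target) /* both rails converge in bounded steps */
                  step_up(&vsram, &vproc, target);
          printf("vproc=%d vsram=%d\n", vproc, vsram);
          return 0;
  }

Scaling down mirrors this with the order reversed: Vproc is lowered first, then Vsram follows, so the gap constraint holds at every intermediate step.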
+- mediatek,cci: + Used to confirm the link status between cpufreq and mediatek cci, because + cpufreq and mediatek cci could share the same regulator in some MediaTek SoCs. + To prevent running at a high frequency with a low voltage, we need this + property to make sure mediatek cci is ready. + For details of mediatek cci, please refer to + Documentation/devicetree/bindings/interconnect/mediatek,cci.yaml - #cooling-cells: For details, please refer to Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml diff --git a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml index 8c2e9ac5f68d..30f7b596d609 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml @@ -17,10 +17,10 @@ description: | the CPU frequencies subset and voltage value of each OPP varies based on the silicon variant in use. Qualcomm Technologies, Inc. Process Voltage Scaling Tables - defines the voltage and frequency value based on the msm-id in SMEM - and speedbin blown in the efuse combination. - The qcom-cpufreq-nvmem driver reads the msm-id and efuse value from the SoC - to provide the OPP framework with required information (existing HW bitmap). + defines the voltage and frequency value based on the speedbin blown in + the efuse combination. + The qcom-cpufreq-nvmem driver reads the efuse value from the SoC to provide + the OPP framework with required information (existing HW bitmap). This is used to determine the voltage and frequency value for each OPP of operating-points-v2 table when it is parsed by the OPP framework. @@ -50,15 +50,11 @@ patternProperties: description: | A single 32 bit bitmap value, representing compatible HW.
Bitmap: - 0: MSM8996 V3, speedbin 0 - 1: MSM8996 V3, speedbin 1 - 2: MSM8996 V3, speedbin 2 - 3: unused - 4: MSM8996 SG, speedbin 0 - 5: MSM8996 SG, speedbin 1 - 6: MSM8996 SG, speedbin 2 - 7-31: unused - maximum: 0x77 + 0: MSM8996, speedbin 0 + 1: MSM8996, speedbin 1 + 2: MSM8996, speedbin 2 + 3-31: unused + maximum: 0x7 clock-latency-ns: true @@ -184,19 +180,19 @@ examples: opp-307200000 { opp-hz = /bits/ 64 <307200000>; opp-microvolt = <905000 905000 1140000>; - opp-supported-hw = <0x77>; + opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1401600000 { + opp-hz = /bits/ 64 <1401600000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x5>; clock-latency-ns = <200000>; }; opp-1593600000 { opp-hz = /bits/ 64 <1593600000>; opp-microvolt = <1140000 905000 1140000>; - opp-supported-hw = <0x71>; - clock-latency-ns = <200000>; - }; - opp-2188800000 { - opp-hz = /bits/ 64 <2188800000>; - opp-microvolt = <1140000 905000 1140000>; - opp-supported-hw = <0x10>; + opp-supported-hw = <0x1>; clock-latency-ns = <200000>; }; }; @@ -209,25 +205,25 @@ examples: opp-307200000 { opp-hz = /bits/ 64 <307200000>; opp-microvolt = <905000 905000 1140000>; - opp-supported-hw = <0x77>; + opp-supported-hw = <0x7>; clock-latency-ns = <200000>; }; - opp-1593600000 { - opp-hz = /bits/ 64 <1593600000>; + opp-1804800000 { + opp-hz = /bits/ 64 <1804800000>; opp-microvolt = <1140000 905000 1140000>; - opp-supported-hw = <0x70>; + opp-supported-hw = <0x6>; + clock-latency-ns = <200000>; + }; + opp-1900800000 { + opp-hz = /bits/ 64 <1900800000>; + opp-microvolt = <1140000 905000 1140000>; + opp-supported-hw = <0x4>; clock-latency-ns = <200000>; }; opp-2150400000 { opp-hz = /bits/ 64 <2150400000>; opp-microvolt = <1140000 905000 1140000>; - opp-supported-hw = <0x31>; - clock-latency-ns = <200000>; - }; - opp-2342400000 { - opp-hz = /bits/ 64 <2342400000>; - opp-microvolt = <1140000 905000 1140000>; - opp-supported-hw = <0x10>; + opp-supported-hw = <0x1>; clock-latency-ns = <200000>; }; }; diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 3044fcb8d073..2cb943422554 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c @@ -116,9 +116,7 @@ void machine_power_off(void) { local_irq_disable(); smp_send_stop(); - - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } /* diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 9734c9fb1a32..2f42123e059f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -111,8 +111,7 @@ void machine_power_off(void) { local_irq_disable(); smp_send_stop(); - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } /* diff --git a/arch/csky/kernel/power.c b/arch/csky/kernel/power.c index 923ee4e381b8..86ee202906f8 100644 --- a/arch/csky/kernel/power.c +++ b/arch/csky/kernel/power.c @@ -9,16 +9,14 @@ EXPORT_SYMBOL(pm_power_off); void machine_power_off(void) { local_irq_disable(); - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); asm volatile ("bkpt"); } void machine_halt(void) { local_irq_disable(); - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); asm volatile ("bkpt"); } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index d7a256bd9d6b..89025e3b3f61 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -599,8 +600,7 @@ machine_halt (void) void machine_power_off (void) { - if (pm_power_off) - pm_power_off(); + 
do_kernel_power_off(); machine_halt(); } diff --git a/arch/m68k/emu/natfeat.c b/arch/m68k/emu/natfeat.c index 71b78ecee75c..b19dc00026d9 100644 --- a/arch/m68k/emu/natfeat.c +++ b/arch/m68k/emu/natfeat.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -90,5 +91,5 @@ void __init nf_init(void) pr_info("NatFeats found (%s, %lu.%lu)\n", buf, version >> 16, version & 0xffff); - mach_power_off = nf_poweroff; + register_platform_power_off(nf_poweroff); } diff --git a/arch/m68k/include/asm/machdep.h b/arch/m68k/include/asm/machdep.h index 841ba6aa3fcb..48d27f1fecc7 100644 --- a/arch/m68k/include/asm/machdep.h +++ b/arch/m68k/include/asm/machdep.h @@ -23,7 +23,6 @@ extern int (*mach_get_rtc_pll)(struct rtc_pll_info *); extern int (*mach_set_rtc_pll)(struct rtc_pll_info *); extern void (*mach_reset)( void ); extern void (*mach_halt)( void ); -extern void (*mach_power_off)( void ); extern unsigned long (*mach_hd_init) (unsigned long, unsigned long); extern void (*mach_hd_setup)(char *, int *); extern void (*mach_heartbeat) (int); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index a6030dbaa089..e160a7c57bd3 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -67,12 +67,11 @@ void machine_halt(void) void machine_power_off(void) { - if (mach_power_off) - mach_power_off(); + do_kernel_power_off(); for (;;); } -void (*pm_power_off)(void) = machine_power_off; +void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); void show_regs(struct pt_regs * regs) diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 656841defd2a..e62fa8f2149b 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -89,7 +89,6 @@ void (*mach_get_model) (char *model); void (*mach_get_hardware_list) (struct seq_file *m); void (*mach_reset)( void ); void (*mach_halt)( void ); -void (*mach_power_off)( void ); #ifdef CONFIG_HEARTBEAT void (*mach_heartbeat) (int); EXPORT_SYMBOL(mach_heartbeat); diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 19eea73d3c17..cb6def585851 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -54,7 +54,6 @@ void (*mach_sched_init)(void) __initdata = NULL; /* machine dependent reboot functions */ void (*mach_reset)(void); void (*mach_halt)(void); -void (*mach_power_off)(void); #ifdef CONFIG_M68000 #if defined(CONFIG_M68328) diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 65d124ec80bb..382f656c29ea 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -140,7 +141,6 @@ void __init config_mac(void) mach_hwclk = mac_hwclk; mach_reset = mac_reset; mach_halt = mac_poweroff; - mach_power_off = mac_poweroff; #if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) mach_beep = mac_mksound; #endif @@ -160,6 +160,8 @@ void __init config_mac(void) if (macintosh_config->ident == MAC_MODEL_IICI) mach_l2_flush = via_l2_flush; + + register_platform_power_off(mac_poweroff); } diff --git a/arch/m68k/virt/config.c b/arch/m68k/virt/config.c index 68d29c8b87e1..632ba200ad42 100644 --- a/arch/m68k/virt/config.c +++ b/arch/m68k/virt/config.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -126,5 +127,6 @@ void __init config_virt(void) mach_get_model = virt_get_model; mach_reset = virt_reset; mach_halt = virt_halt; - mach_power_off = virt_halt; + + register_platform_power_off(virt_halt); } diff --git a/arch/mips/kernel/reset.c 
b/arch/mips/kernel/reset.c index 6288780b779e..e7ce07b3e79b 100644 --- a/arch/mips/kernel/reset.c +++ b/arch/mips/kernel/reset.c @@ -114,8 +114,7 @@ void machine_halt(void) void machine_power_off(void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); #ifdef CONFIG_SMP preempt_disable(); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 28b6a2a5574c..d145184696ea 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -116,8 +117,7 @@ void machine_power_off(void) pdc_chassis_send_status(PDC_CHASSIS_DIRECT_SHUTDOWN); /* ipmi_poweroff may have been installed. */ - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); /* It seems we have no way to power the system off via * software. The user has to press the button himself. */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d83d16fef9a..eb0077b302e2 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -161,9 +161,7 @@ void machine_restart(char *cmd) void machine_power_off(void) { machine_shutdown(); - if (pm_power_off) - pm_power_off(); - + do_kernel_power_off(); smp_send_stop(); machine_hang(); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index fff81c2300fa..3d9782ea3fa7 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1242,8 +1242,7 @@ static void bootcmds(void) } else if (cmd == 'h') { ppc_md.halt(); } else if (cmd == 'p') { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } } diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c index 9c842c41684a..912288572226 100644 --- a/arch/riscv/kernel/reset.c +++ b/arch/riscv/kernel/reset.c @@ -23,16 +23,12 @@ void machine_restart(char *cmd) void machine_halt(void) { - if (pm_power_off != NULL) - pm_power_off(); - else - default_power_off(); + do_kernel_power_off(); + default_power_off(); } void machine_power_off(void) { - if (pm_power_off != NULL) - pm_power_off(); - else - default_power_off(); + do_kernel_power_off(); + default_power_off(); } diff --git a/arch/sh/kernel/reboot.c b/arch/sh/kernel/reboot.c index 5c33f036418b..e8eeedc9b182 100644 --- a/arch/sh/kernel/reboot.c +++ b/arch/sh/kernel/reboot.c @@ -46,8 +46,7 @@ static void native_machine_shutdown(void) static void native_machine_power_off(void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } static void native_machine_halt(void) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index fa700b46588e..c3636ea4aa71 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -739,10 +739,10 @@ static void native_machine_halt(void) static void native_machine_power_off(void) { - if (pm_power_off) { + if (kernel_can_power_off()) { if (!reboot_force) machine_shutdown(); - pm_power_off(); + do_kernel_power_off(); } /* A fallback in case there is no PM info available */ tboot_shutdown(TB_SHUTDOWN_HALT); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ca85d1409917..f33a4421e7cd 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1069,8 +1070,7 @@ static void xen_machine_halt(void) static void xen_machine_power_off(void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); xen_reboot(SHUTDOWN_poweroff); } diff --git a/drivers/acpi/sleep.c 
b/drivers/acpi/sleep.c index 3147702710af..04ea1569df78 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -1035,20 +1035,22 @@ static void acpi_sleep_hibernate_setup(void) static inline void acpi_sleep_hibernate_setup(void) {} #endif /* !CONFIG_HIBERNATION */ -static void acpi_power_off_prepare(void) +static int acpi_power_off_prepare(struct sys_off_data *data) { /* Prepare to power off the system */ acpi_sleep_prepare(ACPI_STATE_S5); acpi_disable_all_gpes(); acpi_os_wait_events_complete(); + return NOTIFY_DONE; } -static void acpi_power_off(void) +static int acpi_power_off(struct sys_off_data *data) { /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ pr_debug("%s called\n", __func__); local_irq_disable(); acpi_enter_sleep_state(ACPI_STATE_S5); + return NOTIFY_DONE; } int __init acpi_sleep_init(void) @@ -1067,8 +1069,14 @@ int __init acpi_sleep_init(void) if (acpi_sleep_state_supported(ACPI_STATE_S5)) { sleep_states[ACPI_STATE_S5] = 1; - pm_power_off_prepare = acpi_power_off_prepare; - pm_power_off = acpi_power_off; + + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF_PREPARE, + SYS_OFF_PRIO_FIRMWARE, + acpi_power_off_prepare, NULL); + + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_FIRMWARE, + acpi_power_off, NULL); } else { acpi_no_s5 = true; } diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index d092c9bb4ba3..24eaf0ec344d 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -61,6 +61,8 @@ static struct cppc_workaround_oem_info wa_info[] = { } }; +static struct cpufreq_driver cppc_cpufreq_driver; + #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE /* Frequency invariance support */ @@ -75,7 +77,6 @@ struct cppc_freq_invariance { static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); static struct kthread_worker *kworker_fie; -static struct cpufreq_driver cppc_cpufreq_driver; static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu); static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, struct cppc_perf_fb_ctrs *fb_ctrs_t0, @@ -440,6 +441,14 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) } return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } +#else +static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) +{ + return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; +} +#endif + +#if defined(CONFIG_ARM64) && defined(CONFIG_ENERGY_MODEL) static DEFINE_PER_CPU(unsigned int, efficiency_class); static void cppc_cpufreq_register_em(struct cpufreq_policy *policy); @@ -620,21 +629,12 @@ static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) } #else - -static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) -{ - return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; -} static int populate_efficiency_class(void) { return 0; } -static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) -{ -} #endif - static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) { struct cppc_cpudata *cpu_data; diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 866163883b48..37a1eb20f5ba 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -8,18 +8,22 @@ #include #include #include +#include #include #include +#include #include #include #include -#include -#include -#define MIN_VOLT_SHIFT (100000) -#define MAX_VOLT_SHIFT (200000) -#define MAX_VOLT_LIMIT (1150000) -#define VOLT_TOL (10000) +struct 
mtk_cpufreq_platform_data { + int min_volt_shift; + int max_volt_shift; + int proc_max_volt; + int sram_min_volt; + int sram_max_volt; + bool ccifreq_supported; +}; /* * The struct mtk_cpu_dvfs_info holds necessary information for doing CPU DVFS @@ -35,6 +39,7 @@ struct mtk_cpu_dvfs_info { struct cpumask cpus; struct device *cpu_dev; + struct device *cci_dev; struct regulator *proc_reg; struct regulator *sram_reg; struct clk *cpu_clk; @@ -42,8 +47,20 @@ struct mtk_cpu_dvfs_info { struct list_head list_head; int intermediate_voltage; bool need_voltage_tracking; + int vproc_on_boot; + int pre_vproc; + /* Avoid race condition for regulators between notify and policy */ + struct mutex reg_lock; + struct notifier_block opp_nb; + unsigned int opp_cpu; + unsigned long current_freq; + const struct mtk_cpufreq_platform_data *soc_data; + int vtrack_max; + bool ccifreq_bound; }; +static struct platform_device *cpufreq_pdev; + static LIST_HEAD(dvfs_info_list); static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu) @@ -61,142 +78,123 @@ static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu) static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, int new_vproc) { + const struct mtk_cpufreq_platform_data *soc_data = info->soc_data; struct regulator *proc_reg = info->proc_reg; struct regulator *sram_reg = info->sram_reg; - int old_vproc, old_vsram, new_vsram, vsram, vproc, ret; + int pre_vproc, pre_vsram, new_vsram, vsram, vproc, ret; + int retry = info->vtrack_max; - old_vproc = regulator_get_voltage(proc_reg); - if (old_vproc < 0) { - pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc); - return old_vproc; + pre_vproc = regulator_get_voltage(proc_reg); + if (pre_vproc < 0) { + dev_err(info->cpu_dev, + "invalid Vproc value: %d\n", pre_vproc); + return pre_vproc; } - /* Vsram should not exceed the maximum allowed voltage of SoC. */ - new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT); - if (old_vproc < new_vproc) { - /* - * When scaling up voltages, Vsram and Vproc scale up step - * by step. At each step, set Vsram to (Vproc + 200mV) first, - * then set Vproc to (Vsram - 100mV). - * Keep doing it until Vsram and Vproc hit target voltages. - */ - do { - old_vsram = regulator_get_voltage(sram_reg); - if (old_vsram < 0) { - pr_err("%s: invalid Vsram value: %d\n", - __func__, old_vsram); - return old_vsram; - } - old_vproc = regulator_get_voltage(proc_reg); - if (old_vproc < 0) { - pr_err("%s: invalid Vproc value: %d\n", - __func__, old_vproc); - return old_vproc; - } + pre_vsram = regulator_get_voltage(sram_reg); + if (pre_vsram < 0) { + dev_err(info->cpu_dev, "invalid Vsram value: %d\n", pre_vsram); + return pre_vsram; + } - vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT); + new_vsram = clamp(new_vproc + soc_data->min_volt_shift, + soc_data->sram_min_volt, soc_data->sram_max_volt); - if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) { - vsram = MAX_VOLT_LIMIT; + do { + if (pre_vproc <= new_vproc) { + vsram = clamp(pre_vproc + soc_data->max_volt_shift, + soc_data->sram_min_volt, new_vsram); + ret = regulator_set_voltage(sram_reg, vsram, + soc_data->sram_max_volt); - /* - * If the target Vsram hits the maximum voltage, - * try to set the exact voltage value first. 
- */ - ret = regulator_set_voltage(sram_reg, vsram, - vsram); - if (ret) - ret = regulator_set_voltage(sram_reg, - vsram - VOLT_TOL, - vsram); - - vproc = new_vproc; - } else { - ret = regulator_set_voltage(sram_reg, vsram, - vsram + VOLT_TOL); - - vproc = vsram - MIN_VOLT_SHIFT; - } if (ret) return ret; + if (vsram == soc_data->sram_max_volt || + new_vsram == soc_data->sram_min_volt) + vproc = new_vproc; + else + vproc = vsram - soc_data->min_volt_shift; + ret = regulator_set_voltage(proc_reg, vproc, - vproc + VOLT_TOL); + soc_data->proc_max_volt); if (ret) { - regulator_set_voltage(sram_reg, old_vsram, - old_vsram); + regulator_set_voltage(sram_reg, pre_vsram, + soc_data->sram_max_volt); return ret; } - } while (vproc < new_vproc || vsram < new_vsram); - } else if (old_vproc > new_vproc) { - /* - * When scaling down voltages, Vsram and Vproc scale down step - * by step. At each step, set Vproc to (Vsram - 200mV) first, - * then set Vproc to (Vproc + 100mV). - * Keep doing it until Vsram and Vproc hit target voltages. - */ - do { - old_vproc = regulator_get_voltage(proc_reg); - if (old_vproc < 0) { - pr_err("%s: invalid Vproc value: %d\n", - __func__, old_vproc); - return old_vproc; - } - old_vsram = regulator_get_voltage(sram_reg); - if (old_vsram < 0) { - pr_err("%s: invalid Vsram value: %d\n", - __func__, old_vsram); - return old_vsram; - } - - vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT); + } else if (pre_vproc > new_vproc) { + vproc = max(new_vproc, + pre_vsram - soc_data->max_volt_shift); ret = regulator_set_voltage(proc_reg, vproc, - vproc + VOLT_TOL); + soc_data->proc_max_volt); if (ret) return ret; if (vproc == new_vproc) vsram = new_vsram; else - vsram = max(new_vsram, vproc + MIN_VOLT_SHIFT); - - if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) { - vsram = MAX_VOLT_LIMIT; - - /* - * If the target Vsram hits the maximum voltage, - * try to set the exact voltage value first. 
- */ - ret = regulator_set_voltage(sram_reg, vsram, - vsram); - if (ret) - ret = regulator_set_voltage(sram_reg, - vsram - VOLT_TOL, - vsram); - } else { - ret = regulator_set_voltage(sram_reg, vsram, - vsram + VOLT_TOL); - } + vsram = max(new_vsram, + vproc + soc_data->min_volt_shift); + ret = regulator_set_voltage(sram_reg, vsram, + soc_data->sram_max_volt); if (ret) { - regulator_set_voltage(proc_reg, old_vproc, - old_vproc); + regulator_set_voltage(proc_reg, pre_vproc, + soc_data->proc_max_volt); return ret; } - } while (vproc > new_vproc + VOLT_TOL || - vsram > new_vsram + VOLT_TOL); - } + } + + pre_vproc = vproc; + pre_vsram = vsram; + + if (--retry < 0) { + dev_err(info->cpu_dev, + "over loop count, failed to set voltage\n"); + return -EINVAL; + } + } while (vproc != new_vproc || vsram != new_vsram); return 0; } static int mtk_cpufreq_set_voltage(struct mtk_cpu_dvfs_info *info, int vproc) { + const struct mtk_cpufreq_platform_data *soc_data = info->soc_data; + int ret; + if (info->need_voltage_tracking) - return mtk_cpufreq_voltage_tracking(info, vproc); + ret = mtk_cpufreq_voltage_tracking(info, vproc); else - return regulator_set_voltage(info->proc_reg, vproc, - vproc + VOLT_TOL); + ret = regulator_set_voltage(info->proc_reg, vproc, + soc_data->proc_max_volt); + if (!ret) + info->pre_vproc = vproc; + + return ret; +} + +static bool is_ccifreq_ready(struct mtk_cpu_dvfs_info *info) +{ + struct device_link *sup_link; + + if (info->ccifreq_bound) + return true; + + sup_link = device_link_add(info->cpu_dev, info->cci_dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!sup_link) { + dev_err(info->cpu_dev, "cpu%d: sup_link is NULL\n", info->opp_cpu); + return false; + } + + if (sup_link->supplier->links.status != DL_DEV_DRIVER_BOUND) + return false; + + info->ccifreq_bound = true; + + return true; } static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, @@ -208,209 +206,334 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, struct mtk_cpu_dvfs_info *info = policy->driver_data; struct device *cpu_dev = info->cpu_dev; struct dev_pm_opp *opp; - long freq_hz, old_freq_hz; - int vproc, old_vproc, inter_vproc, target_vproc, ret; + long freq_hz, pre_freq_hz; + int vproc, pre_vproc, inter_vproc, target_vproc, ret; inter_vproc = info->intermediate_voltage; - old_freq_hz = clk_get_rate(cpu_clk); - old_vproc = regulator_get_voltage(info->proc_reg); - if (old_vproc < 0) { - pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc); - return old_vproc; + pre_freq_hz = clk_get_rate(cpu_clk); + + mutex_lock(&info->reg_lock); + + if (unlikely(info->pre_vproc <= 0)) + pre_vproc = regulator_get_voltage(info->proc_reg); + else + pre_vproc = info->pre_vproc; + + if (pre_vproc < 0) { + dev_err(cpu_dev, "invalid Vproc value: %d\n", pre_vproc); + ret = pre_vproc; + goto out; } freq_hz = freq_table[index].frequency * 1000; opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz); if (IS_ERR(opp)) { - pr_err("cpu%d: failed to find OPP for %ld\n", - policy->cpu, freq_hz); - return PTR_ERR(opp); + dev_err(cpu_dev, "cpu%d: failed to find OPP for %ld\n", + policy->cpu, freq_hz); + ret = PTR_ERR(opp); + goto out; } vproc = dev_pm_opp_get_voltage(opp); dev_pm_opp_put(opp); + /* + * If MediaTek cci is supported but is not ready, we will use the value + * of max(target cpu voltage, booting voltage) to prevent high frequency + * low voltage crash.
+ */ + if (info->soc_data->ccifreq_supported && !is_ccifreq_ready(info)) + vproc = max(vproc, info->vproc_on_boot); + /* * If the new voltage or the intermediate voltage is higher than the * current voltage, scale up voltage first. */ - target_vproc = (inter_vproc > vproc) ? inter_vproc : vproc; - if (old_vproc < target_vproc) { + target_vproc = max(inter_vproc, vproc); + if (pre_vproc <= target_vproc) { ret = mtk_cpufreq_set_voltage(info, target_vproc); if (ret) { - pr_err("cpu%d: failed to scale up voltage!\n", - policy->cpu); - mtk_cpufreq_set_voltage(info, old_vproc); - return ret; + dev_err(cpu_dev, + "cpu%d: failed to scale up voltage!\n", policy->cpu); + mtk_cpufreq_set_voltage(info, pre_vproc); + goto out; } } /* Reparent the CPU clock to intermediate clock. */ ret = clk_set_parent(cpu_clk, info->inter_clk); if (ret) { - pr_err("cpu%d: failed to re-parent cpu clock!\n", - policy->cpu); - mtk_cpufreq_set_voltage(info, old_vproc); - WARN_ON(1); - return ret; + dev_err(cpu_dev, + "cpu%d: failed to re-parent cpu clock!\n", policy->cpu); + mtk_cpufreq_set_voltage(info, pre_vproc); + goto out; } /* Set the original PLL to target rate. */ ret = clk_set_rate(armpll, freq_hz); if (ret) { - pr_err("cpu%d: failed to scale cpu clock rate!\n", - policy->cpu); + dev_err(cpu_dev, + "cpu%d: failed to scale cpu clock rate!\n", policy->cpu); clk_set_parent(cpu_clk, armpll); - mtk_cpufreq_set_voltage(info, old_vproc); - return ret; + mtk_cpufreq_set_voltage(info, pre_vproc); + goto out; } /* Set parent of CPU clock back to the original PLL. */ ret = clk_set_parent(cpu_clk, armpll); if (ret) { - pr_err("cpu%d: failed to re-parent cpu clock!\n", - policy->cpu); + dev_err(cpu_dev, + "cpu%d: failed to re-parent cpu clock!\n", policy->cpu); mtk_cpufreq_set_voltage(info, inter_vproc); - WARN_ON(1); - return ret; + goto out; } /* * If the new voltage is lower than the intermediate voltage or the * original voltage, scale down to the new voltage. 
*/ - if (vproc < inter_vproc || vproc < old_vproc) { + if (vproc < inter_vproc || vproc < pre_vproc) { ret = mtk_cpufreq_set_voltage(info, vproc); if (ret) { - pr_err("cpu%d: failed to scale down voltage!\n", - policy->cpu); + dev_err(cpu_dev, + "cpu%d: failed to scale down voltage!\n", policy->cpu); clk_set_parent(cpu_clk, info->inter_clk); - clk_set_rate(armpll, old_freq_hz); + clk_set_rate(armpll, pre_freq_hz); clk_set_parent(cpu_clk, armpll); - return ret; + goto out; } } - return 0; + info->current_freq = freq_hz; + +out: + mutex_unlock(&info->reg_lock); + + return ret; } #define DYNAMIC_POWER "dynamic-power-coefficient" +static int mtk_cpufreq_opp_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct dev_pm_opp *opp = data; + struct dev_pm_opp *new_opp; + struct mtk_cpu_dvfs_info *info; + unsigned long freq, volt; + struct cpufreq_policy *policy; + int ret = 0; + + info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb); + + if (event == OPP_EVENT_ADJUST_VOLTAGE) { + freq = dev_pm_opp_get_freq(opp); + + mutex_lock(&info->reg_lock); + if (info->current_freq == freq) { + volt = dev_pm_opp_get_voltage(opp); + ret = mtk_cpufreq_set_voltage(info, volt); + if (ret) + dev_err(info->cpu_dev, + "failed to scale voltage: %d\n", ret); + } + mutex_unlock(&info->reg_lock); + } else if (event == OPP_EVENT_DISABLE) { + freq = dev_pm_opp_get_freq(opp); + + /* case of current opp item is disabled */ + if (info->current_freq == freq) { + freq = 1; + new_opp = dev_pm_opp_find_freq_ceil(info->cpu_dev, + &freq); + if (IS_ERR(new_opp)) { + dev_err(info->cpu_dev, + "all opp items are disabled\n"); + ret = PTR_ERR(new_opp); + return notifier_from_errno(ret); + } + + dev_pm_opp_put(new_opp); + policy = cpufreq_cpu_get(info->opp_cpu); + if (policy) { + cpufreq_driver_target(policy, freq / 1000, + CPUFREQ_RELATION_L); + cpufreq_cpu_put(policy); + } + } + } + + return notifier_from_errno(ret); +} + +static struct device *of_get_cci(struct device *cpu_dev) +{ + struct device_node *np; + struct platform_device *pdev; + + np = of_parse_phandle(cpu_dev->of_node, "mediatek,cci", 0); + if (IS_ERR_OR_NULL(np)) + return NULL; + + pdev = of_find_device_by_node(np); + of_node_put(np); + if (IS_ERR_OR_NULL(pdev)) + return NULL; + + return &pdev->dev; +} + static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) { struct device *cpu_dev; - struct regulator *proc_reg = ERR_PTR(-ENODEV); - struct regulator *sram_reg = ERR_PTR(-ENODEV); - struct clk *cpu_clk = ERR_PTR(-ENODEV); - struct clk *inter_clk = ERR_PTR(-ENODEV); struct dev_pm_opp *opp; unsigned long rate; int ret; cpu_dev = get_cpu_device(cpu); if (!cpu_dev) { - pr_err("failed to get cpu%d device\n", cpu); + dev_err(cpu_dev, "failed to get cpu%d device\n", cpu); return -ENODEV; } + info->cpu_dev = cpu_dev; - cpu_clk = clk_get(cpu_dev, "cpu"); - if (IS_ERR(cpu_clk)) { - if (PTR_ERR(cpu_clk) == -EPROBE_DEFER) - pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu); - else - pr_err("failed to get cpu clk for cpu%d\n", cpu); - - ret = PTR_ERR(cpu_clk); - return ret; + info->ccifreq_bound = false; + if (info->soc_data->ccifreq_supported) { + info->cci_dev = of_get_cci(info->cpu_dev); + if (IS_ERR_OR_NULL(info->cci_dev)) { + ret = PTR_ERR(info->cci_dev); + dev_err(cpu_dev, "cpu%d: failed to get cci device\n", cpu); + return -ENODEV; + } } - inter_clk = clk_get(cpu_dev, "intermediate"); - if (IS_ERR(inter_clk)) { - if (PTR_ERR(inter_clk) == -EPROBE_DEFER) - pr_warn("intermediate clk for cpu%d not ready, retry.\n", - cpu); - 
else - pr_err("failed to get intermediate clk for cpu%d\n", - cpu); + info->cpu_clk = clk_get(cpu_dev, "cpu"); + if (IS_ERR(info->cpu_clk)) { + ret = PTR_ERR(info->cpu_clk); + return dev_err_probe(cpu_dev, ret, + "cpu%d: failed to get cpu clk\n", cpu); + } - ret = PTR_ERR(inter_clk); + info->inter_clk = clk_get(cpu_dev, "intermediate"); + if (IS_ERR(info->inter_clk)) { + ret = PTR_ERR(info->inter_clk); + dev_err_probe(cpu_dev, ret, + "cpu%d: failed to get intermediate clk\n", cpu); goto out_free_resources; } - proc_reg = regulator_get_optional(cpu_dev, "proc"); - if (IS_ERR(proc_reg)) { - if (PTR_ERR(proc_reg) == -EPROBE_DEFER) - pr_warn("proc regulator for cpu%d not ready, retry.\n", - cpu); - else - pr_err("failed to get proc regulator for cpu%d\n", - cpu); + info->proc_reg = regulator_get_optional(cpu_dev, "proc"); + if (IS_ERR(info->proc_reg)) { + ret = PTR_ERR(info->proc_reg); + dev_err_probe(cpu_dev, ret, + "cpu%d: failed to get proc regulator\n", cpu); + goto out_free_resources; + } - ret = PTR_ERR(proc_reg); + ret = regulator_enable(info->proc_reg); + if (ret) { + dev_warn(cpu_dev, "cpu%d: failed to enable vproc\n", cpu); goto out_free_resources; } /* Both presence and absence of sram regulator are valid cases. */ - sram_reg = regulator_get_exclusive(cpu_dev, "sram"); + info->sram_reg = regulator_get_optional(cpu_dev, "sram"); + if (IS_ERR(info->sram_reg)) + info->sram_reg = NULL; + else { + ret = regulator_enable(info->sram_reg); + if (ret) { + dev_warn(cpu_dev, "cpu%d: failed to enable vsram\n", cpu); + goto out_free_resources; + } + } /* Get OPP-sharing information from "operating-points-v2" bindings */ ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, &info->cpus); if (ret) { - pr_err("failed to get OPP-sharing information for cpu%d\n", - cpu); + dev_err(cpu_dev, + "cpu%d: failed to get OPP-sharing information\n", cpu); goto out_free_resources; } ret = dev_pm_opp_of_cpumask_add_table(&info->cpus); if (ret) { - pr_warn("no OPP table for cpu%d\n", cpu); + dev_warn(cpu_dev, "cpu%d: no OPP table\n", cpu); goto out_free_resources; } + ret = clk_prepare_enable(info->cpu_clk); + if (ret) + goto out_free_opp_table; + + ret = clk_prepare_enable(info->inter_clk); + if (ret) + goto out_disable_mux_clock; + + if (info->soc_data->ccifreq_supported) { + info->vproc_on_boot = regulator_get_voltage(info->proc_reg); + if (info->vproc_on_boot < 0) { + dev_err(info->cpu_dev, + "invalid Vproc value: %d\n", info->vproc_on_boot); + goto out_disable_inter_clock; + } + } + /* Search a safe voltage for intermediate frequency. */ - rate = clk_get_rate(inter_clk); + rate = clk_get_rate(info->inter_clk); opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate); if (IS_ERR(opp)) { - pr_err("failed to get intermediate opp for cpu%d\n", cpu); + dev_err(cpu_dev, "cpu%d: failed to get intermediate opp\n", cpu); ret = PTR_ERR(opp); - goto out_free_opp_table; + goto out_disable_inter_clock; } info->intermediate_voltage = dev_pm_opp_get_voltage(opp); dev_pm_opp_put(opp); - info->cpu_dev = cpu_dev; - info->proc_reg = proc_reg; - info->sram_reg = IS_ERR(sram_reg) ? 
NULL : sram_reg; - info->cpu_clk = cpu_clk; - info->inter_clk = inter_clk; + mutex_init(&info->reg_lock); + info->current_freq = clk_get_rate(info->cpu_clk); + + info->opp_cpu = cpu; + info->opp_nb.notifier_call = mtk_cpufreq_opp_notifier; + ret = dev_pm_opp_register_notifier(cpu_dev, &info->opp_nb); + if (ret) { + dev_err(cpu_dev, "cpu%d: failed to register opp notifier\n", cpu); + goto out_disable_inter_clock; + } /* * If SRAM regulator is present, software "voltage tracking" is needed * for this CPU power domain. */ - info->need_voltage_tracking = !IS_ERR(sram_reg); + info->need_voltage_tracking = (info->sram_reg != NULL); + + /* + * We assume the min voltage is 0 and track the target voltage using + * min_volt_shift for each iteration. + * The vtrack_max is 3 times the expected iteration count. + */ + info->vtrack_max = 3 * DIV_ROUND_UP(max(info->soc_data->sram_max_volt, + info->soc_data->proc_max_volt), + info->soc_data->min_volt_shift); return 0; +out_disable_inter_clock: + clk_disable_unprepare(info->inter_clk); + +out_disable_mux_clock: + clk_disable_unprepare(info->cpu_clk); + out_free_opp_table: dev_pm_opp_of_cpumask_remove_table(&info->cpus); out_free_resources: - if (!IS_ERR(proc_reg)) - regulator_put(proc_reg); - if (!IS_ERR(sram_reg)) - regulator_put(sram_reg); - if (!IS_ERR(cpu_clk)) - clk_put(cpu_clk); - if (!IS_ERR(inter_clk)) - clk_put(inter_clk); + if (regulator_is_enabled(info->proc_reg)) + regulator_disable(info->proc_reg); + if (info->sram_reg && regulator_is_enabled(info->sram_reg)) + regulator_disable(info->sram_reg); - return ret; -} - -static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) -{ if (!IS_ERR(info->proc_reg)) regulator_put(info->proc_reg); if (!IS_ERR(info->sram_reg)) @@ -420,7 +543,30 @@ static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) if (!IS_ERR(info->inter_clk)) clk_put(info->inter_clk); + return ret; +} + +static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) +{ + if (!IS_ERR(info->proc_reg)) { + regulator_disable(info->proc_reg); + regulator_put(info->proc_reg); + } + if (!IS_ERR(info->sram_reg)) { + regulator_disable(info->sram_reg); + regulator_put(info->sram_reg); + } + if (!IS_ERR(info->cpu_clk)) { + clk_disable_unprepare(info->cpu_clk); + clk_put(info->cpu_clk); + } + if (!IS_ERR(info->inter_clk)) { + clk_disable_unprepare(info->inter_clk); + clk_put(info->inter_clk); + } + dev_pm_opp_of_cpumask_remove_table(&info->cpus); + dev_pm_opp_unregister_notifier(info->cpu_dev, &info->opp_nb); } static int mtk_cpufreq_init(struct cpufreq_policy *policy) @@ -432,14 +578,15 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy) info = mtk_cpu_dvfs_info_lookup(policy->cpu); if (!info) { pr_err("dvfs info for cpu%d is not initialized.\n", - policy->cpu); + policy->cpu); return -EINVAL; } ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table); if (ret) { - pr_err("failed to init cpufreq table for cpu%d: %d\n", - policy->cpu, ret); + dev_err(info->cpu_dev, + "failed to init cpufreq table for cpu%d: %d\n", + policy->cpu, ret); return ret; } @@ -476,9 +623,17 @@ static struct cpufreq_driver mtk_cpufreq_driver = { static int mtk_cpufreq_probe(struct platform_device *pdev) { + const struct mtk_cpufreq_platform_data *data; struct mtk_cpu_dvfs_info *info, *tmp; int cpu, ret; + data = dev_get_platdata(&pdev->dev); + if (!data) { + dev_err(&pdev->dev, + "failed to get mtk cpufreq platform data\n"); + return -ENODEV; + } + for_each_possible_cpu(cpu) { info = mtk_cpu_dvfs_info_lookup(cpu); if (info) @@
-490,6 +645,7 @@ static int mtk_cpufreq_probe(struct platform_device *pdev) goto release_dvfs_info_list; } + info->soc_data = data; ret = mtk_cpu_dvfs_info_init(info, cpu); if (ret) { dev_err(&pdev->dev, @@ -525,20 +681,47 @@ static struct platform_driver mtk_cpufreq_platdrv = { .probe = mtk_cpufreq_probe, }; +static const struct mtk_cpufreq_platform_data mt2701_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 200000, + .proc_max_volt = 1150000, + .sram_min_volt = 0, + .sram_max_volt = 1150000, + .ccifreq_supported = false, +}; + +static const struct mtk_cpufreq_platform_data mt8183_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 200000, + .proc_max_volt = 1150000, + .sram_min_volt = 0, + .sram_max_volt = 1150000, + .ccifreq_supported = true, +}; + +static const struct mtk_cpufreq_platform_data mt8186_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 250000, + .proc_max_volt = 1118750, + .sram_min_volt = 850000, + .sram_max_volt = 1118750, + .ccifreq_supported = true, +}; + /* List of machines supported by this driver */ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { - { .compatible = "mediatek,mt2701", }, - { .compatible = "mediatek,mt2712", }, - { .compatible = "mediatek,mt7622", }, - { .compatible = "mediatek,mt7623", }, - { .compatible = "mediatek,mt8167", }, - { .compatible = "mediatek,mt817x", }, - { .compatible = "mediatek,mt8173", }, - { .compatible = "mediatek,mt8176", }, - { .compatible = "mediatek,mt8183", }, - { .compatible = "mediatek,mt8365", }, - { .compatible = "mediatek,mt8516", }, - + { .compatible = "mediatek,mt2701", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt2712", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt7622", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt7623", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8167", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt817x", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8173", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8176", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8183", .data = &mt8183_platform_data }, + { .compatible = "mediatek,mt8186", .data = &mt8186_platform_data }, + { .compatible = "mediatek,mt8365", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8516", .data = &mt2701_platform_data }, { } }; MODULE_DEVICE_TABLE(of, mtk_cpufreq_machines); @@ -547,7 +730,7 @@ static int __init mtk_cpufreq_driver_init(void) { struct device_node *np; const struct of_device_id *match; - struct platform_device *pdev; + const struct mtk_cpufreq_platform_data *data; int err; np = of_find_node_by_path("/"); @@ -560,6 +743,7 @@ static int __init mtk_cpufreq_driver_init(void) pr_debug("Machine is not compatible with mtk-cpufreq\n"); return -ENODEV; } + data = match->data; err = platform_driver_register(&mtk_cpufreq_platdrv); if (err) @@ -571,16 +755,24 @@ static int __init mtk_cpufreq_driver_init(void) * and the device registration codes are put here to handle defer * probing. 
*/ - pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0); - if (IS_ERR(pdev)) { + cpufreq_pdev = platform_device_register_data(NULL, "mtk-cpufreq", -1, + data, sizeof(*data)); + if (IS_ERR(cpufreq_pdev)) { pr_err("failed to register mtk-cpufreq platform device\n"); platform_driver_unregister(&mtk_cpufreq_platdrv); - return PTR_ERR(pdev); + return PTR_ERR(cpufreq_pdev); } return 0; } -device_initcall(mtk_cpufreq_driver_init); +module_init(mtk_cpufreq_driver_init) + +static void __exit mtk_cpufreq_driver_exit(void) +{ + platform_device_unregister(cpufreq_pdev); + platform_driver_unregister(&mtk_cpufreq_platdrv); +} +module_exit(mtk_cpufreq_driver_exit) MODULE_DESCRIPTION("MediaTek CPUFreq driver"); MODULE_AUTHOR("Pi-Cheng Chen "); diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index ac381db25dbe..2a6a98764a8c 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved + * Copyright (c) 2020 - 2022, NVIDIA CORPORATION. All rights reserved */ #include @@ -24,6 +24,17 @@ #define CPUFREQ_TBL_STEP_HZ (50 * KHZ * KHZ) #define MAX_CNT ~0U +#define NDIV_MASK 0x1FF + +#define CORE_OFFSET(cpu) (cpu * 8) +#define CMU_CLKS_BASE 0x2000 +#define SCRATCH_FREQ_CORE_REG(data, cpu) (data->regs + CMU_CLKS_BASE + CORE_OFFSET(cpu)) + +#define MMCRAB_CLUSTER_BASE(cl) (0x30000 + (cl * 0x10000)) +#define CLUSTER_ACTMON_BASE(data, cl) \ + (data->regs + (MMCRAB_CLUSTER_BASE(cl) + data->soc->actmon_cntr_base)) +#define CORE_ACTMON_CNTR_REG(data, cl, cpu) (CLUSTER_ACTMON_BASE(data, cl) + CORE_OFFSET(cpu)) + /* cpufreq transisition latency */ #define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */ @@ -35,12 +46,6 @@ enum cluster { MAX_CLUSTERS, }; -struct tegra194_cpufreq_data { - void __iomem *regs; - size_t num_clusters; - struct cpufreq_frequency_table **tables; -}; - struct tegra_cpu_ctr { u32 cpu; u32 coreclk_cnt, last_coreclk_cnt; @@ -52,13 +57,127 @@ struct read_counters_work { struct tegra_cpu_ctr c; }; +struct tegra_cpufreq_ops { + void (*read_counters)(struct tegra_cpu_ctr *c); + void (*set_cpu_ndiv)(struct cpufreq_policy *policy, u64 ndiv); + void (*get_cpu_cluster_id)(u32 cpu, u32 *cpuid, u32 *clusterid); + int (*get_cpu_ndiv)(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv); +}; + +struct tegra_cpufreq_soc { + struct tegra_cpufreq_ops *ops; + int maxcpus_per_cluster; + phys_addr_t actmon_cntr_base; +}; + +struct tegra194_cpufreq_data { + void __iomem *regs; + size_t num_clusters; + struct cpufreq_frequency_table **tables; + const struct tegra_cpufreq_soc *soc; +}; + static struct workqueue_struct *read_counters_wq; -static void get_cpu_cluster(void *cluster) +static void tegra_get_cpu_mpidr(void *mpidr) { - u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + *((u64 *)mpidr) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; +} - *((uint32_t *)cluster) = MPIDR_AFFINITY_LEVEL(mpidr, 1); +static void tegra234_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) +{ + u64 mpidr; + + smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); + + if (cpuid) + *cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + if (clusterid) + *clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 2); +} + +static int tegra234_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + void __iomem *freq_core_reg; + u64 mpidr_id; + + /* use physical id to get 
address of per core frequency register */ + mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid; + freq_core_reg = SCRATCH_FREQ_CORE_REG(data, mpidr_id); + + *ndiv = readl(freq_core_reg) & NDIV_MASK; + + return 0; +} + +static void tegra234_set_cpu_ndiv(struct cpufreq_policy *policy, u64 ndiv) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + void __iomem *freq_core_reg; + u32 cpu, cpuid, clusterid; + u64 mpidr_id; + + for_each_cpu_and(cpu, policy->cpus, cpu_online_mask) { + data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid); + + /* use physical id to get address of per core frequency register */ + mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid; + freq_core_reg = SCRATCH_FREQ_CORE_REG(data, mpidr_id); + + writel(ndiv, freq_core_reg); + } +} + +/* + * This register provides access to two counter values with a single + * 64-bit read. The counter values are used to determine the average + * actual frequency a core has run at over a period of time. + * [63:32] PLLP counter: Counts at fixed frequency (408 MHz) + * [31:0] Core clock counter: Counts on every core clock cycle + */ +static void tegra234_read_counters(struct tegra_cpu_ctr *c) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + void __iomem *actmon_reg; + u32 cpuid, clusterid; + u64 val; + + data->soc->ops->get_cpu_cluster_id(c->cpu, &cpuid, &clusterid); + actmon_reg = CORE_ACTMON_CNTR_REG(data, clusterid, cpuid); + + val = readq(actmon_reg); + c->last_refclk_cnt = upper_32_bits(val); + c->last_coreclk_cnt = lower_32_bits(val); + udelay(US_DELAY); + val = readq(actmon_reg); + c->refclk_cnt = upper_32_bits(val); + c->coreclk_cnt = lower_32_bits(val); +} + +static struct tegra_cpufreq_ops tegra234_cpufreq_ops = { + .read_counters = tegra234_read_counters, + .get_cpu_cluster_id = tegra234_get_cpu_cluster_id, + .get_cpu_ndiv = tegra234_get_cpu_ndiv, + .set_cpu_ndiv = tegra234_set_cpu_ndiv, +}; + +const struct tegra_cpufreq_soc tegra234_cpufreq_soc = { + .ops = &tegra234_cpufreq_ops, + .actmon_cntr_base = 0x9000, + .maxcpus_per_cluster = 4, +}; + +static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) +{ + u64 mpidr; + + smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); + + if (cpuid) + *cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + if (clusterid) + *clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); } /* @@ -85,11 +204,24 @@ static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv); } +static void tegra194_read_counters(struct tegra_cpu_ctr *c) +{ + u64 val; + + val = read_freq_feedback(); + c->last_refclk_cnt = lower_32_bits(val); + c->last_coreclk_cnt = upper_32_bits(val); + udelay(US_DELAY); + val = read_freq_feedback(); + c->refclk_cnt = lower_32_bits(val); + c->coreclk_cnt = upper_32_bits(val); +} + static void tegra_read_counters(struct work_struct *work) { + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); struct read_counters_work *read_counters_work; struct tegra_cpu_ctr *c; - u64 val; /* * ref_clk_counter(32 bit counter) runs on constant clk, @@ -107,13 +239,7 @@ static void tegra_read_counters(struct work_struct *work) work); c = &read_counters_work->c; - val = read_freq_feedback(); - c->last_refclk_cnt = lower_32_bits(val); - c->last_coreclk_cnt = upper_32_bits(val); - udelay(US_DELAY); - val = read_freq_feedback(); - c->refclk_cnt = lower_32_bits(val); - c->coreclk_cnt = upper_32_bits(val); + data->soc->ops->read_counters(c); } 
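To spell out the arithmetic behind the counter reads above: both counters are sampled twice, US_DELAY apart, and the average core frequency is the fixed reference rate scaled by the ratio of the two deltas. A minimal sketch with made-up sample counts (the 408 MHz constant comes from the PLLP comment above; everything else is illustrative)::

  /*
   * avg core MHz = ref MHz * (core clock delta / reference delta).
   * The counters are 32-bit; unsigned subtraction handles wraparound.
   */
  #include <stdint.h>
  #include <stdio.h>

  #define REF_CLK_MHZ 408 /* fixed-rate PLLP reference counter */

  static uint32_t avg_mhz(uint32_t ref0, uint32_t core0,
                          uint32_t ref1, uint32_t core1)
  {
          uint32_t delta_ref = ref1 - ref0;
          uint32_t delta_core = core1 - core0;

          return (uint32_t)(((uint64_t)REF_CLK_MHZ * delta_core) / delta_ref);
  }

  int main(void)
  {
          /* core counter advanced 5x faster than the 408 MHz reference */
          printf("%u MHz\n", avg_mhz(1000, 1000, 1408, 3040));
          return 0;
  }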
/* @@ -177,7 +303,7 @@ static unsigned int tegra194_calculate_speed(u32 cpu) return (rate_mhz * KHZ); /* in KHz */ } -static void get_cpu_ndiv(void *ndiv) +static void tegra194_get_cpu_ndiv_sysreg(void *ndiv) { u64 ndiv_val; @@ -186,30 +312,43 @@ static void get_cpu_ndiv(void *ndiv) *(u64 *)ndiv = ndiv_val; } -static void set_cpu_ndiv(void *data) +static int tegra194_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) { - struct cpufreq_frequency_table *tbl = data; - u64 ndiv_val = (u64)tbl->driver_data; + int ret; + + ret = smp_call_function_single(cpu, tegra194_get_cpu_ndiv_sysreg, ndiv, true); + + return ret; +} + +static void tegra194_set_cpu_ndiv_sysreg(void *data) +{ + u64 ndiv_val = *(u64 *)data; asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv_val)); } +static void tegra194_set_cpu_ndiv(struct cpufreq_policy *policy, u64 ndiv) +{ + on_each_cpu_mask(policy->cpus, tegra194_set_cpu_ndiv_sysreg, &ndiv, true); +} + static unsigned int tegra194_get_speed(u32 cpu) { struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); struct cpufreq_frequency_table *pos; + u32 cpuid, clusterid; unsigned int rate; u64 ndiv; int ret; - u32 cl; - smp_call_function_single(cpu, get_cpu_cluster, &cl, true); + data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid); /* reconstruct actual cpu freq using counters */ rate = tegra194_calculate_speed(cpu); /* get last written ndiv value */ - ret = smp_call_function_single(cpu, get_cpu_ndiv, &ndiv, true); + ret = data->soc->ops->get_cpu_ndiv(cpu, cpuid, clusterid, &ndiv); if (WARN_ON_ONCE(ret)) return rate; @@ -219,7 +358,7 @@ * to the last written ndiv value from freq_table. This is * done to return consistent value. */ - cpufreq_for_each_valid_entry(pos, data->tables[cl]) { + cpufreq_for_each_valid_entry(pos, data->tables[clusterid]) { if (pos->driver_data != ndiv) continue; @@ -237,19 +376,22 @@ static unsigned int tegra194_get_speed(u32 cpu) static int tegra194_cpufreq_init(struct cpufreq_policy *policy) { struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); - u32 cpu; - u32 cl; + int maxcpus_per_cluster = data->soc->maxcpus_per_cluster; + u32 start_cpu, cpu; + u32 clusterid; - smp_call_function_single(policy->cpu, get_cpu_cluster, &cl, true); + data->soc->ops->get_cpu_cluster_id(policy->cpu, NULL, &clusterid); - if (cl >= data->num_clusters || !data->tables[cl]) + if (clusterid >= data->num_clusters || !data->tables[clusterid]) return -EINVAL; + start_cpu = rounddown(policy->cpu, maxcpus_per_cluster); /* set same policy for all cpus in a cluster */ - for (cpu = (cl * 2); cpu < ((cl + 1) * 2); cpu++) - cpumask_set_cpu(cpu, policy->cpus); - - policy->freq_table = data->tables[cl]; + for (cpu = start_cpu; cpu < (start_cpu + maxcpus_per_cluster); cpu++) { + if (cpu_possible(cpu)) + cpumask_set_cpu(cpu, policy->cpus); + } + policy->freq_table = data->tables[clusterid]; policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY; return 0; @@ -259,13 +401,14 @@ static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { struct cpufreq_frequency_table *tbl = policy->freq_table + index; + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); /* * Each core writes frequency in per core register. Then both cores * in a cluster run at same frequency which is the maximum frequency * request out of the values requested by both cores in that cluster.
*/ - on_each_cpu_mask(policy->cpus, set_cpu_ndiv, tbl, true); + data->soc->ops->set_cpu_ndiv(policy, (u64)tbl->driver_data); return 0; } @@ -280,6 +423,18 @@ static struct cpufreq_driver tegra194_cpufreq_driver = { .attr = cpufreq_generic_attr, }; +static struct tegra_cpufreq_ops tegra194_cpufreq_ops = { + .read_counters = tegra194_read_counters, + .get_cpu_cluster_id = tegra194_get_cpu_cluster_id, + .get_cpu_ndiv = tegra194_get_cpu_ndiv, + .set_cpu_ndiv = tegra194_set_cpu_ndiv, +}; + +const struct tegra_cpufreq_soc tegra194_cpufreq_soc = { + .ops = &tegra194_cpufreq_ops, + .maxcpus_per_cluster = 2, +}; + static void tegra194_cpufreq_free_resources(void) { destroy_workqueue(read_counters_wq); @@ -359,6 +514,7 @@ init_freq_table(struct platform_device *pdev, struct tegra_bpmp *bpmp, static int tegra194_cpufreq_probe(struct platform_device *pdev) { + const struct tegra_cpufreq_soc *soc; struct tegra194_cpufreq_data *data; struct tegra_bpmp *bpmp; int err, i; @@ -367,12 +523,28 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) if (!data) return -ENOMEM; + soc = of_device_get_match_data(&pdev->dev); + + if (soc->ops && soc->maxcpus_per_cluster) { + data->soc = soc; + } else { + dev_err(&pdev->dev, "soc data missing\n"); + return -EINVAL; + } + data->num_clusters = MAX_CLUSTERS; data->tables = devm_kcalloc(&pdev->dev, data->num_clusters, sizeof(*data->tables), GFP_KERNEL); if (!data->tables) return -ENOMEM; + if (soc->actmon_cntr_base) { + /* mmio registers are used for frequency request and re-construction */ + data->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(data->regs)) + return PTR_ERR(data->regs); + } + platform_set_drvdata(pdev, data); bpmp = tegra_bpmp_get(&pdev->dev); @@ -416,10 +588,10 @@ static int tegra194_cpufreq_remove(struct platform_device *pdev) } static const struct of_device_id tegra194_cpufreq_of_match[] = { - { .compatible = "nvidia,tegra194-ccplex", }, + { .compatible = "nvidia,tegra194-ccplex", .data = &tegra194_cpufreq_soc }, + { .compatible = "nvidia,tegra234-ccplex-cluster", .data = &tegra234_cpufreq_soc }, { /* sentinel */ } }; -MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match); static struct platform_driver tegra194_ccplex_driver = { .driver = { diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 6c2a421b86e3..f305643209f0 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -630,7 +630,7 @@ static irqreturn_t emif_threaded_isr(int irq, void *dev_id) dev_emerg(emif->dev, "SDRAM temperature exceeds operating limit.. Needs shut down!!!\n"); /* If we have Power OFF ability, use it, else try restarting */ - if (pm_power_off) { + if (kernel_can_power_off()) { kernel_power_off(); } else { WARN(1, "FIXME: NO pm_power_off!!! trying restart\n"); diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 740407252298..84063eaebb91 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -456,103 +456,6 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); -/** - * dev_pm_opp_find_level_exact() - search for an exact level - * @dev: device for which we do this operation - * @level: level to search for - * - * Return: Searches for exact match in the opp table and returns pointer to the - * matching opp if found, else returns ERR_PTR in case of error and should - * be handled using IS_ERR. 
Error return values can be: - * EINVAL: for bad pointer - * ERANGE: no match found for search - * ENODEV: if device not found in list of registered devices - * - * The callers are required to call dev_pm_opp_put() for the returned OPP after - * use. - */ -struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, - unsigned int level) -{ - struct opp_table *opp_table; - struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); - - opp_table = _find_opp_table(dev); - if (IS_ERR(opp_table)) { - int r = PTR_ERR(opp_table); - - dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); - return ERR_PTR(r); - } - - mutex_lock(&opp_table->lock); - - list_for_each_entry(temp_opp, &opp_table->opp_list, node) { - if (temp_opp->level == level) { - opp = temp_opp; - - /* Increment the reference count of OPP */ - dev_pm_opp_get(opp); - break; - } - } - - mutex_unlock(&opp_table->lock); - dev_pm_opp_put_opp_table(opp_table); - - return opp; -} -EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_exact); - -/** - * dev_pm_opp_find_level_ceil() - search for an rounded up level - * @dev: device for which we do this operation - * @level: level to search for - * - * Return: Searches for rounded up match in the opp table and returns pointer - * to the matching opp if found, else returns ERR_PTR in case of error and - * should be handled using IS_ERR. Error return values can be: - * EINVAL: for bad pointer - * ERANGE: no match found for search - * ENODEV: if device not found in list of registered devices - * - * The callers are required to call dev_pm_opp_put() for the returned OPP after - * use. - */ -struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, - unsigned int *level) -{ - struct opp_table *opp_table; - struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); - - opp_table = _find_opp_table(dev); - if (IS_ERR(opp_table)) { - int r = PTR_ERR(opp_table); - - dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); - return ERR_PTR(r); - } - - mutex_lock(&opp_table->lock); - - list_for_each_entry(temp_opp, &opp_table->opp_list, node) { - if (temp_opp->available && temp_opp->level >= *level) { - opp = temp_opp; - *level = opp->level; - - /* Increment the reference count of OPP */ - dev_pm_opp_get(opp); - break; - } - } - - mutex_unlock(&opp_table->lock); - dev_pm_opp_put_opp_table(opp_table); - - return opp; -} -EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_ceil); - static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table, unsigned long *freq) { @@ -729,6 +632,223 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_by_volt); +/** + * dev_pm_opp_find_level_exact() - search for an exact level + * @dev: device for which we do this operation + * @level: level to search for + * + * Return: Searches for exact match in the opp table and returns pointer to the + * matching opp if found, else returns ERR_PTR in case of error and should + * be handled using IS_ERR. Error return values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. 
+ */ +struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + int r = PTR_ERR(opp_table); + + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); + return ERR_PTR(r); + } + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->level == level) { + opp = temp_opp; + + /* Increment the reference count of OPP */ + dev_pm_opp_get(opp); + break; + } + } + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_exact); + +/** + * dev_pm_opp_find_level_ceil() - search for a rounded up level + * @dev: device for which we do this operation + * @level: level to search for + * + * Return: Searches for rounded up match in the opp table and returns pointer + * to the matching opp if found, else returns ERR_PTR in case of error and + * should be handled using IS_ERR. Error return values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, + unsigned int *level) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + int r = PTR_ERR(opp_table); + + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); + return ERR_PTR(r); + } + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available && temp_opp->level >= *level) { + opp = temp_opp; + *level = opp->level; + + /* Increment the reference count of OPP */ + dev_pm_opp_get(opp); + break; + } + } + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_ceil); + +/** + * dev_pm_opp_find_bw_ceil() - Search for a rounded ceil bandwidth + * @dev: device for which we do this operation + * @bw: start bandwidth + * @index: which bandwidth to compare, in case of OPPs with several values + * + * Search for the matching ceil *available* OPP from a starting bandwidth + * for a device. + * + * Return: matching *opp and refreshes *bw accordingly, else returns + * ERR_PTR in case of error and should be handled using IS_ERR. Error return + * values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use.
+ */ +struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, + unsigned int *bw, int index) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + if (!dev || !bw) { + dev_err(dev, "%s: Invalid argument bw=%p\n", __func__, bw); + return ERR_PTR(-EINVAL); + } + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); + + if (index >= opp_table->path_count) + return ERR_PTR(-EINVAL); + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available && temp_opp->bandwidth) { + if (temp_opp->bandwidth[index].peak >= *bw) { + opp = temp_opp; + *bw = opp->bandwidth[index].peak; + + /* Increment the reference count of OPP */ + dev_pm_opp_get(opp); + break; + } + } + } + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_bw_ceil); + +/** + * dev_pm_opp_find_bw_floor() - Search for a rounded floor bandwidth + * @dev: device for which we do this operation + * @bw: start bandwidth + * @index: which bandwidth to compare, in case of OPPs with several values + * + * Search for the matching floor *available* OPP from a starting bandwidth + * for a device. + * + * Return: matching *opp and refreshes *bw accordingly, else returns + * ERR_PTR in case of error and should be handled using IS_ERR. Error return + * values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, + unsigned int *bw, int index) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + if (!dev || !bw) { + dev_err(dev, "%s: Invalid argument bw=%p\n", __func__, bw); + return ERR_PTR(-EINVAL); + } + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); + + if (index >= opp_table->path_count) + return ERR_PTR(-EINVAL); + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available && temp_opp->bandwidth) { + /* go to the next node, before choosing prev */ + if (temp_opp->bandwidth[index].peak > *bw) + break; + opp = temp_opp; + } + } + + /* Increment the reference count of OPP */ + if (!IS_ERR(opp)) + dev_pm_opp_get(opp); + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + if (!IS_ERR(opp)) + *bw = opp->bandwidth[index].peak; + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_bw_floor); + static int _set_opp_voltage(struct device *dev, struct regulator *reg, struct dev_pm_opp_supply *supply) { @@ -1486,9 +1606,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put); */ void dev_pm_opp_remove(struct device *dev, unsigned long freq) { - struct dev_pm_opp *opp; + struct dev_pm_opp *opp = NULL, *iter; struct opp_table *opp_table; - bool found = false; opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) @@ -1496,16 +1615,16 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq) mutex_lock(&opp_table->lock); - list_for_each_entry(opp, &opp_table->opp_list, node) { - if (opp->rate == freq) { - found = true; + list_for_each_entry(iter, &opp_table->opp_list, node) { + if (iter->rate == freq) { + opp = iter; break; } } mutex_unlock(&opp_table->lock); - if (found) { + if (opp) { dev_pm_opp_put(opp); /* Drop the reference taken by
dev_pm_opp_add() */ @@ -2019,10 +2138,9 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, for (i = 0; i < count; i++) { reg = regulator_get_optional(dev, names[i]); if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - if (ret != -EPROBE_DEFER) - dev_err(dev, "%s: no regulator (%s) found: %d\n", - __func__, names[i], ret); + ret = dev_err_probe(dev, PTR_ERR(reg), + "%s: no regulator (%s) found\n", + __func__, names[i]); goto free_regulators; } @@ -2168,11 +2286,8 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) /* Find clk for the device */ opp_table->clk = clk_get(dev, name); if (IS_ERR(opp_table->clk)) { - ret = PTR_ERR(opp_table->clk); - if (ret != -EPROBE_DEFER) { - dev_err(dev, "%s: Couldn't find clock: %d\n", __func__, - ret); - } + ret = dev_err_probe(dev, PTR_ERR(opp_table->clk), + "%s: Couldn't find clock\n", __func__); goto err; } diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 3fcc1f97f2d1..1b6e5c55c3ed 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -195,14 +195,18 @@ void opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table) static void opp_migrate_dentry(struct opp_device *opp_dev, struct opp_table *opp_table) { - struct opp_device *new_dev; + struct opp_device *new_dev = NULL, *iter; const struct device *dev; struct dentry *dentry; /* Look for next opp-dev */ - list_for_each_entry(new_dev, &opp_table->dev_list, node) - if (new_dev != opp_dev) + list_for_each_entry(iter, &opp_table->dev_list, node) + if (iter != opp_dev) { + new_dev = iter; break; + } + + BUG_ON(!new_dev); /* new_dev is guaranteed to be valid here */ dev = new_dev->dev; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 485ea980bde7..30394929d700 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -437,11 +437,11 @@ static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table) /* Checking only first OPP is sufficient */ np = of_get_next_available_child(opp_np, NULL); + of_node_put(opp_np); if (!np) { dev_err(dev, "OPP table empty\n"); return -EINVAL; } - of_node_put(opp_np); prop = of_find_property(np, "opp-peak-kBps", NULL); of_node_put(np); diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index aa55cfca9e40..6b617024a67d 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -571,10 +572,10 @@ static inline struct device_node *match_of_node(int index) return pfuze_matches[index].of_node; } -static struct pfuze_chip *syspm_pfuze_chip; - -static void pfuze_power_off_prepare(void) +static int pfuze_power_off_prepare(struct sys_off_data *data) { + struct pfuze_chip *syspm_pfuze_chip = data->cb_data; + dev_info(syspm_pfuze_chip->dev, "Configure standby mode for power off"); /* Switch from default mode: APS/APS to APS/Off */ @@ -609,28 +610,30 @@ static void pfuze_power_off_prepare(void) regmap_update_bits(syspm_pfuze_chip->regmap, PFUZE100_VGEN6VOL, PFUZE100_VGENxLPWR | PFUZE100_VGENxSTBY, PFUZE100_VGENxSTBY); + + return NOTIFY_DONE; } static int pfuze_power_off_prepare_init(struct pfuze_chip *pfuze_chip) { + int err; + if (pfuze_chip->chip_id != PFUZE100) { dev_warn(pfuze_chip->dev, "Requested pm_power_off_prepare handler for not supported chip\n"); return -ENODEV; } - if (pm_power_off_prepare) { - dev_warn(pfuze_chip->dev, "pm_power_off_prepare is already registered.\n"); - return -EBUSY; + err = 
devm_register_sys_off_handler(pfuze_chip->dev, + SYS_OFF_MODE_POWER_OFF_PREPARE, + SYS_OFF_PRIO_DEFAULT, + pfuze_power_off_prepare, + pfuze_chip); + if (err) { + dev_err(pfuze_chip->dev, "failed to register sys-off handler: %d\n", + err); + return err; } - if (syspm_pfuze_chip) { - dev_warn(pfuze_chip->dev, "syspm_pfuze_chip is already set.\n"); - return -EBUSY; - } - - syspm_pfuze_chip = pfuze_chip; - pm_power_off_prepare = pfuze_power_off_prepare; - return 0; } @@ -839,23 +842,12 @@ static int pfuze100_regulator_probe(struct i2c_client *client, return 0; } -static int pfuze100_regulator_remove(struct i2c_client *client) -{ - if (syspm_pfuze_chip) { - syspm_pfuze_chip = NULL; - pm_power_off_prepare = NULL; - } - - return 0; -} - static struct i2c_driver pfuze_driver = { .driver = { .name = "pfuze100-regulator", .of_match_table = pfuze_dt_ids, }, .probe = pfuze100_regulator_probe, - .remove = pfuze100_regulator_remove, }; module_i2c_driver(pfuze_driver); diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index c77ecf61818b..5611d14d3ba2 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -108,6 +109,7 @@ #define PMC_USB_DEBOUNCE_DEL 0xec #define PMC_USB_AO 0xf0 +#define PMC_SCRATCH37 0x130 #define PMC_SCRATCH41 0x140 #define PMC_WAKE2_MASK 0x160 @@ -1101,8 +1103,7 @@ static struct notifier_block tegra_pmc_reboot_notifier = { .notifier_call = tegra_pmc_reboot_notify, }; -static int tegra_pmc_restart_notify(struct notifier_block *this, - unsigned long action, void *data) +static void tegra_pmc_restart(void) { u32 value; @@ -1110,14 +1111,31 @@ static int tegra_pmc_restart_notify(struct notifier_block *this, value = tegra_pmc_readl(pmc, PMC_CNTRL); value |= PMC_CNTRL_MAIN_RST; tegra_pmc_writel(pmc, value, PMC_CNTRL); +} + +static int tegra_pmc_restart_handler(struct sys_off_data *data) +{ + tegra_pmc_restart(); return NOTIFY_DONE; } -static struct notifier_block tegra_pmc_restart_handler = { - .notifier_call = tegra_pmc_restart_notify, - .priority = 128, -}; +static int tegra_pmc_power_off_handler(struct sys_off_data *data) +{ + /* + * Reboot Nexus 7 into special bootloader mode if USB cable is + * connected in order to display battery status and power off. + */ + if (of_machine_is_compatible("asus,grouper") && + power_supply_is_system_supplied()) { + const u32 go_to_charger_mode = 0xa5a55a5a; + + tegra_pmc_writel(pmc, go_to_charger_mode, PMC_SCRATCH37); + tegra_pmc_restart(); + } + + return NOTIFY_DONE; +} static int powergate_show(struct seq_file *s, void *data) { @@ -2879,6 +2897,42 @@ static int tegra_pmc_probe(struct platform_device *pdev) pmc->clk = NULL; } + /* + * PMC should be last resort for restarting since it soft-resets + * CPU without resetting everything else. + */ + err = devm_register_reboot_notifier(&pdev->dev, + &tegra_pmc_reboot_notifier); + if (err) { + dev_err(&pdev->dev, "unable to register reboot notifier, %d\n", + err); + return err; + } + + err = devm_register_sys_off_handler(&pdev->dev, + SYS_OFF_MODE_RESTART, + SYS_OFF_PRIO_LOW, + tegra_pmc_restart_handler, NULL); + if (err) { + dev_err(&pdev->dev, "failed to register sys-off handler: %d\n", + err); + return err; + } + + /* + * PMC should be primary power-off method if it soft-resets CPU, + * asking bootloader to shutdown hardware. 
+ */ + err = devm_register_sys_off_handler(&pdev->dev, + SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_FIRMWARE, + tegra_pmc_power_off_handler, NULL); + if (err) { + dev_err(&pdev->dev, "failed to register sys-off handler: %d\n", + err); + return err; + } + /* * PCLK clock rate can't be retrieved using CLK API because it * causes lockup if CPU enters LP2 idle state from some other @@ -2910,28 +2964,13 @@ static int tegra_pmc_probe(struct platform_device *pdev) goto cleanup_sysfs; } - err = devm_register_reboot_notifier(&pdev->dev, - &tegra_pmc_reboot_notifier); - if (err) { - dev_err(&pdev->dev, "unable to register reboot notifier, %d\n", - err); - goto cleanup_debugfs; - } - - err = register_restart_handler(&tegra_pmc_restart_handler); - if (err) { - dev_err(&pdev->dev, "unable to register restart handler, %d\n", - err); - goto cleanup_debugfs; - } - err = tegra_pmc_pinctrl_init(pmc); if (err) - goto cleanup_restart_handler; + goto cleanup_debugfs; err = tegra_pmc_regmap_init(pmc); if (err < 0) - goto cleanup_restart_handler; + goto cleanup_debugfs; err = tegra_powergate_init(pmc, pdev->dev.of_node); if (err < 0) @@ -2954,8 +2993,6 @@ static int tegra_pmc_probe(struct platform_device *pdev) cleanup_powergates: tegra_powergate_remove_all(pdev->dev.of_node); -cleanup_restart_handler: - unregister_restart_handler(&tegra_pmc_restart_handler); cleanup_debugfs: debugfs_remove(pmc->debugfs); cleanup_sysfs: diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 87069b8459af..aef88c2d1173 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -150,6 +150,11 @@ extern int raw_notifier_chain_register(struct raw_notifier_head *nh, extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); +extern int atomic_notifier_chain_register_unique_prio( + struct atomic_notifier_head *nh, struct notifier_block *nb); +extern int blocking_notifier_chain_register_unique_prio( + struct blocking_notifier_head *nh, struct notifier_block *nb); + extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, @@ -173,6 +178,8 @@ extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v); +extern bool atomic_notifier_call_chain_is_empty(struct atomic_notifier_head *nh); + #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 70ec69d8bafd..871c9c49ec9d 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -21,7 +21,6 @@ * Callbacks for platform drivers to implement. 
*/ extern void (*pm_power_off)(void); -extern void (*pm_power_off_prepare)(void); struct device; /* we have a circular dep with device.h */ #ifdef CONFIG_VT_CONSOLE_SLEEP diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0d85a63a1f78..6708b4ec244d 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -117,18 +117,25 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); -struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, - unsigned int level); -struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, - unsigned int *level); - struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, unsigned long u_volt); +struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level); +struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, + unsigned int *level); + struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq); + +struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, + unsigned int *bw, int index); + +struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, + unsigned int *bw, int index); + void dev_pm_opp_put(struct dev_pm_opp *opp); int dev_pm_opp_add(struct device *dev, unsigned long freq, @@ -243,12 +250,6 @@ static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev) return 0; } -static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, - unsigned long freq, bool available) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, unsigned int level) { @@ -261,6 +262,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, + unsigned long freq, bool available) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq) { @@ -279,6 +286,18 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, + unsigned int *bw, int index) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, + unsigned int *bw, int index) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, diff --git a/include/linux/reboot.h b/include/linux/reboot.h index a2429648d831..e5d9ef886179 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -7,6 +7,7 @@ #include struct device; +struct sys_off_handler; #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN @@ -62,6 +63,95 @@ extern void machine_shutdown(void); struct pt_regs; extern void machine_crash_shutdown(struct pt_regs *); +void do_kernel_power_off(void); + +/* + * sys-off handler API. + */ + +/* + * Standard sys-off priority levels. Users are expected to set priorities + * relative to the standard levels. + * + * SYS_OFF_PRIO_PLATFORM: Use this for platform-level handlers. + * + * SYS_OFF_PRIO_LOW: Use this for handler of last resort. 
+ * + * SYS_OFF_PRIO_DEFAULT: Use this for normal handlers. + * + * SYS_OFF_PRIO_HIGH: Use this for higher priority handlers. + * + * SYS_OFF_PRIO_FIRMWARE: Use this if handler uses firmware call. + */ +#define SYS_OFF_PRIO_PLATFORM -256 +#define SYS_OFF_PRIO_LOW -128 +#define SYS_OFF_PRIO_DEFAULT 0 +#define SYS_OFF_PRIO_HIGH 192 +#define SYS_OFF_PRIO_FIRMWARE 224 + +enum sys_off_mode { + /** + * @SYS_OFF_MODE_POWER_OFF_PREPARE: + * + * Handlers prepare the system to be powered off. Handlers are + * allowed to sleep. + */ + SYS_OFF_MODE_POWER_OFF_PREPARE, + + /** + * @SYS_OFF_MODE_POWER_OFF: + * + * Handlers power off the system. Handlers are not allowed to sleep. + */ + SYS_OFF_MODE_POWER_OFF, + + /** + * @SYS_OFF_MODE_RESTART: + * + * Handlers restart the system. Handlers are not allowed to sleep. + */ + SYS_OFF_MODE_RESTART, +}; + +/** + * struct sys_off_data - sys-off callback argument + * + * @mode: Mode ID. Currently used only by the sys-off restart mode, + * see enum reboot_mode for the available modes. + * @cb_data: User's callback data. + * @cmd: Command string. Currently used only by the sys-off restart mode, + * NULL otherwise. + */ +struct sys_off_data { + int mode; + void *cb_data; + const char *cmd; +}; + +struct sys_off_handler * +register_sys_off_handler(enum sys_off_mode mode, + int priority, + int (*callback)(struct sys_off_data *data), + void *cb_data); +void unregister_sys_off_handler(struct sys_off_handler *handler); + +int devm_register_sys_off_handler(struct device *dev, + enum sys_off_mode mode, + int priority, + int (*callback)(struct sys_off_data *data), + void *cb_data); + +int devm_register_power_off_handler(struct device *dev, + int (*callback)(struct sys_off_data *data), + void *cb_data); + +int devm_register_restart_handler(struct device *dev, + int (*callback)(struct sys_off_data *data), + void *cb_data); + +int register_platform_power_off(void (*power_off)(void)); +void unregister_platform_power_off(void (*power_off)(void)); + /* * Architecture independent implementations of sys_reboot commands.
 */ @@ -70,6 +160,7 @@ extern void kernel_restart_prepare(char *cmd); extern void kernel_restart(char *cmd); extern void kernel_halt(void); extern void kernel_power_off(void); +extern bool kernel_can_power_off(void); void ctrl_alt_del(void); diff --git a/kernel/notifier.c b/kernel/notifier.c index ba005ebf4730..0d5bd62c480e 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -20,7 +20,8 @@ BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); */ static int notifier_chain_register(struct notifier_block **nl, - struct notifier_block *n) + struct notifier_block *n, + bool unique_priority) { while ((*nl) != NULL) { if (unlikely((*nl) == n)) { @@ -30,6 +31,8 @@ static int notifier_chain_register(struct notifier_block **nl, } if (n->priority > (*nl)->priority) break; + if (n->priority == (*nl)->priority && unique_priority) + return -EBUSY; nl = &((*nl)->next); } n->next = *nl; @@ -144,12 +147,35 @@ int atomic_notifier_chain_register(struct atomic_notifier_head *nh, int ret; spin_lock_irqsave(&nh->lock, flags); - ret = notifier_chain_register(&nh->head, n); + ret = notifier_chain_register(&nh->head, n, false); spin_unlock_irqrestore(&nh->lock, flags); return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); +/** + * atomic_notifier_chain_register_unique_prio - Add notifier to an atomic notifier chain + * @nh: Pointer to head of the atomic notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to an atomic notifier chain if there is no other + * notifier registered using the same priority. + * + * Returns 0 on success, %-EEXIST or %-EBUSY on error. + */ +int atomic_notifier_chain_register_unique_prio(struct atomic_notifier_head *nh, + struct notifier_block *n) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&nh->lock, flags); + ret = notifier_chain_register(&nh->head, n, true); + spin_unlock_irqrestore(&nh->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(atomic_notifier_chain_register_unique_prio); + /** * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain * @nh: Pointer to head of the atomic notifier chain @@ -204,11 +230,44 @@ int atomic_notifier_call_chain(struct atomic_notifier_head *nh, EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); NOKPROBE_SYMBOL(atomic_notifier_call_chain); +/** + * atomic_notifier_call_chain_is_empty - Check whether notifier chain is empty + * @nh: Pointer to head of the atomic notifier chain + * + * Checks whether the notifier chain is empty. + * + * Returns true if the notifier chain is empty, false otherwise. + */ +bool atomic_notifier_call_chain_is_empty(struct atomic_notifier_head *nh) +{ + return !rcu_access_pointer(nh->head); +} + /* * Blocking notifier chain routines. All access to the chain is * synchronized by an rwsem. */ +static int __blocking_notifier_chain_register(struct blocking_notifier_head *nh, + struct notifier_block *n, + bool unique_priority) +{ + int ret; + + /* + * This code gets used during boot-up, when task switching is + * not yet working and interrupts must remain disabled. At + * such times we must not call down_write().
+ */ + if (unlikely(system_state == SYSTEM_BOOTING)) + return notifier_chain_register(&nh->head, n, unique_priority); + + down_write(&nh->rwsem); + ret = notifier_chain_register(&nh->head, n, unique_priority); + up_write(&nh->rwsem); + return ret; +} + /** * blocking_notifier_chain_register - Add notifier to a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain @@ -222,23 +281,27 @@ NOKPROBE_SYMBOL(atomic_notifier_call_chain); int blocking_notifier_chain_register(struct blocking_notifier_head *nh, struct notifier_block *n) { - int ret; - - /* - * This code gets used during boot-up, when task switching is - * not yet working and interrupts must remain disabled. At - * such times we must not call down_write(). - */ - if (unlikely(system_state == SYSTEM_BOOTING)) - return notifier_chain_register(&nh->head, n); - - down_write(&nh->rwsem); - ret = notifier_chain_register(&nh->head, n); - up_write(&nh->rwsem); - return ret; + return __blocking_notifier_chain_register(nh, n, false); } EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); +/** + * blocking_notifier_chain_register_unique_prio - Add notifier to a blocking notifier chain + * @nh: Pointer to head of the blocking notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to a blocking notifier chain if there is no other + * notifier registered using the same priority. + * + * Returns 0 on success, %-EEXIST or %-EBUSY on error. + */ +int blocking_notifier_chain_register_unique_prio(struct blocking_notifier_head *nh, + struct notifier_block *n) +{ + return __blocking_notifier_chain_register(nh, n, true); +} +EXPORT_SYMBOL_GPL(blocking_notifier_chain_register_unique_prio); + /** * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain @@ -341,7 +404,7 @@ EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); int raw_notifier_chain_register(struct raw_notifier_head *nh, struct notifier_block *n) { - return notifier_chain_register(&nh->head, n); + return notifier_chain_register(&nh->head, n, false); } EXPORT_SYMBOL_GPL(raw_notifier_chain_register); @@ -420,10 +483,10 @@ int srcu_notifier_chain_register(struct srcu_notifier_head *nh, * such times we must not call mutex_lock(). */ if (unlikely(system_state == SYSTEM_BOOTING)) - return notifier_chain_register(&nh->head, n); + return notifier_chain_register(&nh->head, n, false); mutex_lock(&nh->mutex); - ret = notifier_chain_register(&nh->head, n); + ret = notifier_chain_register(&nh->head, n, false); mutex_unlock(&nh->mutex); return ret; } diff --git a/kernel/reboot.c b/kernel/reboot.c index 44228a93742b..a091145ee710 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -48,12 +48,20 @@ int reboot_cpu; enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; -/* - * If set, this is used for preparing the system to power off. - */ +struct sys_off_handler { + struct notifier_block nb; + int (*sys_off_cb)(struct sys_off_data *data); + void *cb_data; + enum sys_off_mode mode; + bool blocking; + void *list; +}; -void (*pm_power_off_prepare)(void); -EXPORT_SYMBOL_GPL(pm_power_off_prepare); +/* + * Temporary stub that prevents linkage failure while we're in the process + * of removing all uses of legacy pm_power_off() around the kernel.
+ */ +void __weak (*pm_power_off)(void); /** * emergency_restart - reboot the system @@ -281,6 +289,316 @@ void kernel_halt(void) } EXPORT_SYMBOL_GPL(kernel_halt); +/* + * Notifier list for kernel code which wants to be called + * to prepare system for power off. + */ +static BLOCKING_NOTIFIER_HEAD(power_off_prep_handler_list); + +/* + * Notifier list for kernel code which wants to be called + * to power off system. + */ +static ATOMIC_NOTIFIER_HEAD(power_off_handler_list); + +static int sys_off_notify(struct notifier_block *nb, + unsigned long mode, void *cmd) +{ + struct sys_off_handler *handler; + struct sys_off_data data = {}; + + handler = container_of(nb, struct sys_off_handler, nb); + data.cb_data = handler->cb_data; + data.mode = mode; + data.cmd = cmd; + + return handler->sys_off_cb(&data); +} + +/** + * register_sys_off_handler - Register sys-off handler + * @mode: Sys-off mode + * @priority: Handler priority + * @callback: Callback function + * @cb_data: Callback argument + * + * Registers system power-off or restart handler that will be invoked + * at the step corresponding to the given sys-off mode. Handler's callback + * should return NOTIFY_DONE to permit execution of the next handler in + * the call chain or NOTIFY_STOP to break the chain (in error case for + * example). + * + * Multiple handlers can be registered at the default priority level. + * + * Only one handler can be registered at the non-default priority level, + * otherwise ERR_PTR(-EBUSY) is returned. + * + * Returns a new instance of struct sys_off_handler on success, or + * an ERR_PTR()-encoded error code otherwise. + */ +struct sys_off_handler * +register_sys_off_handler(enum sys_off_mode mode, + int priority, + int (*callback)(struct sys_off_data *data), + void *cb_data) +{ + struct sys_off_handler *handler; + int err; + + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler) + return ERR_PTR(-ENOMEM); + + switch (mode) { + case SYS_OFF_MODE_POWER_OFF_PREPARE: + handler->list = &power_off_prep_handler_list; + handler->blocking = true; + break; + + case SYS_OFF_MODE_POWER_OFF: + handler->list = &power_off_handler_list; + break; + + case SYS_OFF_MODE_RESTART: + handler->list = &restart_handler_list; + break; + + default: + kfree(handler); + return ERR_PTR(-EINVAL); + } + + handler->nb.notifier_call = sys_off_notify; + handler->nb.priority = priority; + handler->sys_off_cb = callback; + handler->cb_data = cb_data; + handler->mode = mode; + + if (handler->blocking) { + if (priority == SYS_OFF_PRIO_DEFAULT) + err = blocking_notifier_chain_register(handler->list, + &handler->nb); + else + err = blocking_notifier_chain_register_unique_prio(handler->list, + &handler->nb); + } else { + if (priority == SYS_OFF_PRIO_DEFAULT) + err = atomic_notifier_chain_register(handler->list, + &handler->nb); + else + err = atomic_notifier_chain_register_unique_prio(handler->list, + &handler->nb); + } + + if (err) { + kfree(handler); + return ERR_PTR(err); + } + + return handler; +} +EXPORT_SYMBOL_GPL(register_sys_off_handler); + +/** + * unregister_sys_off_handler - Unregister sys-off handler + * @handler: Sys-off handler + * + * Unregisters given sys-off handler. 
+ */ +void unregister_sys_off_handler(struct sys_off_handler *handler) +{ + int err; + + if (!handler) + return; + + if (handler->blocking) + err = blocking_notifier_chain_unregister(handler->list, + &handler->nb); + else + err = atomic_notifier_chain_unregister(handler->list, + &handler->nb); + + /* sanity check, shall never happen */ + WARN_ON(err); + + kfree(handler); +} +EXPORT_SYMBOL_GPL(unregister_sys_off_handler); + +static void devm_unregister_sys_off_handler(void *data) +{ + struct sys_off_handler *handler = data; + + unregister_sys_off_handler(handler); +} + +/** + * devm_register_sys_off_handler - Register sys-off handler + * @dev: Device that registers handler + * @mode: Sys-off mode + * @priority: Handler priority + * @callback: Callback function + * @cb_data: Callback argument + * + * Registers resource-managed sys-off handler. + * + * Returns zero on success, or error code on failure. + */ +int devm_register_sys_off_handler(struct device *dev, + enum sys_off_mode mode, + int priority, + int (*callback)(struct sys_off_data *data), + void *cb_data) +{ + struct sys_off_handler *handler; + + handler = register_sys_off_handler(mode, priority, callback, cb_data); + if (IS_ERR(handler)) + return PTR_ERR(handler); + + return devm_add_action_or_reset(dev, devm_unregister_sys_off_handler, + handler); +} +EXPORT_SYMBOL_GPL(devm_register_sys_off_handler); + +/** + * devm_register_power_off_handler - Register power-off handler + * @dev: Device that registers callback + * @callback: Callback function + * @cb_data: Callback's argument + * + * Registers resource-managed sys-off handler with a default priority + * and using power-off mode. + * + * Returns zero on success, or error code on failure. + */ +int devm_register_power_off_handler(struct device *dev, + int (*callback)(struct sys_off_data *data), + void *cb_data) +{ + return devm_register_sys_off_handler(dev, + SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + callback, cb_data); +} +EXPORT_SYMBOL_GPL(devm_register_power_off_handler); + +/** + * devm_register_restart_handler - Register restart handler + * @dev: Device that registers callback + * @callback: Callback function + * @cb_data: Callback's argument + * + * Registers resource-managed sys-off handler with a default priority + * and using restart mode. + * + * Returns zero on success, or error code on failure. + */ +int devm_register_restart_handler(struct device *dev, + int (*callback)(struct sys_off_data *data), + void *cb_data) +{ + return devm_register_sys_off_handler(dev, + SYS_OFF_MODE_RESTART, + SYS_OFF_PRIO_DEFAULT, + callback, cb_data); +} +EXPORT_SYMBOL_GPL(devm_register_restart_handler); + +static struct sys_off_handler *platform_power_off_handler; + +static int platform_power_off_notify(struct sys_off_data *data) +{ + void (*platform_power_off_cb)(void) = data->cb_data; + + platform_power_off_cb(); + + return NOTIFY_DONE; +} + +/** + * register_platform_power_off - Register platform-level power-off callback + * @power_off: Power-off callback + * + * Registers power-off callback that will be called as the last step + * of the power-off sequence. This callback is expected to be invoked + * as a last resort. Only one platform power-off callback is allowed + * to be registered at a time. + * + * Returns zero on success, or error code on failure.
+ */ +int register_platform_power_off(void (*power_off)(void)) +{ + struct sys_off_handler *handler; + + handler = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_PLATFORM, + platform_power_off_notify, + power_off); + if (IS_ERR(handler)) + return PTR_ERR(handler); + + platform_power_off_handler = handler; + + return 0; +} +EXPORT_SYMBOL_GPL(register_platform_power_off); + +/** + * unregister_platform_power_off - Unregister platform-level power-off callback + * @power_off: Power-off callback + * + * Unregisters previously registered platform power-off callback. + */ +void unregister_platform_power_off(void (*power_off)(void)) +{ + if (platform_power_off_handler && + platform_power_off_handler->cb_data == power_off) { + unregister_sys_off_handler(platform_power_off_handler); + platform_power_off_handler = NULL; + } +} +EXPORT_SYMBOL_GPL(unregister_platform_power_off); + +static int legacy_pm_power_off(struct sys_off_data *data) +{ + if (pm_power_off) + pm_power_off(); + + return NOTIFY_DONE; +} + +static void do_kernel_power_off_prepare(void) +{ + blocking_notifier_call_chain(&power_off_prep_handler_list, 0, NULL); +} + +/** + * do_kernel_power_off - Execute kernel power-off handler call chain + * + * Expected to be called as the last step of the power-off sequence. + * + * Powers off the system immediately if a power-off handler function has + * been registered. Otherwise does nothing. + */ +void do_kernel_power_off(void) +{ + atomic_notifier_call_chain(&power_off_handler_list, 0, NULL); +} + +/** + * kernel_can_power_off - check whether system can be powered off + * + * Returns true if power-off handler is registered and system can be + * powered off, false otherwise. + */ +bool kernel_can_power_off(void) +{ + return !atomic_notifier_call_chain_is_empty(&power_off_handler_list); +} +EXPORT_SYMBOL_GPL(kernel_can_power_off); + /** * kernel_power_off - power_off the system * @@ -289,8 +607,7 @@ EXPORT_SYMBOL_GPL(kernel_halt); void kernel_power_off(void) { kernel_shutdown_prepare(SYSTEM_POWER_OFF); - if (pm_power_off_prepare) - pm_power_off_prepare(); + do_kernel_power_off_prepare(); migrate_to_reboot_cpu(); syscore_shutdown(); pr_emerg("Power down\n"); @@ -313,6 +630,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg) { struct pid_namespace *pid_ns = task_active_pid_ns(current); + struct sys_off_handler *sys_off = NULL; char buffer[256]; int ret = 0; @@ -337,10 +655,25 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, if (ret) return ret; + /* + * Register sys-off handlers for legacy PM callback. This allows + * legacy PM callbacks to temporarily co-exist with the new sys-off API. + * + * TODO: Remove legacy handlers once all legacy PM users have been + * switched to the sys-off based APIs. + */ + if (pm_power_off) { + sys_off = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + legacy_pm_power_off, NULL); + if (IS_ERR(sys_off)) + return PTR_ERR(sys_off); + } + /* Instead of trying to make the power_off code look like * halt when pm_power_off is not set do it the easy way. */ - if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) + if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !kernel_can_power_off()) cmd = LINUX_REBOOT_CMD_HALT; mutex_lock(&system_transition_mutex); @@ -394,6 +727,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, break; } mutex_unlock(&system_transition_mutex); + unregister_sys_off_handler(sys_off); return ret; }
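
The pfuze100 and Tegra PMC conversions above follow one pattern: allocate driver state, register a devm-managed sys-off handler, and let devres tear it down on driver unbind, which is why the pfuze100 .remove callback could be deleted outright. A minimal, hypothetical driver sketch (all foo_* names are illustrative; only the reboot.h interfaces come from this series)::

  #include <linux/module.h>
  #include <linux/notifier.h>
  #include <linux/platform_device.h>
  #include <linux/reboot.h>
  #include <linux/slab.h>

  struct foo_chip {
          struct device *dev;
  };

  /* Runs from an atomic notifier chain, so it must not sleep. */
  static int foo_power_off(struct sys_off_data *data)
  {
          struct foo_chip *chip = data->cb_data;

          dev_emerg(chip->dev, "powering off\n");
          /* ... poke the power controller here ... */

          /* Returning lets lower-priority handlers try as well. */
          return NOTIFY_DONE;
  }

  static int foo_probe(struct platform_device *pdev)
  {
          struct foo_chip *chip;

          chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
          if (!chip)
                  return -ENOMEM;

          chip->dev = &pdev->dev;

          /* Unregistered automatically on unbind; no .remove needed. */
          return devm_register_sys_off_handler(&pdev->dev,
                                               SYS_OFF_MODE_POWER_OFF,
                                               SYS_OFF_PRIO_DEFAULT,
                                               foo_power_off, chip);
  }

A handler that needs to sleep (the pfuze100 case, which issues I2C writes) registers with SYS_OFF_MODE_POWER_OFF_PREPARE instead, since that mode runs from a blocking notifier chain.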
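
For restart, the callback receives the optional reboot command through struct sys_off_data, as the Tegra PMC handler's signature shows. A hypothetical use of the cmd field (the foo_* helpers are illustrative stubs)::

  #include <linux/notifier.h>
  #include <linux/reboot.h>
  #include <linux/string.h>

  static void foo_set_reboot_reason_bootloader(void) { /* illustrative */ }
  static void foo_assert_reset_line(void) { /* illustrative */ }

  static int foo_restart(struct sys_off_data *data)
  {
          /* data->cmd carries the reboot argument; it may be NULL. */
          if (data->cmd && !strcmp(data->cmd, "bootloader"))
                  foo_set_reboot_reason_bootloader();

          foo_assert_reset_line();

          return NOTIFY_DONE;
  }

Registration would then be a one-liner in probe: devm_register_restart_handler(dev, foo_restart, NULL), the default-priority convenience wrapper added above.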
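
The unique-priority notifier registration added to kernel/notifier.c is what backs the rule that only one handler may occupy a non-default priority level. A test-style sketch of what a caller observes (not from the series; assumes no other handler already sits at SYS_OFF_PRIO_FIRMWARE)::

  #include <linux/bug.h>
  #include <linux/err.h>
  #include <linux/notifier.h>
  #include <linux/reboot.h>

  static int first_cb(struct sys_off_data *data) { return NOTIFY_DONE; }
  static int second_cb(struct sys_off_data *data) { return NOTIFY_DONE; }

  static void demo_unique_prio(void)
  {
          struct sys_off_handler *a, *b;

          a = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
                                       SYS_OFF_PRIO_FIRMWARE, first_cb, NULL);
          if (IS_ERR(a))
                  return;

          /* Same mode, same non-default priority: rejected with -EBUSY. */
          b = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
                                       SYS_OFF_PRIO_FIRMWARE, second_cb, NULL);
          WARN_ON(!IS_ERR(b) || PTR_ERR(b) != -EBUSY);

          unregister_sys_off_handler(a);
  }

Handlers at SYS_OFF_PRIO_DEFAULT are exempt: register_sys_off_handler() deliberately routes them through the ordinary (non-unique) chain registration, so any number of default-priority handlers can coexist.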
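
register_platform_power_off() is the drop-in replacement for board and firmware code that used to assign the pm_power_off global directly; the callback is wired to SYS_OFF_PRIO_PLATFORM, the lowest standard priority, so every other handler runs first. A hedged before/after sketch with an invented board_* routine::

  #include <linux/init.h>
  #include <linux/reboot.h>

  /* Illustrative board-level power-off routine. */
  static void board_power_off(void)
  {
          /* ... final hardware poke; does not return on success ... */
  }

  static int __init board_pm_init(void)
  {
          /*
           * Previously: pm_power_off = board_power_off;
           * Now the callback sits at SYS_OFF_PRIO_PLATFORM in the
           * power-off call chain, so firmware and driver handlers
           * still get to run ahead of it.
           */
          return register_platform_power_off(board_power_off);
  }
  late_initcall(board_pm_init);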
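
The emif.c hunk shows the consumer side of kernel_can_power_off(): it replaces the open-coded pm_power_off != NULL test, which stops being meaningful once handlers live in a chain rather than a single global. Elsewhere in the series (not shown in this excerpt) architectures are expected to call do_kernel_power_off() from their machine_power_off(); roughly, and purely as an assumed sketch::

  #include <linux/irqflags.h>
  #include <linux/reboot.h>
  #include <linux/smp.h>
  #include <asm/processor.h>

  void machine_power_off(void)
  {
          local_irq_disable();
          smp_send_stop();

          /* Runs the power-off chain; returns only if no handler worked. */
          do_kernel_power_off();

          while (1)
                  cpu_relax();
  }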
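
The dev_pm_opp_find_bw_ceil()/_floor() helpers added in drivers/opp/core.c mirror the existing freq and level lookups: they take the request by reference, update it to the matched OPP's peak bandwidth, and return a referenced OPP that the caller must put. A hypothetical caller (index 0 selects the first interconnect path)::

  #include <linux/device.h>
  #include <linux/err.h>
  #include <linux/pm_opp.h>

  static int foo_request_bandwidth(struct device *dev, unsigned int kbps)
  {
          struct dev_pm_opp *opp;

          /* Round the request up to the nearest supported peak bandwidth. */
          opp = dev_pm_opp_find_bw_ceil(dev, &kbps, 0);
          if (IS_ERR(opp))
                  return PTR_ERR(opp);

          /* kbps now holds the peak bandwidth of the matched OPP. */
          dev_dbg(dev, "using %u kBps\n", kbps);

          /* Drop the reference taken by the find helper. */
          dev_pm_opp_put(opp);

          return 0;
  }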
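
Finally, the regulator and clock hunks in drivers/opp/core.c adopt the dev_err_probe() pattern: it returns the error it is given and logs it, except for -EPROBE_DEFER, which is recorded for the devices_deferred debugfs file instead of spamming the log. In general form (foo_get_clock is an illustrative wrapper)::

  #include <linux/clk.h>
  #include <linux/device.h>
  #include <linux/err.h>

  static int foo_get_clock(struct device *dev, struct clk **clk)
  {
          *clk = clk_get(dev, NULL);
          if (IS_ERR(*clk))
                  /* Logs unless -EPROBE_DEFER, then returns the error. */
                  return dev_err_probe(dev, PTR_ERR(*clk),
                                       "failed to get clock\n");

          return 0;
  }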