From 34e2c555f3e13c90e9284e23d00f03be8a6e06c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 20:20:42 +0100 Subject: [PATCH 01/61] cpufreq: Add mechanism for registering utilization update callbacks Introduce a mechanism by which parts of the cpufreq subsystem ("setpolicy" drivers or the core) can register callbacks to be executed from cpufreq_update_util() which is invoked by the scheduler's update_load_avg() on CPU utilization changes. This allows the "setpolicy" drivers to dispense with their timers and do all of the computations they need and frequency/voltage adjustments in the update_load_avg() code path, among other things. The update_load_avg() changes were suggested by Peter Zijlstra. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) Acked-by: Ingo Molnar --- drivers/cpufreq/cpufreq.c | 45 +++++++++++++++++++++++++++++++++++++++ include/linux/cpufreq.h | 34 +++++++++++++++++++++++++++++ kernel/sched/deadline.c | 4 ++++ kernel/sched/fair.c | 26 +++++++++++++++++++++- kernel/sched/rt.c | 4 ++++ kernel/sched/sched.h | 1 + 6 files changed, 113 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 34b17447e0d19..e172b2a02c1d4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -102,6 +102,51 @@ static LIST_HEAD(cpufreq_governor_list); static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); + +static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. + * @cpu: The CPU to set the pointer for. + * @data: New pointer value. + * + * Set and publish the update_util_data pointer for the given CPU. That pointer + * points to a struct update_util_data object containing a callback function + * to call from cpufreq_update_util(). 
That function will be called from an RCU + * read-side critical section, so it must not sleep. + * + * Callers must use RCU callbacks to free any memory that might be accessed + * via the old update_util_data pointer or invoke synchronize_rcu() right after + * this function to avoid use-after-free. + */ +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) +{ + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); +} +EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); + +/** + * cpufreq_update_util - Take a note about CPU utilization changes. + * @time: Current time. + * @util: Current utilization. + * @max: Utilization ceiling. + * + * This function is called by the scheduler on every invocation of + * update_load_avg() on the CPU whose utilization is being updated. + */ +void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) +{ + struct update_util_data *data; + + rcu_read_lock(); + + data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data && data->func) + data->func(data, time, util, max); + + rcu_read_unlock(); +} + DEFINE_MUTEX(cpufreq_governor_lock); /* Flag to suspend/resume CPUFreq governors */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d0bf555b6bbfb..704d85bf72423 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ +void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); + +/** + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. + * @time: Current time. + * + * The way cpufreq is currently arranged requires it to evaluate the CPU + * performance state (frequency/voltage) on a regular basis to prevent it from + * being stuck in a completely inadequate performance level for too long. 
+ * That is not guaranteed to happen if the updates are only triggered from CFS, + * though, because they may not be coming in if RT or deadline tasks are active + * all the time (or there are RT and DL tasks only). + * + * As a workaround for that issue, this function is called by the RT and DL + * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT and DL tasks. + */ +static inline void cpufreq_trigger_update(u64 time) +{ + cpufreq_update_util(time, ULONG_MAX, 0); +} + +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); + unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else +static inline void cpufreq_update_util(u64 time, unsigned long util, + unsigned long max) {} +static inline void cpufreq_trigger_update(u64 time) {} + static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index cd64c979d0e18..21a0aa6f810d8 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq) if (!dl_task(curr) || !on_dl_rq(dl_se)) return; + /* Kick cpufreq (see the comment in linux/cpufreq.h). 
*/ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + /* * Consumed budget is computed considering the time as * observed by schedulable tasks (excluding time spent diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 56b7d4b839476..e2987a7e489dd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) { struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 now = cfs_rq_clock_task(cfs_rq); - int cpu = cpu_of(rq_of(cfs_rq)); + struct rq *rq = rq_of(cfs_rq); + int cpu = cpu_of(rq); /* * Track task load average for carrying it to new CPU after migrated, and @@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) update_tg_load_avg(cfs_rq, 0); + + if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { + unsigned long max = rq->cpu_capacity_orig; + + /* + * There are a few boundary cases this might miss but it should + * get called often enough that that should (hopefully) not be + * a real problem -- added to that it only calls on the local + * CPU, so if we enqueue remotely we'll miss an update, but + * the next tick/schedule should update. + * + * It will not get called when we go idle, because the idle + * thread is a different class (!fair), nor will the utilization + * number include things like RT tasks. + * + * As is, the util number is not freq-invariant (we'd have to + * implement arch_scale_freq_capacity() for that). + * + * See cpu_util(). 
+ */ + cpufreq_update_util(rq_clock(rq), + min(cfs_rq->avg.util_avg, max), max); + } } static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8ec86abe0ea18..27f5b03cbdbed 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq) if (curr->sched_class != &rt_sched_class) return; + /* Kick cpufreq (see the comment in linux/cpufreq.h). */ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + delta_exec = rq_clock_task(rq) - curr->se.exec_start; if (unlikely((s64)delta_exec <= 0)) return; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 10f16374df7f3..f042190c80021 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "cpupri.h" #include "cpudeadline.h" From a4675fbc4a7abe072ac6ba38c252f22a91ebcd94 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Feb 2016 01:45:30 +0100 Subject: [PATCH 02/61] cpufreq: intel_pstate: Replace timers with utilization update callbacks Instead of using a per-CPU deferrable timer for utilization sampling and P-states adjustments, register a utilization update callback that will be invoked from the scheduler on utilization changes. The sampling rate is still the same as what was used for the deferrable timers, so the functional impact of this patch should not be significant. Based on an earlier patch from Srinivas Pandruvada. Signed-off-by: Rafael J. 
Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 103 +++++++++++++-------------------- 1 file changed, 39 insertions(+), 64 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cd83d477e32d4..f4d85c2ae7b18 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -71,7 +71,7 @@ struct sample { u64 mperf; u64 tsc; int freq; - ktime_t time; + u64 time; }; struct pstate_data { @@ -103,13 +103,13 @@ struct _pid { struct cpudata { int cpu; - struct timer_list timer; + struct update_util_data update_util; struct pstate_data pstate; struct vid_data vid; struct _pid pid; - ktime_t last_sample_time; + u64 last_sample_time; u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; @@ -120,6 +120,7 @@ struct cpudata { static struct cpudata **all_cpu_data; struct pstate_adjust_policy { int sample_rate_ms; + s64 sample_rate_ns; int deadband; int setpoint; int p_gain_pct; @@ -712,7 +713,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) if (limits->no_turbo && !limits->turbo_disabled) val |= (u64)1 << 32; - wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); + wrmsrl(MSR_IA32_PERF_CTL, val); } static int knl_get_turbo_pstate(void) @@ -883,7 +884,7 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) sample->core_pct_busy = (int32_t)core_pct; } -static inline void intel_pstate_sample(struct cpudata *cpu) +static inline void intel_pstate_sample(struct cpudata *cpu, u64 time) { u64 aperf, mperf; unsigned long flags; @@ -900,7 +901,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; - cpu->sample.time = ktime_get(); + cpu->sample.time = time; cpu->sample.aperf = aperf; cpu->sample.mperf = mperf; cpu->sample.tsc = tsc; @@ -915,22 +916,6 @@ static inline void intel_pstate_sample(struct cpudata *cpu) cpu->prev_tsc = tsc; } -static inline void intel_hwp_set_sample_time(struct cpudata *cpu) -{ 
- int delay; - - delay = msecs_to_jiffies(50); - mod_timer_pinned(&cpu->timer, jiffies + delay); -} - -static inline void intel_pstate_set_sample_time(struct cpudata *cpu) -{ - int delay; - - delay = msecs_to_jiffies(pid_params.sample_rate_ms); - mod_timer_pinned(&cpu->timer, jiffies + delay); -} - static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; @@ -970,8 +955,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; - s64 duration_us; - u32 sample_time; + u64 duration_ns; /* * core_busy is the ratio of actual performance to max @@ -990,18 +974,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); /* - * Since we have a deferred timer, it will not fire unless - * we are in C0. So, determine if the actual elapsed time - * is significantly greater (3x) than our sample interval. If it - * is, then we were idle for a long enough period of time - * to adjust our busyness. + * Since our utilization update callback will not run unless we are + * in C0, check if the actual elapsed time is significantly greater (3x) + * than our sample interval. If it is, then we were idle for a long + * enough period of time to adjust our busyness. 
*/ - sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; - duration_us = ktime_us_delta(cpu->sample.time, - cpu->last_sample_time); - if (duration_us > sample_time * 3) { - sample_ratio = div_fp(int_tofp(sample_time), - int_tofp(duration_us)); + duration_ns = cpu->sample.time - cpu->last_sample_time; + if ((s64)duration_ns > pid_params.sample_rate_ns * 3 + && cpu->last_sample_time > 0) { + sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), + int_tofp(duration_ns)); core_busy = mul_fp(core_busy, sample_ratio); } @@ -1031,23 +1013,17 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->freq); } -static void intel_hwp_timer_func(unsigned long __data) -{ - struct cpudata *cpu = (struct cpudata *) __data; - - intel_pstate_sample(cpu); - intel_hwp_set_sample_time(cpu); -} - -static void intel_pstate_timer_func(unsigned long __data) +static void intel_pstate_update_util(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max) { - struct cpudata *cpu = (struct cpudata *) __data; - - intel_pstate_sample(cpu); + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; - intel_pstate_adjust_busy_pstate(cpu); - - intel_pstate_set_sample_time(cpu); + if ((s64)delta_ns >= pid_params.sample_rate_ns) { + intel_pstate_sample(cpu, time); + if (!hwp_active) + intel_pstate_adjust_busy_pstate(cpu); + } } #define ICPU(model, policy) \ @@ -1095,24 +1071,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu->cpu = cpunum; - if (hwp_active) + if (hwp_active) { intel_pstate_hwp_enable(cpu); + pid_params.sample_rate_ms = 50; + pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; + } intel_pstate_get_cpu_pstates(cpu); - init_timer_deferrable(&cpu->timer); - cpu->timer.data = (unsigned long)cpu; - cpu->timer.expires = jiffies + HZ/100; - - if (!hwp_active) - cpu->timer.function = intel_pstate_timer_func; - else - cpu->timer.function = intel_hwp_timer_func; - 
intel_pstate_busy_pid_reset(cpu); - intel_pstate_sample(cpu); + intel_pstate_sample(cpu, 0); - add_timer_on(&cpu->timer, cpunum); + cpu->update_util.func = intel_pstate_update_util; + cpufreq_set_update_util_data(cpunum, &cpu->update_util); pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); @@ -1196,7 +1167,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); - del_timer_sync(&all_cpu_data[cpu_num]->timer); + cpufreq_set_update_util_data(cpu_num, NULL); + synchronize_rcu(); + if (hwp_active) return; @@ -1260,6 +1233,7 @@ static int intel_pstate_msrs_not_valid(void) static void copy_pid_params(struct pstate_adjust_policy *policy) { pid_params.sample_rate_ms = policy->sample_rate_ms; + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; pid_params.p_gain_pct = policy->p_gain_pct; pid_params.i_gain_pct = policy->i_gain_pct; pid_params.d_gain_pct = policy->d_gain_pct; @@ -1451,7 +1425,8 @@ static int __init intel_pstate_init(void) get_online_cpus(); for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) { - del_timer_sync(&all_cpu_data[cpu]->timer); + cpufreq_set_update_util_data(cpu, NULL); + synchronize_rcu(); kfree(all_cpu_data[cpu]); } } From 9be4fd2c7723a3057b0b39676fe4c8d5fd7118a4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Feb 2016 16:53:50 +0100 Subject: [PATCH 03/61] cpufreq: governor: Replace timers with utilization update callbacks Instead of using a per-CPU deferrable timer for queuing up governor work items, register a utilization update callback that will be invoked from the scheduler on utilization changes. The sampling rate is still the same as what was used for the deferrable timers and the added irq_work overhead should be offset by the eliminated timers overhead, so in theory the functional impact of this patch should not be significant. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Tested-by: Gautham R. 
Shenoy --- drivers/cpufreq/Kconfig | 1 + drivers/cpufreq/cpufreq_conservative.c | 6 +- drivers/cpufreq/cpufreq_governor.c | 165 ++++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 19 +-- drivers/cpufreq/cpufreq_ondemand.c | 43 ++++--- 5 files changed, 114 insertions(+), 120 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 659879a56dbac..dcb972a38fbc8 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -3,6 +3,7 @@ menu "CPU Frequency scaling" config CPU_FREQ bool "CPU Frequency scaling" select SRCU + select IRQ_WORK help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. This is a nice method to save power, because diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 8504a70a47857..bc002c8cba900 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -112,14 +112,12 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all) +static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - if (modify_all) - dbs_check_cpu(dbs_data, policy->cpu); - + dbs_check_cpu(dbs_data, policy->cpu); return delay_for_sampling_rate(cs_tuners->sampling_rate); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e0d111024d484..6bc2f50cc1d97 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -128,10 +128,10 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) * dropped down. So we perform the copy only once, upon the * first wake-up from idle.) * - * Detecting this situation is easy: the governor's deferrable - * timer would not have fired during CPU-idle periods. 
Hence - * an unusually large 'wall_time' (as compared to the sampling - * rate) indicates this scenario. + * Detecting this situation is easy: the governor's utilization + * update handler would not have run during CPU-idle periods. + * Hence, an unusually large 'wall_time' (as compared to the + * sampling rate) indicates this scenario. * * prev_load can be zero in two cases and we must recalculate it * for both cases: @@ -161,72 +161,48 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) } EXPORT_SYMBOL_GPL(dbs_check_cpu); -void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay) +void gov_set_update_util(struct cpu_common_dbs_info *shared, + unsigned int delay_us) { + struct cpufreq_policy *policy = shared->policy; struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs; int cpu; + gov_update_sample_delay(shared, delay_us); + shared->last_sample_time = 0; + for_each_cpu(cpu, policy->cpus) { - cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - cdbs->timer.expires = jiffies + delay; - add_timer_on(&cdbs->timer, cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + + cpufreq_set_update_util_data(cpu, &cdbs->update_util); } } -EXPORT_SYMBOL_GPL(gov_add_timers); +EXPORT_SYMBOL_GPL(gov_set_update_util); -static inline void gov_cancel_timers(struct cpufreq_policy *policy) +static inline void gov_clear_update_util(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs; int i; - for_each_cpu(i, policy->cpus) { - cdbs = dbs_data->cdata->get_cpu_cdbs(i); - del_timer_sync(&cdbs->timer); - } + for_each_cpu(i, policy->cpus) + cpufreq_set_update_util_data(i, NULL); + + synchronize_rcu(); } -void gov_cancel_work(struct cpu_common_dbs_info *shared) +static void gov_cancel_work(struct cpu_common_dbs_info *shared) { - /* Tell dbs_timer_handler() to skip queuing up work items. */ + /* Tell dbs_update_util_handler() to skip queuing up work items. 
*/ atomic_inc(&shared->skip_work); /* - * If dbs_timer_handler() is already running, it may not notice the - * incremented skip_work, so wait for it to complete to prevent its work - * item from being queued up after the cancel_work_sync() below. - */ - gov_cancel_timers(shared->policy); - /* - * In case dbs_timer_handler() managed to run and spawn a work item - * before the timers have been canceled, wait for that work item to - * complete and then cancel all of the timers set up by it. If - * dbs_timer_handler() runs again at that point, it will see the - * positive value of skip_work and won't spawn any more work items. + * If dbs_update_util_handler() is already running, it may not notice + * the incremented skip_work, so wait for it to complete to prevent its + * work item from being queued up after the cancel_work_sync() below. */ + gov_clear_update_util(shared->policy); + irq_work_sync(&shared->irq_work); cancel_work_sync(&shared->work); - gov_cancel_timers(shared->policy); atomic_set(&shared->skip_work, 0); } -EXPORT_SYMBOL_GPL(gov_cancel_work); - -/* Will return if we need to evaluate cpu load again or not */ -static bool need_load_eval(struct cpu_common_dbs_info *shared, - unsigned int sampling_rate) -{ - if (policy_is_shared(shared->policy)) { - ktime_t time_now = ktime_get(); - s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); - - /* Do nothing if we recently have sampled */ - if (delta_us < (s64)(sampling_rate / 2)) - return false; - else - shared->time_stamp = time_now; - } - - return true; -} static void dbs_work_handler(struct work_struct *work) { @@ -234,56 +210,70 @@ static void dbs_work_handler(struct work_struct *work) cpu_common_dbs_info, work); struct cpufreq_policy *policy; struct dbs_data *dbs_data; - unsigned int sampling_rate, delay; - bool eval_load; + unsigned int delay; policy = shared->policy; dbs_data = policy->governor_data; - /* Kill all timers */ - gov_cancel_timers(policy); - - if (dbs_data->cdata->governor == 
GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - - sampling_rate = cs_tuners->sampling_rate; - } else { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - - sampling_rate = od_tuners->sampling_rate; - } - - eval_load = need_load_eval(shared, sampling_rate); - /* - * Make sure cpufreq_governor_limits() isn't evaluating load in - * parallel. + * Make sure cpufreq_governor_limits() isn't evaluating load or the + * ondemand governor isn't updating the sampling rate in parallel. */ mutex_lock(&shared->timer_mutex); - delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load); + delay = dbs_data->cdata->gov_dbs_timer(policy); + shared->sample_delay_ns = jiffies_to_nsecs(delay); mutex_unlock(&shared->timer_mutex); + /* + * If the atomic operation below is reordered with respect to the + * sample delay modification, the utilization update handler may end + * up using a stale sample delay value. + */ + smp_mb__before_atomic(); atomic_dec(&shared->skip_work); +} + +static void dbs_irq_work(struct irq_work *irq_work) +{ + struct cpu_common_dbs_info *shared; - gov_add_timers(policy, delay); + shared = container_of(irq_work, struct cpu_common_dbs_info, irq_work); + schedule_work(&shared->work); } -static void dbs_timer_handler(unsigned long data) +static inline void gov_queue_irq_work(struct cpu_common_dbs_info *shared) { - struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data; +#ifdef CONFIG_SMP + irq_work_queue_on(&shared->irq_work, smp_processor_id()); +#else + irq_work_queue(&shared->irq_work); +#endif +} + +static void dbs_update_util_handler(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max) +{ + struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct cpu_common_dbs_info *shared = cdbs->shared; /* - * Timer handler may not be allowed to queue the work at the moment, - * because: - * - Another timer handler has done that - * - We are stopping the governor - * - Or we are 
updating the sampling rate of the ondemand governor + * The work may not be allowed to be queued up right now. + * Possible reasons: + * - Work has already been queued up or is in progress. + * - The governor is being stopped. + * - It is too early (too little time from the previous sample). */ - if (atomic_inc_return(&shared->skip_work) > 1) - atomic_dec(&shared->skip_work); - else - queue_work(system_wq, &shared->work); + if (atomic_inc_return(&shared->skip_work) == 1) { + u64 delta_ns; + + delta_ns = time - shared->last_sample_time; + if ((s64)delta_ns >= shared->sample_delay_ns) { + shared->last_sample_time = time; + gov_queue_irq_work(shared); + return; + } + } + atomic_dec(&shared->skip_work); } static void set_sampling_rate(struct dbs_data *dbs_data, @@ -315,6 +305,7 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy, mutex_init(&shared->timer_mutex); atomic_set(&shared->skip_work, 0); + init_irq_work(&shared->irq_work, dbs_irq_work); INIT_WORK(&shared->work, dbs_work_handler); return 0; } @@ -467,9 +458,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, io_busy = od_tuners->io_is_busy; } - shared->policy = policy; - shared->time_stamp = ktime_get(); - for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; @@ -485,10 +473,9 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - __setup_timer(&j_cdbs->timer, dbs_timer_handler, - (unsigned long)j_cdbs, - TIMER_DEFERRABLE | TIMER_IRQSAFE); + j_cdbs->update_util.func = dbs_update_util_handler; } + shared->policy = policy; if (cdata->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -505,7 +492,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, od_ops->powersave_bias_init_cpu(cpu); } - gov_add_timers(policy, delay_for_sampling_rate(sampling_rate)); + gov_set_update_util(shared, 
sampling_rate); return 0; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 91e767a058a76..541777192dbc1 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -18,6 +18,7 @@ #define _CPUFREQ_GOVERNOR_H #include +#include #include #include #include @@ -138,11 +139,19 @@ struct cpu_common_dbs_info { */ struct mutex timer_mutex; - ktime_t time_stamp; + u64 last_sample_time; + s64 sample_delay_ns; atomic_t skip_work; + struct irq_work irq_work; struct work_struct work; }; +static inline void gov_update_sample_delay(struct cpu_common_dbs_info *shared, + unsigned int delay_us) +{ + shared->sample_delay_ns = delay_us * NSEC_PER_USEC; +} + /* Per cpu structures */ struct cpu_dbs_info { u64 prev_cpu_idle; @@ -155,7 +164,7 @@ struct cpu_dbs_info { * wake-up from idle. */ unsigned int prev_load; - struct timer_list timer; + struct update_util_data update_util; struct cpu_common_dbs_info *shared; }; @@ -212,8 +221,7 @@ struct common_dbs_data { struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); - unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy, - bool modify_all); + unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); void (*gov_check_cpu)(int cpu, unsigned int load); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); @@ -270,9 +278,6 @@ static ssize_t show_sampling_rate_min_gov_pol \ } extern struct mutex cpufreq_governor_lock; - -void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay); -void gov_cancel_work(struct cpu_common_dbs_info *shared); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 929e193ac1c19..da7f3514d948c 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ 
b/drivers/cpufreq/cpufreq_ondemand.c @@ -189,7 +189,7 @@ static void od_check_cpu(int cpu, unsigned int load) } } -static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) +static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; unsigned int cpu = policy->cpu; @@ -198,9 +198,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type; - if (!modify_all) - goto max_delay; - /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { @@ -216,7 +213,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) } } -max_delay: if (!delay) delay = delay_for_sampling_rate(od_tuners->sampling_rate * dbs_info->rate_mult); @@ -262,7 +258,6 @@ static void update_sampling_rate(struct dbs_data *dbs_data, struct od_cpu_dbs_info_s *dbs_info; struct cpu_dbs_info *cdbs; struct cpu_common_dbs_info *shared; - unsigned long next_sampling, appointed_at; dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cdbs = &dbs_info->cdbs; @@ -286,20 +281,28 @@ static void update_sampling_rate(struct dbs_data *dbs_data, * policy will be governed by dbs_data, otherwise there can be * multiple policies that are governed by the same dbs_data. */ - if (dbs_data != policy->governor_data) - continue; - - /* - * Checking this for any CPU should be fine, timers for all of - * them are scheduled together. 
- */ - next_sampling = jiffies + usecs_to_jiffies(new_rate); - appointed_at = dbs_info->cdbs.timer.expires; - - if (time_before(next_sampling, appointed_at)) { - gov_cancel_work(shared); - gov_add_timers(policy, usecs_to_jiffies(new_rate)); - + if (dbs_data == policy->governor_data) { + mutex_lock(&shared->timer_mutex); + /* + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), + * but that really doesn't matter. If the read returns + * a value that's too big, the sample will be skipped, + * but the next invocation of dbs_update_util_handler() + * (when the update has been completed) will take a + * sample. If the returned value is too small, the + * sample will be taken immediately, but that isn't a + * problem, as we want the new rate to take effect + * immediately anyway. + * + * If this runs in parallel with dbs_work_handler(), we + * may end up overwriting the sample_delay_ns value that + * it has just written, but the difference should not be + * too big and it will be corrected next time a sample + * is taken, so it shouldn't be significant. + */ + gov_update_sample_delay(shared, new_rate); + mutex_unlock(&shared->timer_mutex); } } From 2bb8d94fb03f808022c620f54b602a1e26d5cbac Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:01:31 +0100 Subject: [PATCH 04/61] cpufreq: governor: Use common mutex for dbs_data protection Every governor relying on the common code in cpufreq_governor.c has to provide its own mutex in struct common_dbs_data. However, there actually is no need to have a separate mutex per governor for this purpose, they may be using the same global mutex just fine. Accordingly, introduce a single common mutex for that and drop the mutex field from struct common_dbs_data. That at least will ensure that the mutex is always present and initialized regardless of what the particular governors do. 
Another benefit is that the common code does not need a pointer to a governor-related structure to get to the mutex which sometimes helps. Finally, it makes the code generally easier to follow. Signed-off-by: Rafael J. Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 1 - drivers/cpufreq/cpufreq_governor.c | 7 +++++-- drivers/cpufreq/cpufreq_governor.h | 6 +----- drivers/cpufreq/cpufreq_ondemand.c | 5 ++--- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index bc002c8cba900..8f0c3dbe28677 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -368,7 +368,6 @@ static struct common_dbs_data cs_dbs_cdata = { .gov_check_cpu = cs_check_cpu, .init = cs_init, .exit = cs_exit, - .mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex), }; static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6bc2f50cc1d97..f291fdd878ce4 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,6 +22,9 @@ #include "cpufreq_governor.h" +DEFINE_MUTEX(dbs_data_mutex); +EXPORT_SYMBOL_GPL(dbs_data_mutex); + static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) { if (have_governor_per_policy()) @@ -543,7 +546,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, int ret; /* Lock governor to block concurrent initialization of governor */ - mutex_lock(&cdata->mutex); + mutex_lock(&dbs_data_mutex); if (have_governor_per_policy()) dbs_data = policy->governor_data; @@ -576,7 +579,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, } unlock: - mutex_unlock(&cdata->mutex); + mutex_unlock(&dbs_data_mutex); return ret; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 541777192dbc1..a9df62e87fcb6 100644 --- 
a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -228,11 +228,6 @@ struct common_dbs_data { /* Governor specific ops, see below */ void *gov_ops; - - /* - * Protects governor's data (struct dbs_data and struct common_dbs_data) - */ - struct mutex mutex; }; /* Governor Per policy data */ @@ -277,6 +272,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ } +extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index da7f3514d948c..fac2f8f05bf88 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -249,7 +249,7 @@ static void update_sampling_rate(struct dbs_data *dbs_data, /* * Lock governor so that governor start/stop can't execute in parallel. */ - mutex_lock(&od_dbs_cdata.mutex); + mutex_lock(&dbs_data_mutex); cpumask_copy(&cpumask, cpu_online_mask); @@ -306,7 +306,7 @@ static void update_sampling_rate(struct dbs_data *dbs_data, } } - mutex_unlock(&od_dbs_cdata.mutex); + mutex_unlock(&dbs_data_mutex); } static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, @@ -552,7 +552,6 @@ static struct common_dbs_data od_dbs_cdata = { .gov_ops = &od_ops, .init = od_init, .exit = od_exit, - .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), }; static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, From 5da3dd1e00918a9ac4b83885453bfa9cad732b44 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 5 Feb 2016 03:15:24 +0100 Subject: [PATCH 05/61] cpufreq: governor: Avoid passing dbs_data pointers around unnecessarily Do not pass struct dbs_data pointers to the family of functions implementing governor operations in cpufreq_governor.c as they can take that pointer from policy->governor_data by themselves. The cpufreq_governor_init() case is slightly more complicated, since policy->governor_data may be NULL when it is invoked, but then it can reach the pointer in question via its cdata argument just fine. While at it, rework cpufreq_governor_dbs() to avoid a pointless policy->governor_data check in the CPUFREQ_GOV_POLICY_INIT case. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 68 ++++++++++++------------------ 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index f291fdd878ce4..a329e1bcb6bc5 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -329,9 +329,9 @@ static void free_common_dbs_info(struct cpufreq_policy *policy, } static int cpufreq_governor_init(struct cpufreq_policy *policy, - struct dbs_data *dbs_data, struct common_dbs_data *cdata) { + struct dbs_data *dbs_data = cdata->gdbs_data; unsigned int latency; int ret; @@ -403,9 +403,9 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, return ret; } -static int cpufreq_governor_exit(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_exit(struct cpufreq_policy *policy) { + struct dbs_data *dbs_data = policy->governor_data; struct common_dbs_data *cdata = dbs_data->cdata; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); @@ -432,9 +432,9 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy, return 0; } -static int cpufreq_governor_start(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_start(struct cpufreq_policy
*policy) { + struct dbs_data *dbs_data = policy->governor_data; struct common_dbs_data *cdata = dbs_data->cdata; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); @@ -499,9 +499,9 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, return 0; } -static int cpufreq_governor_stop(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_stop(struct cpufreq_policy *policy) { + struct dbs_data *dbs_data = policy->governor_data; struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; @@ -515,9 +515,9 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy, return 0; } -static int cpufreq_governor_limits(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_limits(struct cpufreq_policy *policy) { + struct dbs_data *dbs_data = policy->governor_data; struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); @@ -542,45 +542,31 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy, int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event) { - struct dbs_data *dbs_data; - int ret; + int ret = -EINVAL; /* Lock governor to block concurrent initialization of governor */ mutex_lock(&dbs_data_mutex); - if (have_governor_per_policy()) - dbs_data = policy->governor_data; - else - dbs_data = cdata->gdbs_data; - - if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { - ret = -EINVAL; - goto unlock; - } - - switch (event) { - case CPUFREQ_GOV_POLICY_INIT: - ret = cpufreq_governor_init(policy, dbs_data, cdata); - break; - case CPUFREQ_GOV_POLICY_EXIT: - ret = cpufreq_governor_exit(policy, dbs_data); - break; - case CPUFREQ_GOV_START: - ret = cpufreq_governor_start(policy, dbs_data); - break; - case CPUFREQ_GOV_STOP: - ret = 
cpufreq_governor_stop(policy, dbs_data); - break; - case CPUFREQ_GOV_LIMITS: - ret = cpufreq_governor_limits(policy, dbs_data); - break; - default: - ret = -EINVAL; + if (event == CPUFREQ_GOV_POLICY_INIT) { + ret = cpufreq_governor_init(policy, cdata); + } else if (policy->governor_data) { + switch (event) { + case CPUFREQ_GOV_POLICY_EXIT: + ret = cpufreq_governor_exit(policy); + break; + case CPUFREQ_GOV_START: + ret = cpufreq_governor_start(policy); + break; + case CPUFREQ_GOV_STOP: + ret = cpufreq_governor_stop(policy); + break; + case CPUFREQ_GOV_LIMITS: + ret = cpufreq_governor_limits(policy); + break; + } } -unlock: mutex_unlock(&dbs_data_mutex); - return ret; } EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); From af926185231a6e30d11a6035410b61405e203c3b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Feb 2016 03:16:08 +0100 Subject: [PATCH 06/61] cpufreq: governor: Put governor structure into common_dbs_data For the ondemand and conservative governors (generally, governors that use the common code in cpufreq_governor.c), there are two static data structures representing the governor, the struct cpufreq_governor structure (the interface to the cpufreq core) and the struct common_dbs_data one (the interface to the cpufreq_governor.c code). There's no fundamental reason why those two structures have to be separate. Moreover, if the struct cpufreq_governor one is included into struct common_dbs_data, it will be possible to reach the latter from the policy via its policy->governor pointer, so it won't be necessary to pass a separate pointer to it around. For this reason, embed struct cpufreq_governor in struct common_dbs_data. Signed-off-by: Rafael J.
Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 78 ++++++++++++++------------ drivers/cpufreq/cpufreq_governor.h | 3 +- drivers/cpufreq/cpufreq_ondemand.c | 28 ++++----- 3 files changed, 58 insertions(+), 51 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 8f0c3dbe28677..4597f7430c95d 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -23,16 +23,6 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); -static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event); - -static struct cpufreq_governor cpufreq_gov_conservative = { - .name = "conservative", - .governor = cs_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -122,30 +112,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - struct cs_cpu_dbs_info_s *dbs_info = - &per_cpu(cs_cpu_dbs_info, freq->cpu); - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); - - if (!policy) - return 0; - - /* policy isn't governed by conservative governor */ - if (policy->governor != &cpufreq_gov_conservative) - return 0; - - /* - * we only care if our internally tracked freq moves outside the 'valid' - * ranges of frequency available to us otherwise we do not change it - */ - if (dbs_info->requested_freq > policy->max - || dbs_info->requested_freq < policy->min) - dbs_info->requested_freq = freq->new; - - return 0; -} + void *data); static struct notifier_block cs_cpufreq_notifier_block = { .notifier_call = dbs_cpufreq_notifier, @@ -358,7 +325,16 @@ static void cs_exit(struct dbs_data *dbs_data, bool 
notify) define_get_cpu_dbs_routines(cs_cpu_dbs_info); +static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + static struct common_dbs_data cs_dbs_cdata = { + .gov = { + .name = "conservative", + .governor = cs_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, + }, .governor = GOV_CONSERVATIVE, .attr_group_gov_sys = &cs_attr_group_gov_sys, .attr_group_gov_pol = &cs_attr_group_gov_pol, @@ -370,20 +346,48 @@ static struct common_dbs_data cs_dbs_cdata = { .exit = cs_exit, }; +#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_cdata.gov) + static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); } +static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cs_cpu_dbs_info_s *dbs_info = + &per_cpu(cs_cpu_dbs_info, freq->cpu); + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); + + if (!policy) + return 0; + + /* policy isn't governed by conservative governor */ + if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) + return 0; + + /* + * we only care if our internally tracked freq moves outside the 'valid' + * ranges of frequency available to us otherwise we do not change it + */ + if (dbs_info->requested_freq > policy->max + || dbs_info->requested_freq < policy->min) + dbs_info->requested_freq = freq->new; + + return 0; +} + static int __init cpufreq_gov_dbs_init(void) { - return cpufreq_register_governor(&cpufreq_gov_conservative); + return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); } static void __exit cpufreq_gov_dbs_exit(void) { - cpufreq_unregister_governor(&cpufreq_gov_conservative); + cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); } MODULE_AUTHOR("Alexander Clouter "); @@ -395,7 +399,7 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE struct cpufreq_governor 
*cpufreq_default_governor(void) { - return &cpufreq_gov_conservative; + return CPU_FREQ_GOV_CONSERVATIVE; } fs_initcall(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index a9df62e87fcb6..2fa3cf1043145 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -206,7 +206,8 @@ struct cs_dbs_tuners { /* Common Governor data across policies */ struct dbs_data; struct common_dbs_data { - /* Common across governors */ + struct cpufreq_governor gov; + #define GOV_ONDEMAND 0 #define GOV_CONSERVATIVE 1 int governor; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index fac2f8f05bf88..836116cd4bad9 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -31,8 +31,6 @@ static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); static struct od_ops od_ops; -static struct cpufreq_governor cpufreq_gov_ondemand; - static unsigned int default_powersave_bias; static void ondemand_powersave_bias_init_cpu(int cpu) @@ -541,7 +539,16 @@ static struct od_ops od_ops = { .freq_increase = dbs_freq_increase, }; +static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + static struct common_dbs_data od_dbs_cdata = { + .gov = { + .name = "ondemand", + .governor = od_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, + }, .governor = GOV_ONDEMAND, .attr_group_gov_sys = &od_attr_group_gov_sys, .attr_group_gov_pol = &od_attr_group_gov_pol, @@ -554,19 +561,14 @@ static struct common_dbs_data od_dbs_cdata = { .exit = od_exit, }; +#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_cdata.gov) + static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); } -static struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = od_cpufreq_governor_dbs, - 
.max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static void od_set_powersave_bias(unsigned int powersave_bias) { struct cpufreq_policy *policy; @@ -592,7 +594,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) policy = shared->policy; cpumask_or(&done, &done, policy->cpus); - if (policy->governor != &cpufreq_gov_ondemand) + if (policy->governor != CPU_FREQ_GOV_ONDEMAND) continue; dbs_data = policy->governor_data; @@ -620,12 +622,12 @@ EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); static int __init cpufreq_gov_dbs_init(void) { - return cpufreq_register_governor(&cpufreq_gov_ondemand); + return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); } static void __exit cpufreq_gov_dbs_exit(void) { - cpufreq_unregister_governor(&cpufreq_gov_ondemand); + cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); } MODULE_AUTHOR("Venkatesh Pallipadi "); @@ -637,7 +639,7 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND struct cpufreq_governor *cpufreq_default_governor(void) { - return &cpufreq_gov_ondemand; + return CPU_FREQ_GOV_ONDEMAND; } fs_initcall(cpufreq_gov_dbs_init); From 7bdad34d0890b69c30e8c6a50c9c2311a839fd68 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:05:07 +0100 Subject: [PATCH 07/61] cpufreq: governor: Rename some data types and variables The ondemand and conservative governors are represented by struct common_dbs_data whose name doesn't reflect the purpose it is used for, so rename it to struct dbs_governor and rename variables of that type accordingly. No functional changes. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/amd_freq_sensitivity.c | 2 +- drivers/cpufreq/cpufreq_conservative.c | 8 +-- drivers/cpufreq/cpufreq_governor.c | 88 +++++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 12 ++-- drivers/cpufreq/cpufreq_ondemand.c | 8 +-- 5 files changed, 59 insertions(+), 59 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index f6b79ab0070b5..a7d237b386d34 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -48,7 +48,7 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct dbs_data *od_data = policy->governor_data; struct od_dbs_tuners *od_tuners = od_data->tuners; struct od_cpu_dbs_info_s *od_info = - od_data->cdata->get_cpu_dbs_info_s(policy->cpu); + od_data->gov->get_cpu_dbs_info_s(policy->cpu); if (!od_info->freq_table) return freq_next; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4597f7430c95d..c65ac365a2dd8 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -119,7 +119,7 @@ static struct notifier_block cs_cpufreq_notifier_block = { }; /************************** sysfs interface ************************/ -static struct common_dbs_data cs_dbs_cdata; +static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) @@ -328,7 +328,7 @@ define_get_cpu_dbs_routines(cs_cpu_dbs_info); static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); -static struct common_dbs_data cs_dbs_cdata = { +static struct dbs_governor cs_dbs_gov = { .gov = { .name = "conservative", .governor = cs_cpufreq_governor_dbs, @@ -346,12 +346,12 @@ static struct common_dbs_data cs_dbs_cdata = { .exit = cs_exit, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_cdata.gov) +#define CPU_FREQ_GOV_CONSERVATIVE 
(&cs_dbs_gov.gov) static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); + return cpufreq_governor_dbs(policy, &cs_dbs_gov, event); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index a329e1bcb6bc5..dc5bb298b4498 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -28,14 +28,14 @@ EXPORT_SYMBOL_GPL(dbs_data_mutex); static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) { if (have_governor_per_policy()) - return dbs_data->cdata->attr_group_gov_pol; + return dbs_data->gov->attr_group_gov_pol; else - return dbs_data->cdata->attr_group_gov_sys; + return dbs_data->gov->attr_group_gov_sys; } void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) { - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpufreq_policy *policy = cdbs->shared->policy; @@ -44,9 +44,9 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int ignore_nice; unsigned int j; - if (dbs_data->cdata->governor == GOV_ONDEMAND) { + if (dbs_data->gov->governor == GOV_ONDEMAND) { struct od_cpu_dbs_info_s *od_dbs_info = - dbs_data->cdata->get_cpu_dbs_info_s(cpu); + dbs_data->gov->get_cpu_dbs_info_s(cpu); /* * Sometimes, the ondemand governor uses an additional @@ -71,7 +71,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int load; int io_busy = 0; - j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); + j_cdbs = dbs_data->gov->get_cpu_cdbs(j); /* * For the purpose of ondemand, waiting for disk IO is @@ -79,7 +79,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) * not that the system is actually idle. 
So do not add * the iowait time to the cpu idle time. */ - if (dbs_data->cdata->governor == GOV_ONDEMAND) + if (dbs_data->gov->governor == GOV_ONDEMAND) io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); @@ -160,7 +160,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) max_load = load; } - dbs_data->cdata->gov_check_cpu(cpu, max_load); + dbs_data->gov->gov_check_cpu(cpu, max_load); } EXPORT_SYMBOL_GPL(dbs_check_cpu); @@ -175,7 +175,7 @@ void gov_set_update_util(struct cpu_common_dbs_info *shared, shared->last_sample_time = 0; for_each_cpu(cpu, policy->cpus) { - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(cpu); cpufreq_set_update_util_data(cpu, &cdbs->update_util); } @@ -223,7 +223,7 @@ static void dbs_work_handler(struct work_struct *work) * ondemand governor isn't updating the sampling rate in parallel. */ mutex_lock(&shared->timer_mutex); - delay = dbs_data->cdata->gov_dbs_timer(policy); + delay = dbs_data->gov->gov_dbs_timer(policy); shared->sample_delay_ns = jiffies_to_nsecs(delay); mutex_unlock(&shared->timer_mutex); @@ -282,7 +282,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, static void set_sampling_rate(struct dbs_data *dbs_data, unsigned int sampling_rate) { - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + if (dbs_data->gov->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; cs_tuners->sampling_rate = sampling_rate; } else { @@ -292,7 +292,7 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } static int alloc_common_dbs_info(struct cpufreq_policy *policy, - struct common_dbs_data *cdata) + struct dbs_governor *gov) { struct cpu_common_dbs_info *shared; int j; @@ -304,7 +304,7 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy, /* Set shared for all CPUs, online+offline */ for_each_cpu(j, policy->related_cpus) - 
cdata->get_cpu_cdbs(j)->shared = shared; + gov->get_cpu_cdbs(j)->shared = shared; mutex_init(&shared->timer_mutex); atomic_set(&shared->skip_work, 0); @@ -314,24 +314,24 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy, } static void free_common_dbs_info(struct cpufreq_policy *policy, - struct common_dbs_data *cdata) + struct dbs_governor *gov) { - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; int j; mutex_destroy(&shared->timer_mutex); for_each_cpu(j, policy->cpus) - cdata->get_cpu_cdbs(j)->shared = NULL; + gov->get_cpu_cdbs(j)->shared = NULL; kfree(shared); } static int cpufreq_governor_init(struct cpufreq_policy *policy, - struct common_dbs_data *cdata) + struct dbs_governor *gov) { - struct dbs_data *dbs_data = cdata->gdbs_data; + struct dbs_data *dbs_data = gov->gdbs_data; unsigned int latency; int ret; @@ -343,7 +343,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (WARN_ON(have_governor_per_policy())) return -EINVAL; - ret = alloc_common_dbs_info(policy, cdata); + ret = alloc_common_dbs_info(policy, gov); if (ret) return ret; @@ -356,14 +356,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (!dbs_data) return -ENOMEM; - ret = alloc_common_dbs_info(policy, cdata); + ret = alloc_common_dbs_info(policy, gov); if (ret) goto free_dbs_data; - dbs_data->cdata = cdata; + dbs_data->gov = gov; dbs_data->usage_count = 1; - ret = cdata->init(dbs_data, !policy->governor->initialized); + ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) goto free_common_dbs_info; @@ -379,7 +379,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, latency * LATENCY_MULTIPLIER)); if (!have_governor_per_policy()) - cdata->gdbs_data = dbs_data; + gov->gdbs_data = dbs_data; policy->governor_data = dbs_data; @@ -394,10 +394,10 @@ static int cpufreq_governor_init(struct 
cpufreq_policy *policy, policy->governor_data = NULL; if (!have_governor_per_policy()) - cdata->gdbs_data = NULL; - cdata->exit(dbs_data, !policy->governor->initialized); + gov->gdbs_data = NULL; + gov->exit(dbs_data, !policy->governor->initialized); free_common_dbs_info: - free_common_dbs_info(policy, cdata); + free_common_dbs_info(policy, gov); free_dbs_data: kfree(dbs_data); return ret; @@ -406,8 +406,8 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, static int cpufreq_governor_exit(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - struct common_dbs_data *cdata = dbs_data->cdata; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + struct dbs_governor *gov = dbs_data->gov; + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); /* State should be equivalent to INIT */ if (!cdbs->shared || cdbs->shared->policy) @@ -420,24 +420,24 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; if (!have_governor_per_policy()) - cdata->gdbs_data = NULL; + gov->gdbs_data = NULL; - cdata->exit(dbs_data, policy->governor->initialized == 1); + gov->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); } else { policy->governor_data = NULL; } - free_common_dbs_info(policy, cdata); + free_common_dbs_info(policy, gov); return 0; } static int cpufreq_governor_start(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - struct common_dbs_data *cdata = dbs_data->cdata; + struct dbs_governor *gov = dbs_data->gov; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); struct cpu_common_dbs_info *shared = cdbs->shared; int io_busy = 0; @@ -448,7 +448,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (!shared || shared->policy) return -EBUSY; - if (cdata->governor == GOV_CONSERVATIVE) { + 
if (gov->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; sampling_rate = cs_tuners->sampling_rate; @@ -462,7 +462,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) } for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); unsigned int prev_load; j_cdbs->prev_cpu_idle = @@ -480,15 +480,15 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) } shared->policy = policy; - if (cdata->governor == GOV_CONSERVATIVE) { + if (gov->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = - cdata->get_cpu_dbs_info_s(cpu); + gov->get_cpu_dbs_info_s(cpu); cs_dbs_info->down_skip = 0; cs_dbs_info->requested_freq = policy->cur; } else { - struct od_ops *od_ops = cdata->gov_ops; - struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); + struct od_ops *od_ops = gov->gov_ops; + struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu); od_dbs_info->rate_mult = 1; od_dbs_info->sample_type = OD_NORMAL_SAMPLE; @@ -502,7 +502,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); + struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; /* State should be equivalent to START */ @@ -518,9 +518,9 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) static int cpufreq_governor_limits(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - struct common_dbs_data *cdata = dbs_data->cdata; + struct dbs_governor *gov = dbs_data->gov; unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); /* State should 
be equivalent to START */ if (!cdbs->shared || !cdbs->shared->policy) @@ -540,7 +540,7 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) } int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct common_dbs_data *cdata, unsigned int event) + struct dbs_governor *gov, unsigned int event) { int ret = -EINVAL; @@ -548,7 +548,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_data_mutex); if (event == CPUFREQ_GOV_POLICY_INIT) { - ret = cpufreq_governor_init(policy, cdata); + ret = cpufreq_governor_init(policy, gov); } else if (policy->governor_data) { switch (event) { case CPUFREQ_GOV_POLICY_EXIT: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 2fa3cf1043145..ed87b84429850 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -78,7 +78,7 @@ __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) static ssize_t show_##file_name##_gov_sys \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ + struct _gov##_dbs_tuners *tuners = _gov##_dbs_gov.gdbs_data->tuners; \ return sprintf(buf, "%u\n", tuners->file_name); \ } \ \ @@ -94,7 +94,7 @@ static ssize_t show_##file_name##_gov_pol \ static ssize_t store_##file_name##_gov_sys \ (struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ { \ - struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ return store_##file_name(dbs_data, buf, count); \ } \ \ @@ -205,7 +205,7 @@ struct cs_dbs_tuners { /* Common Governor data across policies */ struct dbs_data; -struct common_dbs_data { +struct dbs_governor { struct cpufreq_governor gov; #define GOV_ONDEMAND 0 @@ -233,7 +233,7 @@ struct common_dbs_data { /* Governor Per policy data */ struct dbs_data { - struct common_dbs_data *cdata; + struct dbs_governor *gov; unsigned int 
min_sampling_rate; int usage_count; void *tuners; @@ -262,7 +262,7 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) static ssize_t show_sampling_rate_min_gov_sys \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ } \ \ @@ -277,7 +277,7 @@ extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct common_dbs_data *cdata, unsigned int event); + struct dbs_governor *gov, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 836116cd4bad9..c38a4a1111d48 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -219,7 +219,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) } /************************** sysfs interface ************************/ -static struct common_dbs_data od_dbs_cdata; +static struct dbs_governor od_dbs_gov; /** * update_sampling_rate - update sampling rate effective immediately if needed. 
@@ -542,7 +542,7 @@ static struct od_ops od_ops = { static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); -static struct common_dbs_data od_dbs_cdata = { +static struct dbs_governor od_dbs_gov = { .gov = { .name = "ondemand", .governor = od_cpufreq_governor_dbs, @@ -561,12 +561,12 @@ static struct common_dbs_data od_dbs_cdata = { .exit = od_exit, }; -#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_cdata.gov) +#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); + return cpufreq_governor_dbs(policy, &od_dbs_gov, event); } static void od_set_powersave_bias(unsigned int powersave_bias) From 906a6e5aaef24d3c80bf6a06c794c7541aca64be Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:07:51 +0100 Subject: [PATCH 08/61] cpufreq: governor: Rework cpufreq_governor_dbs() Since it is possible to obtain a pointer to struct dbs_governor from a pointer to the struct cpufreq_governor embedded in it via container_of(), the second argument of cpufreq_governor_init() is not necessary. Accordingly, cpufreq_governor_dbs() doesn't need its second argument either and the ->governor callbacks for both the ondemand and conservative governors may be set to cpufreq_governor_dbs() directly. Make that happen. Signed-off-by: Rafael J.
Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 11 +---------- drivers/cpufreq/cpufreq_governor.c | 10 +++++----- drivers/cpufreq/cpufreq_governor.h | 3 +-- drivers/cpufreq/cpufreq_ondemand.c | 11 +---------- 4 files changed, 8 insertions(+), 27 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c65ac365a2dd8..20c82913ef42d 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -325,13 +325,10 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) define_get_cpu_dbs_routines(cs_cpu_dbs_info); -static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event); - static struct dbs_governor cs_dbs_gov = { .gov = { .name = "conservative", - .governor = cs_cpufreq_governor_dbs, + .governor = cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }, @@ -348,12 +345,6 @@ static struct dbs_governor cs_dbs_gov = { #define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) -static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - return cpufreq_governor_dbs(policy, &cs_dbs_gov, event); -} - static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index dc5bb298b4498..7e579fc42d2a3 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -328,9 +328,10 @@ static void free_common_dbs_info(struct cpufreq_policy *policy, kfree(shared); } -static int cpufreq_governor_init(struct cpufreq_policy *policy, - struct dbs_governor *gov) +static int cpufreq_governor_init(struct cpufreq_policy *policy) { + struct dbs_governor *gov = container_of(policy->governor, + struct dbs_governor, gov); struct dbs_data *dbs_data = gov->gdbs_data; unsigned int latency; int ret; @@ -539,8 +540,7 
@@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) return 0; } -int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct dbs_governor *gov, unsigned int event) +int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { int ret = -EINVAL; @@ -548,7 +548,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_data_mutex); if (event == CPUFREQ_GOV_POLICY_INIT) { - ret = cpufreq_governor_init(policy, gov); + ret = cpufreq_governor_init(policy); } else if (policy->governor_data) { switch (event) { case CPUFREQ_GOV_POLICY_EXIT: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ed87b84429850..8e280b8c446a2 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -276,8 +276,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct dbs_governor *gov, unsigned int event); +int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c38a4a1111d48..dcbcbf441ac15 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -539,13 +539,10 @@ static struct od_ops od_ops = { .freq_increase = dbs_freq_increase, }; -static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event); - static struct dbs_governor od_dbs_gov = { .gov = { .name = "ondemand", - .governor = od_cpufreq_governor_dbs, + .governor = cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }, @@ -563,12 +560,6 @@ static struct dbs_governor 
od_dbs_gov = { #define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) -static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - return cpufreq_governor_dbs(policy, &od_dbs_gov, event); -} - static void od_set_powersave_bias(unsigned int powersave_bias) { struct cpufreq_policy *policy; From ea59ee0dc9796a4e879291cc2f4728d04c499313 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:09:51 +0100 Subject: [PATCH 09/61] cpufreq: governor: Drop the gov pointer from struct dbs_data Since it is possible to obtain a pointer to struct dbs_governor from a pointer to the struct governor embedded in it with the help of container_of(), the additional gov pointer in struct dbs_data isn't really necessary. Drop that pointer and make the code using it reach the dbs_governor object via policy->governor. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/amd_freq_sensitivity.c | 2 +- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 63 ++++++++++++-------------- drivers/cpufreq/cpufreq_governor.h | 8 +++- drivers/cpufreq/cpufreq_ondemand.c | 2 +- 5 files changed, 39 insertions(+), 38 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index a7d237b386d34..6395a5f0ff251 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -48,7 +48,7 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct dbs_data *od_data = policy->governor_data; struct od_dbs_tuners *od_tuners = od_data->tuners; struct od_cpu_dbs_info_s *od_info = - od_data->gov->get_cpu_dbs_info_s(policy->cpu); + dbs_governor_of(policy)->get_cpu_dbs_info_s(policy->cpu); if (!od_info->freq_table) return freq_next; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 20c82913ef42d..7d5f181e1679c 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ 
b/drivers/cpufreq/cpufreq_conservative.c @@ -107,7 +107,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - dbs_check_cpu(dbs_data, policy->cpu); + dbs_check_cpu(policy, policy->cpu); return delay_for_sampling_rate(cs_tuners->sampling_rate); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 7e579fc42d2a3..d3fa8b31015ca 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -25,28 +25,27 @@ DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex); -static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) +static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov) { - if (have_governor_per_policy()) - return dbs_data->gov->attr_group_gov_pol; - else - return dbs_data->gov->attr_group_gov_sys; + return have_governor_per_policy() ? + gov->attr_group_gov_pol : gov->attr_group_gov_sys; } -void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) +void dbs_check_cpu(struct cpufreq_policy *policy, int cpu) { - struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(cpu); + struct dbs_governor *gov = dbs_governor_of(policy); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); + struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - struct cpufreq_policy *policy = cdbs->shared->policy; unsigned int sampling_rate; unsigned int max_load = 0; unsigned int ignore_nice; unsigned int j; - if (dbs_data->gov->governor == GOV_ONDEMAND) { + if (gov->governor == GOV_ONDEMAND) { struct od_cpu_dbs_info_s *od_dbs_info = - dbs_data->gov->get_cpu_dbs_info_s(cpu); + gov->get_cpu_dbs_info_s(cpu); /* * Sometimes, the ondemand governor uses an additional @@ -71,7 +70,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int load; int io_busy = 0; 
- j_cdbs = dbs_data->gov->get_cpu_cdbs(j); + j_cdbs = gov->get_cpu_cdbs(j); /* * For the purpose of ondemand, waiting for disk IO is @@ -79,7 +78,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) * not that the system is actually idle. So do not add * the iowait time to the cpu idle time. */ - if (dbs_data->gov->governor == GOV_ONDEMAND) + if (gov->governor == GOV_ONDEMAND) io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); @@ -160,7 +159,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) max_load = load; } - dbs_data->gov->gov_check_cpu(cpu, max_load); + gov->gov_check_cpu(cpu, max_load); } EXPORT_SYMBOL_GPL(dbs_check_cpu); @@ -168,14 +167,14 @@ void gov_set_update_util(struct cpu_common_dbs_info *shared, unsigned int delay_us) { struct cpufreq_policy *policy = shared->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct dbs_governor *gov = dbs_governor_of(policy); int cpu; gov_update_sample_delay(shared, delay_us); shared->last_sample_time = 0; for_each_cpu(cpu, policy->cpus) { - struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); cpufreq_set_update_util_data(cpu, &cdbs->update_util); } @@ -212,18 +211,18 @@ static void dbs_work_handler(struct work_struct *work) struct cpu_common_dbs_info *shared = container_of(work, struct cpu_common_dbs_info, work); struct cpufreq_policy *policy; - struct dbs_data *dbs_data; + struct dbs_governor *gov; unsigned int delay; policy = shared->policy; - dbs_data = policy->governor_data; + gov = dbs_governor_of(policy); /* * Make sure cpufreq_governor_limits() isn't evaluating load or the * ondemand governor isn't updating the sampling rate in parallel. 
*/ mutex_lock(&shared->timer_mutex); - delay = dbs_data->gov->gov_dbs_timer(policy); + delay = gov->gov_dbs_timer(policy); shared->sample_delay_ns = jiffies_to_nsecs(delay); mutex_unlock(&shared->timer_mutex); @@ -280,9 +279,10 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, } static void set_sampling_rate(struct dbs_data *dbs_data, - unsigned int sampling_rate) + struct dbs_governor *gov, + unsigned int sampling_rate) { - if (dbs_data->gov->governor == GOV_CONSERVATIVE) { + if (gov->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; cs_tuners->sampling_rate = sampling_rate; } else { @@ -330,8 +330,7 @@ static void free_common_dbs_info(struct cpufreq_policy *policy, static int cpufreq_governor_init(struct cpufreq_policy *policy) { - struct dbs_governor *gov = container_of(policy->governor, - struct dbs_governor, gov); + struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data = gov->gdbs_data; unsigned int latency; int ret; @@ -361,7 +360,6 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (ret) goto free_dbs_data; - dbs_data->gov = gov; dbs_data->usage_count = 1; ret = gov->init(dbs_data, !policy->governor->initialized); @@ -376,7 +374,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) /* Bring kernel and HW constraints together */ dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, MIN_LATENCY_MULTIPLIER * latency); - set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, + set_sampling_rate(dbs_data, gov, max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER)); if (!have_governor_per_policy()) @@ -385,7 +383,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) policy->governor_data = dbs_data; ret = sysfs_create_group(get_governor_parent_kobj(policy), - get_sysfs_attr(dbs_data)); + get_sysfs_attr(gov)); if (ret) goto reset_gdbs_data; @@ -406,8 +404,8 @@ static int cpufreq_governor_init(struct 
cpufreq_policy *policy) static int cpufreq_governor_exit(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data = policy->governor_data; - struct dbs_governor *gov = dbs_data->gov; struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); /* State should be equivalent to INIT */ @@ -416,7 +414,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) if (!--dbs_data->usage_count) { sysfs_remove_group(get_governor_parent_kobj(policy), - get_sysfs_attr(dbs_data)); + get_sysfs_attr(gov)); policy->governor_data = NULL; @@ -435,8 +433,8 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) static int cpufreq_governor_start(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data = policy->governor_data; - struct dbs_governor *gov = dbs_data->gov; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); struct cpu_common_dbs_info *shared = cdbs->shared; @@ -502,8 +500,8 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(policy->cpu); + struct dbs_governor *gov = dbs_governor_of(policy); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; /* State should be equivalent to START */ @@ -518,8 +516,7 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) static int cpufreq_governor_limits(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; - struct dbs_governor *gov = dbs_data->gov; + struct dbs_governor *gov = dbs_governor_of(policy); unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); @@ -534,7 +531,7 @@ static int cpufreq_governor_limits(struct 
cpufreq_policy *policy) else if (policy->min > cdbs->shared->policy->cur) __cpufreq_driver_target(cdbs->shared->policy, policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(dbs_data, cpu); + dbs_check_cpu(policy, cpu); mutex_unlock(&cdbs->shared->timer_mutex); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 8e280b8c446a2..c8b7ec22871cd 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -231,9 +231,13 @@ struct dbs_governor { void *gov_ops; }; +static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) +{ + return container_of(policy->governor, struct dbs_governor, gov); +} + /* Governor Per policy data */ struct dbs_data { - struct dbs_governor *gov; unsigned int min_sampling_rate; int usage_count; void *tuners; @@ -275,7 +279,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; -void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); +void dbs_check_cpu(struct cpufreq_policy *policy, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index dcbcbf441ac15..65ad39d95e39c 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -203,7 +203,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); } else { - dbs_check_cpu(dbs_data, cpu); + dbs_check_cpu(policy, cpu); if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; From e40e7b255e591d0448500c7910ec5693f58026bd Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 10 Feb 2016 17:07:44 +0100 Subject: [PATCH 10/61] cpufreq: governor: Rename cpu_common_dbs_info to policy_dbs_info The struct cpu_common_dbs_info structure represents the per-policy part of the governor data (for the ondemand and conservative governors), but its name doesn't reflect its purpose. Rename it to struct policy_dbs_info and rename variables related to it accordingly. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 134 ++++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 8 +- drivers/cpufreq/cpufreq_ondemand.c | 28 +++--- 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 7d5f181e1679c..b2df5de6cf92e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -47,7 +47,7 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index d3fa8b31015ca..b425cd3da6829 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -163,15 +163,15 @@ void dbs_check_cpu(struct cpufreq_policy *policy, int cpu) } EXPORT_SYMBOL_GPL(dbs_check_cpu); -void gov_set_update_util(struct cpu_common_dbs_info *shared, +void gov_set_update_util(struct policy_dbs_info *policy_dbs, unsigned int delay_us) { - struct cpufreq_policy *policy = shared->policy; + struct cpufreq_policy *policy = policy_dbs->policy; 
struct dbs_governor *gov = dbs_governor_of(policy); int cpu; - gov_update_sample_delay(shared, delay_us); - shared->last_sample_time = 0; + gov_update_sample_delay(policy_dbs, delay_us); + policy_dbs->last_sample_time = 0; for_each_cpu(cpu, policy->cpus) { struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); @@ -191,40 +191,40 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) synchronize_rcu(); } -static void gov_cancel_work(struct cpu_common_dbs_info *shared) +static void gov_cancel_work(struct policy_dbs_info *policy_dbs) { /* Tell dbs_update_util_handler() to skip queuing up work items. */ - atomic_inc(&shared->skip_work); + atomic_inc(&policy_dbs->skip_work); /* * If dbs_update_util_handler() is already running, it may not notice * the incremented skip_work, so wait for it to complete to prevent its * work item from being queued up after the cancel_work_sync() below. */ - gov_clear_update_util(shared->policy); - irq_work_sync(&shared->irq_work); - cancel_work_sync(&shared->work); - atomic_set(&shared->skip_work, 0); + gov_clear_update_util(policy_dbs->policy); + irq_work_sync(&policy_dbs->irq_work); + cancel_work_sync(&policy_dbs->work); + atomic_set(&policy_dbs->skip_work, 0); } static void dbs_work_handler(struct work_struct *work) { - struct cpu_common_dbs_info *shared = container_of(work, struct - cpu_common_dbs_info, work); + struct policy_dbs_info *policy_dbs; struct cpufreq_policy *policy; struct dbs_governor *gov; unsigned int delay; - policy = shared->policy; + policy_dbs = container_of(work, struct policy_dbs_info, work); + policy = policy_dbs->policy; gov = dbs_governor_of(policy); /* * Make sure cpufreq_governor_limits() isn't evaluating load or the * ondemand governor isn't updating the sampling rate in parallel. 
*/ - mutex_lock(&shared->timer_mutex); + mutex_lock(&policy_dbs->timer_mutex); delay = gov->gov_dbs_timer(policy); - shared->sample_delay_ns = jiffies_to_nsecs(delay); - mutex_unlock(&shared->timer_mutex); + policy_dbs->sample_delay_ns = jiffies_to_nsecs(delay); + mutex_unlock(&policy_dbs->timer_mutex); /* * If the atomic operation below is reordered with respect to the @@ -232,23 +232,23 @@ static void dbs_work_handler(struct work_struct *work) * up using a stale sample delay value. */ smp_mb__before_atomic(); - atomic_dec(&shared->skip_work); + atomic_dec(&policy_dbs->skip_work); } static void dbs_irq_work(struct irq_work *irq_work) { - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; - shared = container_of(irq_work, struct cpu_common_dbs_info, irq_work); - schedule_work(&shared->work); + policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work); + schedule_work(&policy_dbs->work); } -static inline void gov_queue_irq_work(struct cpu_common_dbs_info *shared) +static inline void gov_queue_irq_work(struct policy_dbs_info *policy_dbs) { #ifdef CONFIG_SMP - irq_work_queue_on(&shared->irq_work, smp_processor_id()); + irq_work_queue_on(&policy_dbs->irq_work, smp_processor_id()); #else - irq_work_queue(&shared->irq_work); + irq_work_queue(&policy_dbs->irq_work); #endif } @@ -256,7 +256,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, unsigned long util, unsigned long max) { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); - struct cpu_common_dbs_info *shared = cdbs->shared; + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; /* * The work may not be allowed to be queued up right now. @@ -265,17 +265,17 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * - The governor is being stopped. * - It is too early (too little time from the previous sample). 
*/ - if (atomic_inc_return(&shared->skip_work) == 1) { + if (atomic_inc_return(&policy_dbs->skip_work) == 1) { u64 delta_ns; - delta_ns = time - shared->last_sample_time; - if ((s64)delta_ns >= shared->sample_delay_ns) { - shared->last_sample_time = time; - gov_queue_irq_work(shared); + delta_ns = time - policy_dbs->last_sample_time; + if ((s64)delta_ns >= policy_dbs->sample_delay_ns) { + policy_dbs->last_sample_time = time; + gov_queue_irq_work(policy_dbs); return; } } - atomic_dec(&shared->skip_work); + atomic_dec(&policy_dbs->skip_work); } static void set_sampling_rate(struct dbs_data *dbs_data, @@ -291,41 +291,41 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } } -static int alloc_common_dbs_info(struct cpufreq_policy *policy, +static int alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; int j; /* Allocate memory for the common information for policy->cpus */ - shared = kzalloc(sizeof(*shared), GFP_KERNEL); - if (!shared) + policy_dbs = kzalloc(sizeof(*policy_dbs), GFP_KERNEL); + if (!policy_dbs) return -ENOMEM; - /* Set shared for all CPUs, online+offline */ + /* Set policy_dbs for all CPUs, online+offline */ for_each_cpu(j, policy->related_cpus) - gov->get_cpu_cdbs(j)->shared = shared; + gov->get_cpu_cdbs(j)->policy_dbs = policy_dbs; - mutex_init(&shared->timer_mutex); - atomic_set(&shared->skip_work, 0); - init_irq_work(&shared->irq_work, dbs_irq_work); - INIT_WORK(&shared->work, dbs_work_handler); + mutex_init(&policy_dbs->timer_mutex); + atomic_set(&policy_dbs->skip_work, 0); + init_irq_work(&policy_dbs->irq_work, dbs_irq_work); + INIT_WORK(&policy_dbs->work, dbs_work_handler); return 0; } -static void free_common_dbs_info(struct cpufreq_policy *policy, +static void free_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct cpu_common_dbs_info 
*shared = cdbs->shared; + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; int j; - mutex_destroy(&shared->timer_mutex); + mutex_destroy(&policy_dbs->timer_mutex); for_each_cpu(j, policy->cpus) - gov->get_cpu_cdbs(j)->shared = NULL; + gov->get_cpu_cdbs(j)->policy_dbs = NULL; - kfree(shared); + kfree(policy_dbs); } static int cpufreq_governor_init(struct cpufreq_policy *policy) @@ -343,7 +343,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (WARN_ON(have_governor_per_policy())) return -EINVAL; - ret = alloc_common_dbs_info(policy, gov); + ret = alloc_policy_dbs_info(policy, gov); if (ret) return ret; @@ -356,7 +356,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!dbs_data) return -ENOMEM; - ret = alloc_common_dbs_info(policy, gov); + ret = alloc_policy_dbs_info(policy, gov); if (ret) goto free_dbs_data; @@ -364,7 +364,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) - goto free_common_dbs_info; + goto free_policy_dbs_info; /* policy latency is in ns. 
Convert it to us first */ latency = policy->cpuinfo.transition_latency / 1000; @@ -395,8 +395,8 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = NULL; gov->exit(dbs_data, !policy->governor->initialized); -free_common_dbs_info: - free_common_dbs_info(policy, gov); +free_policy_dbs_info: + free_policy_dbs_info(policy, gov); free_dbs_data: kfree(dbs_data); return ret; @@ -409,7 +409,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); /* State should be equivalent to INIT */ - if (!cdbs->shared || cdbs->shared->policy) + if (!cdbs->policy_dbs || cdbs->policy_dbs->policy) return -EBUSY; if (!--dbs_data->usage_count) { @@ -427,7 +427,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; } - free_common_dbs_info(policy, gov); + free_policy_dbs_info(policy, gov); return 0; } @@ -437,14 +437,14 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy->governor_data; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); - struct cpu_common_dbs_info *shared = cdbs->shared; + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; int io_busy = 0; if (!policy->cur) return -EINVAL; /* State should be equivalent to INIT */ - if (!shared || shared->policy) + if (!policy_dbs || policy_dbs->policy) return -EBUSY; if (gov->governor == GOV_CONSERVATIVE) { @@ -477,7 +477,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) j_cdbs->update_util.func = dbs_update_util_handler; } - shared->policy = policy; + policy_dbs->policy = policy; if (gov->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -494,7 +494,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) od_ops->powersave_bias_init_cpu(cpu); } - gov_set_update_util(shared, 
sampling_rate); + gov_set_update_util(policy_dbs, sampling_rate); return 0; } @@ -502,14 +502,14 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct cpu_common_dbs_info *shared = cdbs->shared; + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; /* State should be equivalent to START */ - if (!shared || !shared->policy) + if (!policy_dbs || !policy_dbs->policy) return -EBUSY; - gov_cancel_work(shared); - shared->policy = NULL; + gov_cancel_work(policy_dbs); + policy_dbs->policy = NULL; return 0; } @@ -521,18 +521,18 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); /* State should be equivalent to START */ - if (!cdbs->shared || !cdbs->shared->policy) + if (!cdbs->policy_dbs || !cdbs->policy_dbs->policy) return -EBUSY; - mutex_lock(&cdbs->shared->timer_mutex); - if (policy->max < cdbs->shared->policy->cur) - __cpufreq_driver_target(cdbs->shared->policy, policy->max, + mutex_lock(&cdbs->policy_dbs->timer_mutex); + if (policy->max < cdbs->policy_dbs->policy->cur) + __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > cdbs->shared->policy->cur) - __cpufreq_driver_target(cdbs->shared->policy, policy->min, + else if (policy->min > cdbs->policy_dbs->policy->cur) + __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(policy, cpu); - mutex_unlock(&cdbs->shared->timer_mutex); + mutex_unlock(&cdbs->policy_dbs->timer_mutex); return 0; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index c8b7ec22871cd..c90a2d3766fd1 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -131,7 +131,7 @@ static void *get_cpu_dbs_info_s(int cpu) \ */ /* Common to all CPUs of a policy */ -struct 
cpu_common_dbs_info { +struct policy_dbs_info { struct cpufreq_policy *policy; /* * Per policy mutex that serializes load evaluation from limit-change @@ -146,10 +146,10 @@ struct cpu_common_dbs_info { struct work_struct work; }; -static inline void gov_update_sample_delay(struct cpu_common_dbs_info *shared, +static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, unsigned int delay_us) { - shared->sample_delay_ns = delay_us * NSEC_PER_USEC; + policy_dbs->sample_delay_ns = delay_us * NSEC_PER_USEC; } /* Per cpu structures */ @@ -165,7 +165,7 @@ struct cpu_dbs_info { */ unsigned int prev_load; struct update_util_data update_util; - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; }; struct od_cpu_dbs_info_s { diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 65ad39d95e39c..4a2332733cca1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -151,7 +151,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) static void od_check_cpu(int cpu, unsigned int load) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; @@ -255,20 +255,20 @@ static void update_sampling_rate(struct dbs_data *dbs_data, struct cpufreq_policy *policy; struct od_cpu_dbs_info_s *dbs_info; struct cpu_dbs_info *cdbs; - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cdbs = &dbs_info->cdbs; - shared = cdbs->shared; + policy_dbs = cdbs->policy_dbs; /* - * A valid shared and shared->policy means governor hasn't - * stopped or exited yet. + * A valid policy_dbs and policy_dbs->policy means governor + * hasn't stopped or exited yet. 
*/ - if (!shared || !shared->policy) + if (!policy_dbs || !policy_dbs->policy) continue; - policy = shared->policy; + policy = policy_dbs->policy; /* clear all CPUs of this policy */ cpumask_andnot(&cpumask, &cpumask, policy->cpus); @@ -280,7 +280,7 @@ static void update_sampling_rate(struct dbs_data *dbs_data, * multiple policies that are governed by the same dbs_data. */ if (dbs_data == policy->governor_data) { - mutex_lock(&shared->timer_mutex); + mutex_lock(&policy_dbs->timer_mutex); /* * On 32-bit architectures this may race with the * sample_delay_ns read in dbs_update_util_handler(), @@ -299,8 +299,8 @@ static void update_sampling_rate(struct dbs_data *dbs_data, * too big and it will be corrected next time a sample * is taken, so it shouldn't be significant. */ - gov_update_sample_delay(shared, new_rate); - mutex_unlock(&shared->timer_mutex); + gov_update_sample_delay(policy_dbs, new_rate); + mutex_unlock(&policy_dbs->timer_mutex); } } @@ -573,16 +573,16 @@ static void od_set_powersave_bias(unsigned int powersave_bias) get_online_cpus(); for_each_online_cpu(cpu) { - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; if (cpumask_test_cpu(cpu, &done)) continue; - shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; - if (!shared) + policy_dbs = per_cpu(od_cpu_dbs_info, cpu).cdbs.policy_dbs; + if (!policy_dbs) continue; - policy = shared->policy; + policy = policy_dbs->policy; cpumask_or(&done, &done, policy->cpus); if (policy->governor != CPU_FREQ_GOV_ONDEMAND) From d10b5eb5fce436ba22443ab83eeb36e195dbf772 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 6 Feb 2016 13:50:24 +0100 Subject: [PATCH 11/61] cpufreq: governor: Drop cpu argument from dbs_check_cpu() Since policy->cpu is always passed as the second argument to dbs_check_cpu(), it is not really necessary to pass it, because the function can obtain that value via its first argument just fine. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 8 ++++---- drivers/cpufreq/cpufreq_governor.h | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 6 ++---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index b2df5de6cf92e..b8054e53a37e9 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -107,7 +107,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - dbs_check_cpu(policy, policy->cpu); + dbs_check_cpu(policy); return delay_for_sampling_rate(cs_tuners->sampling_rate); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b425cd3da6829..431d81f7963c5 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -31,8 +31,9 @@ static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov) gov->attr_group_gov_pol : gov->attr_group_gov_sys; } -void dbs_check_cpu(struct cpufreq_policy *policy, int cpu) +void dbs_check_cpu(struct cpufreq_policy *policy) { + int cpu = policy->cpu; struct dbs_governor *gov = dbs_governor_of(policy); struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); struct dbs_data *dbs_data = policy->governor_data; @@ -517,8 +518,7 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) static int cpufreq_governor_limits(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); - unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); /* State should be equivalent to START */ if (!cdbs->policy_dbs || !cdbs->policy_dbs->policy) @@ -531,7 +531,7 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) else if 
(policy->min > cdbs->policy_dbs->policy->cur) __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(policy, cpu); + dbs_check_cpu(policy); mutex_unlock(&cdbs->policy_dbs->timer_mutex); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index c90a2d3766fd1..63868d7f14f58 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -279,7 +279,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; -void dbs_check_cpu(struct cpufreq_policy *policy, int cpu); +void dbs_check_cpu(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 4a2332733cca1..9ef4402644c7c 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -190,9 +190,7 @@ static void od_check_cpu(int cpu, unsigned int load) static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - unsigned int cpu = policy->cpu; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - cpu); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type; @@ -203,7 +201,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); } else { - dbs_check_cpu(policy, cpu); + dbs_check_cpu(policy); if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; From e9751894000af398d5895b3ee96052f57b80cc44 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 7 Feb 2016 16:23:49 +0100 Subject: [PATCH 12/61] cpufreq: governor: Simplify cpufreq_governor_limits() Use the observation that cpufreq_governor_limits() doesn't have to get to the policy object it wants to manipulate by walking the reference chain cdbs->policy_dbs->policy, as the final pointer is actually equal to its argument, and make it access the policy object directy via its argument. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 431d81f7963c5..ff247a7ac7742 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -519,20 +519,19 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; /* State should be equivalent to START */ - if (!cdbs->policy_dbs || !cdbs->policy_dbs->policy) + if (!policy_dbs || !policy_dbs->policy) return -EBUSY; - mutex_lock(&cdbs->policy_dbs->timer_mutex); - if (policy->max < cdbs->policy_dbs->policy->cur) - __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->max, - CPUFREQ_RELATION_H); - else if (policy->min > cdbs->policy_dbs->policy->cur) - __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->min, - CPUFREQ_RELATION_L); + mutex_lock(&policy_dbs->timer_mutex); + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(policy); - mutex_unlock(&cdbs->policy_dbs->timer_mutex); + mutex_unlock(&policy_dbs->timer_mutex); return 0; } From bc505475b85de9a9903e84ef0b369d4637354201 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 7 Feb 2016 16:24:26 +0100 Subject: [PATCH 13/61] cpufreq: governor: Rearrange governor data structures The struct policy_dbs_info objects representing per-policy governor data are not accessible directly from the corresponding policy objects. To access them, one has to get a pointer to the struct cpu_dbs_info of policy->cpu and use the policy_dbs field of that which isn't really straightforward. To address that rearrange the governor data structures so the governor_data pointer in struct cpufreq_policy will point to struct policy_dbs_info (instead of struct dbs_data) and that will contain a pointer to struct dbs_data. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/amd_freq_sensitivity.c | 3 +- drivers/cpufreq/cpufreq_conservative.c | 6 ++- drivers/cpufreq/cpufreq_governor.c | 74 +++++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 27 +++++----- drivers/cpufreq/cpufreq_ondemand.c | 18 ++++--- 5 files changed, 68 insertions(+), 60 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 6395a5f0ff251..82ae1002def18 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -45,7 +45,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, long d_actual, d_reference; struct msr actual, reference; struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu); - struct dbs_data *od_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *od_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = od_data->tuners; struct od_cpu_dbs_info_s *od_info = dbs_governor_of(policy)->get_cpu_dbs_info_s(policy->cpu); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index b8054e53a37e9..1a899bb7d1a4d 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -48,7 
+48,8 @@ static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; /* @@ -104,7 +105,8 @@ static void cs_check_cpu(int cpu, unsigned int load) static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; dbs_check_cpu(policy); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ff247a7ac7742..82e50dcf9feba 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -35,8 +35,8 @@ void dbs_check_cpu(struct cpufreq_policy *policy) { int cpu = policy->cpu; struct dbs_governor *gov = dbs_governor_of(policy); - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int sampling_rate; @@ -95,6 +95,7 @@ void dbs_check_cpu(struct cpufreq_policy *policy) j_cdbs->prev_cpu_idle = cur_idle_time; if (ignore_nice) { + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); u64 cur_nice; unsigned long cur_nice_jiffies; @@ -292,8 +293,8 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } } -static int alloc_policy_dbs_info(struct cpufreq_policy *policy, - struct dbs_governor *gov) +static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, + 
struct dbs_governor *gov) { struct policy_dbs_info *policy_dbs; int j; @@ -301,7 +302,7 @@ static int alloc_policy_dbs_info(struct cpufreq_policy *policy, /* Allocate memory for the common information for policy->cpus */ policy_dbs = kzalloc(sizeof(*policy_dbs), GFP_KERNEL); if (!policy_dbs) - return -ENOMEM; + return NULL; /* Set policy_dbs for all CPUs, online+offline */ for_each_cpu(j, policy->related_cpus) @@ -311,7 +312,7 @@ static int alloc_policy_dbs_info(struct cpufreq_policy *policy, atomic_set(&policy_dbs->skip_work, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); INIT_WORK(&policy_dbs->work, dbs_work_handler); - return 0; + return policy_dbs; } static void free_policy_dbs_info(struct cpufreq_policy *policy, @@ -333,6 +334,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data = gov->gdbs_data; + struct policy_dbs_info *policy_dbs; unsigned int latency; int ret; @@ -340,26 +342,26 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (policy->governor_data) return -EBUSY; - if (dbs_data) { - if (WARN_ON(have_governor_per_policy())) - return -EINVAL; - - ret = alloc_policy_dbs_info(policy, gov); - if (ret) - return ret; + policy_dbs = alloc_policy_dbs_info(policy, gov); + if (!policy_dbs) + return -ENOMEM; + if (dbs_data) { + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; + goto free_policy_dbs_info; + } dbs_data->usage_count++; - policy->governor_data = dbs_data; + policy_dbs->dbs_data = dbs_data; + policy->governor_data = policy_dbs; return 0; } dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); - if (!dbs_data) - return -ENOMEM; - - ret = alloc_policy_dbs_info(policy, gov); - if (ret) - goto free_dbs_data; + if (!dbs_data) { + ret = -ENOMEM; + goto free_policy_dbs_info; + } dbs_data->usage_count = 1; @@ -381,7 +383,8 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) 
gov->gdbs_data = dbs_data; - policy->governor_data = dbs_data; + policy_dbs->dbs_data = dbs_data; + policy->governor_data = policy_dbs; ret = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr(gov)); @@ -396,21 +399,21 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = NULL; gov->exit(dbs_data, !policy->governor->initialized); + kfree(dbs_data); + free_policy_dbs_info: free_policy_dbs_info(policy, gov); -free_dbs_data: - kfree(dbs_data); return ret; } static int cpufreq_governor_exit(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); - struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; /* State should be equivalent to INIT */ - if (!cdbs->policy_dbs || cdbs->policy_dbs->policy) + if (policy_dbs->policy) return -EBUSY; if (!--dbs_data->usage_count) { @@ -435,17 +438,16 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) static int cpufreq_governor_start(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); - struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; int io_busy = 0; if (!policy->cur) return -EINVAL; /* State should be equivalent to INIT */ - if (!policy_dbs || policy_dbs->policy) + if (policy_dbs->policy) return -EBUSY; if (gov->governor == GOV_CONSERVATIVE) { @@ -501,12 +503,10 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { - struct dbs_governor *gov = 
dbs_governor_of(policy); - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; + struct policy_dbs_info *policy_dbs = policy->governor_data; /* State should be equivalent to START */ - if (!policy_dbs || !policy_dbs->policy) + if (!policy_dbs->policy) return -EBUSY; gov_cancel_work(policy_dbs); @@ -517,12 +517,10 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) static int cpufreq_governor_limits(struct cpufreq_policy *policy) { - struct dbs_governor *gov = dbs_governor_of(policy); - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; + struct policy_dbs_info *policy_dbs = policy->governor_data; /* State should be equivalent to START */ - if (!policy_dbs || !policy_dbs->policy) + if (!policy_dbs->policy) return -EBUSY; mutex_lock(&policy_dbs->timer_mutex); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 63868d7f14f58..95e6834d36a83 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -85,7 +85,8 @@ static ssize_t show_##file_name##_gov_sys \ static ssize_t show_##file_name##_gov_pol \ (struct cpufreq_policy *policy, char *buf) \ { \ - struct dbs_data *dbs_data = policy->governor_data; \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + struct dbs_data *dbs_data = policy_dbs->dbs_data; \ struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ return sprintf(buf, "%u\n", tuners->file_name); \ } @@ -101,8 +102,8 @@ static ssize_t store_##file_name##_gov_sys \ static ssize_t store_##file_name##_gov_pol \ (struct cpufreq_policy *policy, const char *buf, size_t count) \ { \ - struct dbs_data *dbs_data = policy->governor_data; \ - return store_##file_name(dbs_data, buf, count); \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + return store_##file_name(policy_dbs->dbs_data, buf, count); \ } #define 
show_store_one(_gov, file_name) \ @@ -130,6 +131,13 @@ static void *get_cpu_dbs_info_s(int cpu) \ * cs_*: Conservative governor */ +/* Governor demand based switching data (per-policy or global). */ +struct dbs_data { + unsigned int min_sampling_rate; + int usage_count; + void *tuners; +}; + /* Common to all CPUs of a policy */ struct policy_dbs_info { struct cpufreq_policy *policy; @@ -144,6 +152,8 @@ struct policy_dbs_info { atomic_t skip_work; struct irq_work irq_work; struct work_struct work; + /* dbs_data may be shared between multiple policy objects */ + struct dbs_data *dbs_data; }; static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, @@ -204,7 +214,6 @@ struct cs_dbs_tuners { }; /* Common Governor data across policies */ -struct dbs_data; struct dbs_governor { struct cpufreq_governor gov; @@ -236,13 +245,6 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy return container_of(policy->governor, struct dbs_governor, gov); } -/* Governor Per policy data */ -struct dbs_data { - unsigned int min_sampling_rate; - int usage_count; - void *tuners; -}; - /* Governor specific ops, will be passed to dbs_data->gov_ops */ struct od_ops { void (*powersave_bias_init_cpu)(int cpu); @@ -273,7 +275,8 @@ static ssize_t show_sampling_rate_min_gov_sys \ static ssize_t show_sampling_rate_min_gov_pol \ (struct cpufreq_policy *policy, char *buf) \ { \ - struct dbs_data *dbs_data = policy->governor_data; \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + struct dbs_data *dbs_data = policy_dbs->dbs_data; \ return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 9ef4402644c7c..b7ef2e7f4d4a4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -78,7 +78,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int jiffies_total, jiffies_hi, 
jiffies_lo; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; if (!dbs_info->freq_table) { @@ -130,7 +131,8 @@ static void ondemand_powersave_bias_init(void) static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) { - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; if (od_tuners->powersave_bias) @@ -151,8 +153,9 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) static void od_check_cpu(int cpu, unsigned int load) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = dbs_info->cdbs.policy_dbs; + struct cpufreq_policy *policy = policy_dbs->policy; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; dbs_info->freq_lo = 0; @@ -189,7 +192,8 @@ static void od_check_cpu(int cpu, unsigned int load) static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type; @@ -277,7 +281,7 @@ static void update_sampling_rate(struct dbs_data *dbs_data, * policy will be governed by dbs_data, otherwise there can be * multiple policies that are governed by 
the same dbs_data. */ - if (dbs_data == policy->governor_data) { + if (dbs_data == policy_dbs->dbs_data) { mutex_lock(&policy_dbs->timer_mutex); /* * On 32-bit architectures this may race with the @@ -586,7 +590,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) if (policy->governor != CPU_FREQ_GOV_ONDEMAND) continue; - dbs_data = policy->governor_data; + dbs_data = policy_dbs->dbs_data; od_tuners = dbs_data->tuners; od_tuners->powersave_bias = default_powersave_bias; } From cea6a9e77228c261191bc92df0d24bf5356b99ff Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:25:02 +0100 Subject: [PATCH 14/61] cpufreq: governor: Symmetrize cpu_dbs_info initialization and cleanup Make the initialization of struct cpu_dbs_info objects in alloc_policy_dbs_info() and the code that cleans them up in free_policy_dbs_info() more symmetrical. In particular, set/clear the update_util.func field in those functions along with the policy_dbs field. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 82e50dcf9feba..7c08d8360f721 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -304,14 +304,18 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli if (!policy_dbs) return NULL; - /* Set policy_dbs for all CPUs, online+offline */ - for_each_cpu(j, policy->related_cpus) - gov->get_cpu_cdbs(j)->policy_dbs = policy_dbs; - mutex_init(&policy_dbs->timer_mutex); atomic_set(&policy_dbs->skip_work, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); INIT_WORK(&policy_dbs->work, dbs_work_handler); + + /* Set policy_dbs for all CPUs, online+offline */ + for_each_cpu(j, policy->related_cpus) { + struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + + j_cdbs->policy_dbs = policy_dbs; + j_cdbs->update_util.func = dbs_update_util_handler; + } return policy_dbs; } @@ -324,9 +328,12 @@ static void free_policy_dbs_info(struct cpufreq_policy *policy, mutex_destroy(&policy_dbs->timer_mutex); - for_each_cpu(j, policy->cpus) - gov->get_cpu_cdbs(j)->policy_dbs = NULL; + for_each_cpu(j, policy->related_cpus) { + struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + j_cdbs->policy_dbs = NULL; + j_cdbs->update_util.func = NULL; + } kfree(policy_dbs); } @@ -477,8 +484,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - - j_cdbs->update_util.func = dbs_update_util_handler; } policy_dbs->policy = policy; From 686cc637c99324ad52a6f8e59181f6407405bfe2 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 8 Feb 2016 23:41:10 +0100 Subject: [PATCH 15/61] cpufreq: governor: Rename skip_work to work_count The skip_work field in struct policy_dbs_info technically is a counter, so give it a new name to reflect that. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 14 +++++++------- drivers/cpufreq/cpufreq_governor.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 7c08d8360f721..298be52adea00 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -196,16 +196,16 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) static void gov_cancel_work(struct policy_dbs_info *policy_dbs) { /* Tell dbs_update_util_handler() to skip queuing up work items. */ - atomic_inc(&policy_dbs->skip_work); + atomic_inc(&policy_dbs->work_count); /* * If dbs_update_util_handler() is already running, it may not notice - * the incremented skip_work, so wait for it to complete to prevent its + * the incremented work_count, so wait for it to complete to prevent its * work item from being queued up after the cancel_work_sync() below. */ gov_clear_update_util(policy_dbs->policy); irq_work_sync(&policy_dbs->irq_work); cancel_work_sync(&policy_dbs->work); - atomic_set(&policy_dbs->skip_work, 0); + atomic_set(&policy_dbs->work_count, 0); } static void dbs_work_handler(struct work_struct *work) @@ -234,7 +234,7 @@ static void dbs_work_handler(struct work_struct *work) * up using a stale sample delay value. */ smp_mb__before_atomic(); - atomic_dec(&policy_dbs->skip_work); + atomic_dec(&policy_dbs->work_count); } static void dbs_irq_work(struct irq_work *irq_work) @@ -267,7 +267,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * - The governor is being stopped. * - It is too early (too little time from the previous sample). 
*/ - if (atomic_inc_return(&policy_dbs->skip_work) == 1) { + if (atomic_inc_return(&policy_dbs->work_count) == 1) { u64 delta_ns; delta_ns = time - policy_dbs->last_sample_time; @@ -277,7 +277,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, return; } } - atomic_dec(&policy_dbs->skip_work); + atomic_dec(&policy_dbs->work_count); } static void set_sampling_rate(struct dbs_data *dbs_data, @@ -305,7 +305,7 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli return NULL; mutex_init(&policy_dbs->timer_mutex); - atomic_set(&policy_dbs->skip_work, 0); + atomic_set(&policy_dbs->work_count, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); INIT_WORK(&policy_dbs->work, dbs_work_handler); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 95e6834d36a83..37537220e48cc 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -149,7 +149,7 @@ struct policy_dbs_info { u64 last_sample_time; s64 sample_delay_ns; - atomic_t skip_work; + atomic_t work_count; struct irq_work irq_work; struct work_struct work; /* dbs_data may be shared between multiple policy objects */ From fafd5e8ab29d965d6c7db326f2d4189dd9f3b002 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Feb 2016 23:57:22 +0100 Subject: [PATCH 16/61] cpufreq: governor: Drop pointless goto from cpufreq_governor_init() It is silly to jump around "return 0", so don't do that. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 298be52adea00..d6bd402a3237e 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -395,12 +395,11 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr(gov)); - if (ret) - goto reset_gdbs_data; + if (!ret) + return 0; - return 0; + /* Failure, so roll back. */ -reset_gdbs_data: policy->governor_data = NULL; if (!have_governor_per_policy()) From d0684d3b8934cfb8171755cdb1fc87f4c0335655 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:31 +0530 Subject: [PATCH 17/61] cpufreq: governor: Create generic macro for common tunables Some tunables are present in governor-specific structures, whereas one (min_sampling_rate) is located directly in struct dbs_data. There is a special macro for creating its sysfs attribute and the show/store callbacks, but since more tunables are going to be moved to struct dbs_data, a new generic macro for such cases will be useful, so add it and use it for min_sampling_rate. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Subject & changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 8 +++--- drivers/cpufreq/cpufreq_governor.h | 36 ++++++++++++++------------ drivers/cpufreq/cpufreq_ondemand.c | 8 +++--- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 1a899bb7d1a4d..a69eb7eae7ecb 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -245,7 +245,7 @@ show_store_one(cs, up_threshold); show_store_one(cs, down_threshold); show_store_one(cs, ignore_nice_load); show_store_one(cs, freq_step); -declare_show_sampling_rate_min(cs); +show_one_common(cs, min_sampling_rate); gov_sys_pol_attr_rw(sampling_rate); gov_sys_pol_attr_rw(sampling_down_factor); @@ -253,10 +253,10 @@ gov_sys_pol_attr_rw(up_threshold); gov_sys_pol_attr_rw(down_threshold); gov_sys_pol_attr_rw(ignore_nice_load); gov_sys_pol_attr_rw(freq_step); -gov_sys_pol_attr_ro(sampling_rate_min); +gov_sys_pol_attr_ro(min_sampling_rate); static struct attribute *dbs_attributes_gov_sys[] = { - &sampling_rate_min_gov_sys.attr, + &min_sampling_rate_gov_sys.attr, &sampling_rate_gov_sys.attr, &sampling_down_factor_gov_sys.attr, &up_threshold_gov_sys.attr, @@ -272,7 +272,7 @@ static struct attribute_group cs_attr_group_gov_sys = { }; static struct attribute *dbs_attributes_gov_pol[] = { - &sampling_rate_min_gov_pol.attr, + &min_sampling_rate_gov_pol.attr, &sampling_rate_gov_pol.attr, &sampling_down_factor_gov_pol.attr, &up_threshold_gov_pol.attr, diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 37537220e48cc..cdf7536ac5fbe 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -110,6 +110,26 @@ static ssize_t store_##file_name##_gov_pol \ show_one(_gov, file_name); \ store_one(_gov, file_name) +#define show_one_common(_gov, file_name) \ +static ssize_t show_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char 
*buf) \ +{ \ + struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + struct dbs_data *dbs_data = policy_dbs->dbs_data; \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} + +#define show_store_one_common(_gov, file_name) \ +show_one_common(_gov, file_name); \ +store_one(_gov, file_name) + /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ @@ -264,22 +284,6 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) return delay; } -#define declare_show_sampling_rate_min(_gov) \ -static ssize_t show_sampling_rate_min_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ -} \ - \ -static ssize_t show_sampling_rate_min_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ -} - extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct cpufreq_policy *policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index b7ef2e7f4d4a4..8c44bc3fffc54 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -443,7 +443,7 @@ show_store_one(od, up_threshold); show_store_one(od, sampling_down_factor); show_store_one(od, ignore_nice_load); show_store_one(od, powersave_bias); -declare_show_sampling_rate_min(od); +show_one_common(od, min_sampling_rate); gov_sys_pol_attr_rw(sampling_rate); gov_sys_pol_attr_rw(io_is_busy); @@ 
-451,10 +451,10 @@ gov_sys_pol_attr_rw(up_threshold); gov_sys_pol_attr_rw(sampling_down_factor); gov_sys_pol_attr_rw(ignore_nice_load); gov_sys_pol_attr_rw(powersave_bias); -gov_sys_pol_attr_ro(sampling_rate_min); +gov_sys_pol_attr_ro(min_sampling_rate); static struct attribute *dbs_attributes_gov_sys[] = { - &sampling_rate_min_gov_sys.attr, + &min_sampling_rate_gov_sys.attr, &sampling_rate_gov_sys.attr, &up_threshold_gov_sys.attr, &sampling_down_factor_gov_sys.attr, @@ -470,7 +470,7 @@ static struct attribute_group od_attr_group_gov_sys = { }; static struct attribute *dbs_attributes_gov_pol[] = { - &sampling_rate_min_gov_pol.attr, + &min_sampling_rate_gov_pol.attr, &sampling_rate_gov_pol.attr, &up_threshold_gov_pol.attr, &sampling_down_factor_gov_pol.attr, From ff4b17895e3166084c76ae703cb1c757bcc59799 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:32 +0530 Subject: [PATCH 18/61] cpufreq: governor: Move common tunables to 'struct dbs_data' There are a few common tunables shared between the ondemand and conservative governors. Move them to struct dbs_data to simplify code. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 38 +++++++++++------------- drivers/cpufreq/cpufreq_governor.c | 37 +++++------------------ drivers/cpufreq/cpufreq_governor.h | 14 ++++----- drivers/cpufreq/cpufreq_ondemand.c | 41 +++++++++++--------------- 4 files changed, 47 insertions(+), 83 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index a69eb7eae7ecb..4f640b028c94c 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -60,7 +60,7 @@ static void cs_check_cpu(int cpu, unsigned int load) return; /* Check for frequency increase */ - if (load > cs_tuners->up_threshold) { + if (load > dbs_data->up_threshold) { dbs_info->down_skip = 0; /* if we are already at full speed then break out early */ @@ -78,7 +78,7 @@ static void cs_check_cpu(int cpu, unsigned int load) } /* if sampling_down_factor is active break out early */ - if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) + if (++dbs_info->down_skip < dbs_data->sampling_down_factor) return; dbs_info->down_skip = 0; @@ -107,10 +107,9 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; dbs_check_cpu(policy); - return delay_for_sampling_rate(cs_tuners->sampling_rate); + return delay_for_sampling_rate(dbs_data->sampling_rate); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -126,7 +125,6 @@ static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -134,14 +132,13 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || 
input < 1) return -EINVAL; - cs_tuners->sampling_down_factor = input; + dbs_data->sampling_down_factor = input; return count; } static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -149,7 +146,7 @@ static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, if (ret != 1) return -EINVAL; - cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); + dbs_data->sampling_rate = max(input, dbs_data->min_sampling_rate); return count; } @@ -164,7 +161,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) return -EINVAL; - cs_tuners->up_threshold = input; + dbs_data->up_threshold = input; return count; } @@ -178,7 +175,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, /* cannot be lower than 11 otherwise freq will not fall */ if (ret != 1 || input < 11 || input > 100 || - input >= cs_tuners->up_threshold) + input >= dbs_data->up_threshold) return -EINVAL; cs_tuners->down_threshold = input; @@ -188,7 +185,6 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input, j; int ret; @@ -199,10 +195,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, if (input > 1) input = 1; - if (input == cs_tuners->ignore_nice_load) /* nothing to do */ + if (input == dbs_data->ignore_nice_load) /* nothing to do */ return count; - cs_tuners->ignore_nice_load = input; + dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -210,7 +206,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_info = 
&per_cpu(cs_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall, 0); - if (cs_tuners->ignore_nice_load) + if (dbs_data->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } @@ -239,12 +235,12 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(cs, sampling_rate); -show_store_one(cs, sampling_down_factor); -show_store_one(cs, up_threshold); show_store_one(cs, down_threshold); -show_store_one(cs, ignore_nice_load); show_store_one(cs, freq_step); +show_store_one_common(cs, sampling_rate); +show_store_one_common(cs, sampling_down_factor); +show_store_one_common(cs, up_threshold); +show_store_one_common(cs, ignore_nice_load); show_one_common(cs, min_sampling_rate); gov_sys_pol_attr_rw(sampling_rate); @@ -299,11 +295,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) return -ENOMEM; } - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice_load = 0; tuners->freq_step = DEF_FREQUENCY_STEP; + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + dbs_data->ignore_nice_load = 0; dbs_data->tuners = tuners; dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index d6bd402a3237e..3569782771ef8 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -38,10 +38,9 @@ void dbs_check_cpu(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - unsigned int sampling_rate; + unsigned int sampling_rate = dbs_data->sampling_rate; 
+ unsigned int ignore_nice = dbs_data->ignore_nice_load; unsigned int max_load = 0; - unsigned int ignore_nice; unsigned int j; if (gov->governor == GOV_ONDEMAND) { @@ -54,13 +53,8 @@ void dbs_check_cpu(struct cpufreq_policy *policy) * the 'sampling_rate', so as to keep the wake-up-from-idle * detection logic a bit conservative. */ - sampling_rate = od_tuners->sampling_rate; sampling_rate *= od_dbs_info->rate_mult; - ignore_nice = od_tuners->ignore_nice_load; - } else { - sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; } /* Get Absolute Load */ @@ -280,19 +274,6 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, atomic_dec(&policy_dbs->work_count); } -static void set_sampling_rate(struct dbs_data *dbs_data, - struct dbs_governor *gov, - unsigned int sampling_rate) -{ - if (gov->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - cs_tuners->sampling_rate = sampling_rate; - } else { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - od_tuners->sampling_rate = sampling_rate; - } -} - static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { @@ -384,8 +365,8 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) /* Bring kernel and HW constraints together */ dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, MIN_LATENCY_MULTIPLIER * latency); - set_sampling_rate(dbs_data, gov, max(dbs_data->min_sampling_rate, - latency * LATENCY_MULTIPLIER)); + dbs_data->sampling_rate = max(dbs_data->min_sampling_rate, + LATENCY_MULTIPLIER * latency); if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; @@ -456,16 +437,12 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (policy_dbs->policy) return -EBUSY; - if (gov->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + sampling_rate = dbs_data->sampling_rate; + ignore_nice = 
dbs_data->ignore_nice_load; - sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; - } else { + if (gov->governor == GOV_ONDEMAND) { struct od_dbs_tuners *od_tuners = dbs_data->tuners; - sampling_rate = od_tuners->sampling_rate; - ignore_nice = od_tuners->ignore_nice_load; io_busy = od_tuners->io_is_busy; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index cdf7536ac5fbe..e296362d21d2e 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -153,9 +153,13 @@ static void *get_cpu_dbs_info_s(int cpu) \ /* Governor demand based switching data (per-policy or global). */ struct dbs_data { - unsigned int min_sampling_rate; int usage_count; void *tuners; + unsigned int min_sampling_rate; + unsigned int ignore_nice_load; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; }; /* Common to all CPUs of a policy */ @@ -216,19 +220,11 @@ struct cs_cpu_dbs_info_s { /* Per policy Governors sysfs tunables */ struct od_dbs_tuners { - unsigned int ignore_nice_load; - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; unsigned int powersave_bias; unsigned int io_is_busy; }; struct cs_dbs_tuners { - unsigned int ignore_nice_load; - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; unsigned int down_threshold; unsigned int freq_step; }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 8c44bc3fffc54..13c64b662fa18 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -110,7 +110,7 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, dbs_info->freq_lo_jiffies = 0; return freq_lo; } - jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); + jiffies_total = usecs_to_jiffies(dbs_data->sampling_rate); jiffies_hi = (freq_avg - freq_lo) * 
jiffies_total; jiffies_hi += ((freq_hi - freq_lo) / 2); jiffies_hi /= (freq_hi - freq_lo); @@ -161,11 +161,10 @@ static void od_check_cpu(int cpu, unsigned int load) dbs_info->freq_lo = 0; /* Check for frequency increase */ - if (load > od_tuners->up_threshold) { + if (load > dbs_data->up_threshold) { /* If switching to max speed, apply sampling_down_factor */ if (policy->cur < policy->max) - dbs_info->rate_mult = - od_tuners->sampling_down_factor; + dbs_info->rate_mult = dbs_data->sampling_down_factor; dbs_freq_increase(policy, policy->max); } else { /* Calculate the next frequency proportional to load */ @@ -195,7 +194,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ @@ -214,7 +212,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) } if (!delay) - delay = delay_for_sampling_rate(od_tuners->sampling_rate + delay = delay_for_sampling_rate(dbs_data->sampling_rate * dbs_info->rate_mult); return delay; @@ -239,11 +237,10 @@ static struct dbs_governor od_dbs_gov; static void update_sampling_rate(struct dbs_data *dbs_data, unsigned int new_rate) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cpumask cpumask; int cpu; - od_tuners->sampling_rate = new_rate = max(new_rate, + dbs_data->sampling_rate = new_rate = max(new_rate, dbs_data->min_sampling_rate); /* @@ -348,7 +345,6 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -358,21 +354,20 @@ static ssize_t 
store_up_threshold(struct dbs_data *dbs_data, const char *buf, return -EINVAL; } - od_tuners->up_threshold = input; + dbs_data->up_threshold = input; return count; } static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input, j; int ret; ret = sscanf(buf, "%u", &input); if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - od_tuners->sampling_down_factor = input; + dbs_data->sampling_down_factor = input; /* Reset down sampling multiplier in case it was active */ for_each_online_cpu(j) { @@ -399,10 +394,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, if (input > 1) input = 1; - if (input == od_tuners->ignore_nice_load) { /* nothing to do */ + if (input == dbs_data->ignore_nice_load) { /* nothing to do */ return count; } - od_tuners->ignore_nice_load = input; + dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -410,7 +405,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); - if (od_tuners->ignore_nice_load) + if (dbs_data->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -437,12 +432,12 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(od, sampling_rate); show_store_one(od, io_is_busy); -show_store_one(od, up_threshold); -show_store_one(od, sampling_down_factor); -show_store_one(od, ignore_nice_load); show_store_one(od, powersave_bias); +show_store_one_common(od, sampling_rate); +show_store_one_common(od, up_threshold); +show_store_one_common(od, sampling_down_factor); +show_store_one_common(od, ignore_nice_load); show_one_common(od, min_sampling_rate); 
gov_sys_pol_attr_rw(sampling_rate); @@ -504,7 +499,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) put_cpu(); if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ - tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; /* * In nohz/micro accounting case we set the minimum frequency * not depending on HZ, but fixed (very low). The deferred @@ -512,15 +507,15 @@ static int od_init(struct dbs_data *dbs_data, bool notify) */ dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; } else { - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; /* For correct statistics, we need 10 ticks for each measure */ dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); } - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice_load = 0; + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + dbs_data->ignore_nice_load = 0; tuners->powersave_bias = default_powersave_bias; tuners->io_is_busy = should_io_be_busy(); From c4435630361d9bebf7154a0c842dc1fb7ae39c99 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:33 +0530 Subject: [PATCH 19/61] cpufreq: governor: New sysfs show/store callbacks for governor tunables The ondemand and conservative governors use the global-attr or freq-attr structures to represent sysfs attributes corresponding to their tunables (which of them is actually used depends on whether or not different policy objects can use the same governor with different tunables at the same time and, consequently, on where those attributes are located in sysfs). Unfortunately, in the freq-attr case, the standard cpufreq show/store sysfs attribute callbacks are applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. 
That may lead to an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. Therefore policy->rwsem cannot be dropped in cpufreq_set_policy() at any point, but the deadlock situation described above must be avoided too. To that end, use the observation that in principle governor tunables may be represented by the same data type regardless of whether the governor is system-wide or per-policy and introduce a new structure, struct governor_attr, for representing them and new corresponding macros for creating show/store sysfs callbacks for them. Also make their parent kobject use a new kobject type whose default show/store callbacks are not related to the standard core cpufreq ones in any way (and they don't acquire policy->rwsem in particular). Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Subject & changelog + rebase ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 72 +++++++++----------------- drivers/cpufreq/cpufreq_governor.c | 64 ++++++++++++++++++++--- drivers/cpufreq/cpufreq_governor.h | 39 +++++++++++++- drivers/cpufreq/cpufreq_ondemand.c | 72 +++++++++----------------- 4 files changed, 144 insertions(+), 103 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4f640b028c94c..ed081dbce00cd 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -235,54 +235,33 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(cs, down_threshold); -show_store_one(cs, freq_step); -show_store_one_common(cs, sampling_rate); -show_store_one_common(cs, sampling_down_factor); -show_store_one_common(cs, up_threshold); -show_store_one_common(cs, ignore_nice_load); -show_one_common(cs, min_sampling_rate); - -gov_sys_pol_attr_rw(sampling_rate); -gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(up_threshold); -gov_sys_pol_attr_rw(down_threshold); -gov_sys_pol_attr_rw(ignore_nice_load); -gov_sys_pol_attr_rw(freq_step); -gov_sys_pol_attr_ro(min_sampling_rate); - -static struct attribute *dbs_attributes_gov_sys[] = { - &min_sampling_rate_gov_sys.attr, - &sampling_rate_gov_sys.attr, - &sampling_down_factor_gov_sys.attr, - &up_threshold_gov_sys.attr, - &down_threshold_gov_sys.attr, - &ignore_nice_load_gov_sys.attr, - &freq_step_gov_sys.attr, +gov_show_one_common(sampling_rate); +gov_show_one_common(sampling_down_factor); +gov_show_one_common(up_threshold); +gov_show_one_common(ignore_nice_load); +gov_show_one_common(min_sampling_rate); +gov_show_one(cs, down_threshold); +gov_show_one(cs, freq_step); + +gov_attr_rw(sampling_rate); +gov_attr_rw(sampling_down_factor); +gov_attr_rw(up_threshold); +gov_attr_rw(ignore_nice_load); +gov_attr_ro(min_sampling_rate); +gov_attr_rw(down_threshold); +gov_attr_rw(freq_step); + +static 
struct attribute *cs_attributes[] = { + &min_sampling_rate.attr, + &sampling_rate.attr, + &sampling_down_factor.attr, + &up_threshold.attr, + &down_threshold.attr, + &ignore_nice_load.attr, + &freq_step.attr, NULL }; -static struct attribute_group cs_attr_group_gov_sys = { - .attrs = dbs_attributes_gov_sys, - .name = "conservative", -}; - -static struct attribute *dbs_attributes_gov_pol[] = { - &min_sampling_rate_gov_pol.attr, - &sampling_rate_gov_pol.attr, - &sampling_down_factor_gov_pol.attr, - &up_threshold_gov_pol.attr, - &down_threshold_gov_pol.attr, - &ignore_nice_load_gov_pol.attr, - &freq_step_gov_pol.attr, - NULL -}; - -static struct attribute_group cs_attr_group_gov_pol = { - .attrs = dbs_attributes_gov_pol, - .name = "conservative", -}; - /************************** sysfs end ************************/ static int cs_init(struct dbs_data *dbs_data, bool notify) @@ -331,8 +310,7 @@ static struct dbs_governor cs_dbs_gov = { .owner = THIS_MODULE, }, .governor = GOV_CONSERVATIVE, - .attr_group_gov_sys = &cs_attr_group_gov_sys, - .attr_group_gov_pol = &cs_attr_group_gov_pol, + .kobj_type = { .default_attrs = cs_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 3569782771ef8..00cb468d3b6a5 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -25,12 +25,58 @@ DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex); -static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov) +static inline struct dbs_data *to_dbs_data(struct kobject *kobj) { - return have_governor_per_policy() ? 
- gov->attr_group_gov_pol : gov->attr_group_gov_sys; + return container_of(kobj, struct dbs_data, kobj); } +static inline struct governor_attr *to_gov_attr(struct attribute *attr) +{ + return container_of(attr, struct governor_attr, attr); +} + +static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct dbs_data *dbs_data = to_dbs_data(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret = -EIO; + + if (gattr->show) + ret = gattr->show(dbs_data, buf); + + return ret; +} + +static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct dbs_data *dbs_data = to_dbs_data(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret = -EIO; + + mutex_lock(&dbs_data->mutex); + + if (gattr->store) + ret = gattr->store(dbs_data, buf, count); + + mutex_unlock(&dbs_data->mutex); + + return ret; +} + +/* + * Sysfs Ops for accessing governor attributes. + * + * All show/store invocations for governor specific sysfs attributes, will first + * call the below show/store callbacks and the attribute specific callback will + * be called from within it. 
+ */ +static const struct sysfs_ops governor_sysfs_ops = { + .show = governor_show, + .store = governor_store, +}; + void dbs_check_cpu(struct cpufreq_policy *policy) { int cpu = policy->cpu; @@ -352,6 +398,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) } dbs_data->usage_count = 1; + mutex_init(&dbs_data->mutex); ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) @@ -374,12 +421,15 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; - ret = sysfs_create_group(get_governor_parent_kobj(policy), - get_sysfs_attr(gov)); + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; + ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, + get_governor_parent_kobj(policy), + "%s", gov->gov.name); if (!ret) return 0; /* Failure, so roll back. */ + pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); policy->governor_data = NULL; @@ -404,8 +454,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) return -EBUSY; if (!--dbs_data->usage_count) { - sysfs_remove_group(get_governor_parent_kobj(policy), - get_sysfs_attr(gov)); + kobject_put(&dbs_data->kobj); policy->governor_data = NULL; @@ -413,6 +462,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) gov->gdbs_data = NULL; gov->exit(dbs_data, policy->governor->initialized == 1); + mutex_destroy(&dbs_data->mutex); kfree(dbs_data); } else { policy->governor_data = NULL; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index e296362d21d2e..bdb6e4940b756 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -160,8 +160,44 @@ struct dbs_data { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; + + struct kobject kobj; + /* Protect concurrent updates to governor tunables from sysfs */ + struct mutex mutex; +}; + +/* Governor's specific 
attributes */ +struct dbs_data; +struct governor_attr { + struct attribute attr; + ssize_t (*show)(struct dbs_data *dbs_data, char *buf); + ssize_t (*store)(struct dbs_data *dbs_data, const char *buf, + size_t count); }; +#define gov_show_one(_gov, file_name) \ +static ssize_t show_##file_name \ +(struct dbs_data *dbs_data, char *buf) \ +{ \ + struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} + +#define gov_show_one_common(file_name) \ +static ssize_t show_##file_name \ +(struct dbs_data *dbs_data, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} + +#define gov_attr_ro(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define gov_attr_rw(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + /* Common to all CPUs of a policy */ struct policy_dbs_info { struct cpufreq_policy *policy; @@ -236,8 +272,7 @@ struct dbs_governor { #define GOV_ONDEMAND 0 #define GOV_CONSERVATIVE 1 int governor; - struct attribute_group *attr_group_gov_sys; /* one governor - system */ - struct attribute_group *attr_group_gov_pol; /* one governor - policy */ + struct kobj_type kobj_type; /* * Common data for platforms that don't set diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 13c64b662fa18..e36792f60348d 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -432,54 +432,33 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(od, io_is_busy); -show_store_one(od, powersave_bias); -show_store_one_common(od, sampling_rate); -show_store_one_common(od, up_threshold); -show_store_one_common(od, sampling_down_factor); -show_store_one_common(od, ignore_nice_load); -show_one_common(od, min_sampling_rate); - -gov_sys_pol_attr_rw(sampling_rate); -gov_sys_pol_attr_rw(io_is_busy); 
-gov_sys_pol_attr_rw(up_threshold); -gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(ignore_nice_load); -gov_sys_pol_attr_rw(powersave_bias); -gov_sys_pol_attr_ro(min_sampling_rate); - -static struct attribute *dbs_attributes_gov_sys[] = { - &min_sampling_rate_gov_sys.attr, - &sampling_rate_gov_sys.attr, - &up_threshold_gov_sys.attr, - &sampling_down_factor_gov_sys.attr, - &ignore_nice_load_gov_sys.attr, - &powersave_bias_gov_sys.attr, - &io_is_busy_gov_sys.attr, +gov_show_one_common(sampling_rate); +gov_show_one_common(up_threshold); +gov_show_one_common(sampling_down_factor); +gov_show_one_common(ignore_nice_load); +gov_show_one_common(min_sampling_rate); +gov_show_one(od, io_is_busy); +gov_show_one(od, powersave_bias); + +gov_attr_rw(sampling_rate); +gov_attr_rw(io_is_busy); +gov_attr_rw(up_threshold); +gov_attr_rw(sampling_down_factor); +gov_attr_rw(ignore_nice_load); +gov_attr_rw(powersave_bias); +gov_attr_ro(min_sampling_rate); + +static struct attribute *od_attributes[] = { + &min_sampling_rate.attr, + &sampling_rate.attr, + &up_threshold.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &powersave_bias.attr, + &io_is_busy.attr, NULL }; -static struct attribute_group od_attr_group_gov_sys = { - .attrs = dbs_attributes_gov_sys, - .name = "ondemand", -}; - -static struct attribute *dbs_attributes_gov_pol[] = { - &min_sampling_rate_gov_pol.attr, - &sampling_rate_gov_pol.attr, - &up_threshold_gov_pol.attr, - &sampling_down_factor_gov_pol.attr, - &ignore_nice_load_gov_pol.attr, - &powersave_bias_gov_pol.attr, - &io_is_busy_gov_pol.attr, - NULL -}; - -static struct attribute_group od_attr_group_gov_pol = { - .attrs = dbs_attributes_gov_pol, - .name = "ondemand", -}; - /************************** sysfs end ************************/ static int od_init(struct dbs_data *dbs_data, bool notify) @@ -544,8 +523,7 @@ static struct dbs_governor od_dbs_gov = { .owner = THIS_MODULE, }, .governor = GOV_ONDEMAND, - .attr_group_gov_sys = 
&od_attr_group_gov_sys, - .attr_group_gov_pol = &od_attr_group_gov_pol, + .kobj_type = { .default_attrs = od_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = od_dbs_timer, From fd8ddc482a7a5e015c0613c4d96543d5efad047c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:34 +0530 Subject: [PATCH 20/61] cpufreq: governor: Drop unused macros for creating governor tunable attributes The previous commit introduced a new set of macros for creating sysfs attributes that represent governor tunables and the old macros used for this purpose are not needed any more, so drop them. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.h | 89 ------------------------------ 1 file changed, 89 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index bdb6e4940b756..0eb66a6c95035 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -41,95 +41,6 @@ /* Ondemand Sampling types */ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; -/* - * Macro for creating governors sysfs routines - * - * - gov_sys: One governor instance per whole system - * - gov_pol: One governor instance per policy - */ - -/* Create attributes */ -#define gov_sys_attr_ro(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0444, show_##_name##_gov_sys, NULL) - -#define gov_sys_attr_rw(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) - -#define gov_pol_attr_ro(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0444, show_##_name##_gov_pol, NULL) - -#define gov_pol_attr_rw(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) - -#define gov_sys_pol_attr_rw(_name) \ - 
gov_sys_attr_rw(_name); \ - gov_pol_attr_rw(_name) - -#define gov_sys_pol_attr_ro(_name) \ - gov_sys_attr_ro(_name); \ - gov_pol_attr_ro(_name) - -/* Create show/store routines */ -#define show_one(_gov, file_name) \ -static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct _gov##_dbs_tuners *tuners = _gov##_dbs_gov.gdbs_data->tuners; \ - return sprintf(buf, "%u\n", tuners->file_name); \ -} \ - \ -static ssize_t show_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ - return sprintf(buf, "%u\n", tuners->file_name); \ -} - -#define store_one(_gov, file_name) \ -static ssize_t store_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return store_##file_name(dbs_data, buf, count); \ -} \ - \ -static ssize_t store_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, const char *buf, size_t count) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - return store_##file_name(policy_dbs->dbs_data, buf, count); \ -} - -#define show_store_one(_gov, file_name) \ -show_one(_gov, file_name); \ -store_one(_gov, file_name) - -#define show_one_common(_gov, file_name) \ -static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return sprintf(buf, "%u\n", dbs_data->file_name); \ -} \ - \ -static ssize_t show_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - return sprintf(buf, "%u\n", dbs_data->file_name); \ -} - -#define 
show_store_one_common(_gov, file_name) \ -show_one_common(_gov, file_name); \ -store_one(_gov, file_name) - /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ From 68e80dae09033d778b98dc88e5bfe8fdade188e5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:35 +0530 Subject: [PATCH 21/61] Revert "cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT" Earlier, when the struct freq-attr was used to represent governor attributes, the standard cpufreq show/store sysfs attribute callbacks were applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That could have resulted in an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. The previous commit, "cpufreq: governor: New sysfs show/store callbacks for governor tunables", fixed the original ABBA deadlock by adding new governor specific show/store callbacks. We don't have to drop rwsem around invocations of governor event CPUFREQ_GOV_POLICY_EXIT anymore, and original fix can be reverted now. Fixes: 955ef4833574 (cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT) Signed-off-by: Viresh Kumar Reported-by: Juri Lelli Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 5 ----- include/linux/cpufreq.h | 4 ---- 2 files changed, 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e172b2a02c1d4..e92e9eab7c6c9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2205,10 +2205,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } - up_write(&policy->rwsem); ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); - if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2224,9 +2221,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, if (!ret) goto out; - up_write(&policy->rwsem); __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); } /* new governor failed, so re-start old one */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 704d85bf72423..cac3d1ba82002 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -100,10 +100,6 @@ struct cpufreq_policy { * - Any routine that will write to the policy structure and/or may take away * the policy altogether (eg. CPU hotplug), will hold this lock in write * mode before doing so. - * - * Additional rules: - * - Lock should not be held across - * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); */ struct rw_semaphore rwsem; From c54df0718423ea2941151d8516eb76ca6a32a4b4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 10 Feb 2016 11:00:25 +0530 Subject: [PATCH 22/61] cpufreq: governor: Create and traverse list of policy_dbs to avoid deadlock The dbs_data_mutex lock is currently used in two places. First, cpufreq_governor_dbs() uses it to guarantee mutual exclusion between invocations of governor operations from the core. Second, it is used by ondemand governor's update_sampling_rate() to ensure the stability of data structures walked by it. 
The second usage is quite problematic, because update_sampling_rate() is called from a governor sysfs attribute's ->store callback and that leads to a deadlock scenario involving cpufreq_governor_exit() which runs under dbs_data_mutex. Thus it is better to rework the code so update_sampling_rate() doesn't need to acquire dbs_data_mutex. To that end, rework update_sampling_rate() to walk a list of policy_dbs objects supported by the dbs_data one it has been called for (instead of walking cpu_dbs_info object for all CPUs). The list manipulation is protected with dbs_data->mutex which also is held around the execution of update_sampling_rate(), it is not necessary to hold dbs_data_mutex in that function any more. Reported-by: Juri Lelli Reported-by: Shilpasri G Bhat Signed-off-by: Viresh Kumar [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 22 ++++++-- drivers/cpufreq/cpufreq_governor.h | 7 ++- drivers/cpufreq/cpufreq_ondemand.c | 89 ++++++++++-------------------- 3 files changed, 54 insertions(+), 64 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 00cb468d3b6a5..2f35270fbd437 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -385,9 +385,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = -EINVAL; goto free_policy_dbs_info; } - dbs_data->usage_count++; policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; + + mutex_lock(&dbs_data->mutex); + dbs_data->usage_count++; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + mutex_unlock(&dbs_data->mutex); + return 0; } @@ -397,7 +402,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; } - dbs_data->usage_count = 1; + INIT_LIST_HEAD(&dbs_data->policy_dbs_list); mutex_init(&dbs_data->mutex); ret = gov->init(dbs_data, !policy->governor->initialized); @@ -418,9 +423,12 @@ static int 
cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; - policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; + policy_dbs->dbs_data = dbs_data; + dbs_data->usage_count = 1; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, get_governor_parent_kobj(policy), @@ -448,12 +456,18 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; + int count; /* State should be equivalent to INIT */ if (policy_dbs->policy) return -EBUSY; - if (!--dbs_data->usage_count) { + mutex_lock(&dbs_data->mutex); + list_del(&policy_dbs->list); + count = --dbs_data->usage_count; + mutex_unlock(&dbs_data->mutex); + + if (!count) { kobject_put(&dbs_data->kobj); policy->governor_data = NULL; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 0eb66a6c95035..8bf4775ce03c7 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -73,7 +73,11 @@ struct dbs_data { unsigned int up_threshold; struct kobject kobj; - /* Protect concurrent updates to governor tunables from sysfs */ + struct list_head policy_dbs_list; + /* + * Protect concurrent updates to governor tunables from sysfs, + * policy_dbs_list and usage_count. 
+ */ struct mutex mutex; }; @@ -125,6 +129,7 @@ struct policy_dbs_info { struct work_struct work; /* dbs_data may be shared between multiple policy objects */ struct dbs_data *dbs_data; + struct list_head list; }; static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index e36792f60348d..38301c6b31c78 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -226,84 +226,55 @@ static struct dbs_governor od_dbs_gov; * @new_rate: new sampling rate * * If new rate is smaller than the old, simply updating - * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the + * dbs.sampling_rate might not be appropriate. For example, if the * original sampling_rate was 1 second and the requested new sampling rate is 10 * ms because the user needs immediate reaction from ondemand governor, but not * sure if higher frequency will be required or not, then, the governor may * change the sampling rate too late; up to 1 second later. Thus, if we are * reducing the sampling rate, we need to make the new value effective * immediately. + * + * On the other hand, if new rate is larger than the old, then we may evaluate + * the load too soon, and it might we worth updating sample_delay_ns then as + * well. + * + * This must be called with dbs_data->mutex held, otherwise traversing + * policy_dbs_list isn't safe. */ static void update_sampling_rate(struct dbs_data *dbs_data, unsigned int new_rate) { - struct cpumask cpumask; - int cpu; + struct policy_dbs_info *policy_dbs; dbs_data->sampling_rate = new_rate = max(new_rate, dbs_data->min_sampling_rate); /* - * Lock governor so that governor start/stop can't execute in parallel. + * We are operating under dbs_data->mutex and so the list and its + * entries can't be freed concurrently. 
*/ - mutex_lock(&dbs_data_mutex); - - cpumask_copy(&cpumask, cpu_online_mask); - - for_each_cpu(cpu, &cpumask) { - struct cpufreq_policy *policy; - struct od_cpu_dbs_info_s *dbs_info; - struct cpu_dbs_info *cdbs; - struct policy_dbs_info *policy_dbs; - - dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - cdbs = &dbs_info->cdbs; - policy_dbs = cdbs->policy_dbs; - + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + mutex_lock(&policy_dbs->timer_mutex); /* - * A valid policy_dbs and policy_dbs->policy means governor - * hasn't stopped or exited yet. + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), but that + * really doesn't matter. If the read returns a value that's + * too big, the sample will be skipped, but the next invocation + * of dbs_update_util_handler() (when the update has been + * completed) will take a sample. If the returned value is too + * small, the sample will be taken immediately, but that isn't a + * problem, as we want the new rate to take effect immediately + * anyway. + * + * If this runs in parallel with dbs_work_handler(), we may end + * up overwriting the sample_delay_ns value that it has just + * written, but the difference should not be too big and it will + * be corrected next time a sample is taken, so it shouldn't be + * significant. */ - if (!policy_dbs || !policy_dbs->policy) - continue; - - policy = policy_dbs->policy; - - /* clear all CPUs of this policy */ - cpumask_andnot(&cpumask, &cpumask, policy->cpus); - - /* - * Update sampling rate for CPUs whose policy is governed by - * dbs_data. In case of governor_per_policy, only a single - * policy will be governed by dbs_data, otherwise there can be - * multiple policies that are governed by the same dbs_data. 
- */ - if (dbs_data == policy_dbs->dbs_data) { - mutex_lock(&policy_dbs->timer_mutex); - /* - * On 32-bit architectures this may race with the - * sample_delay_ns read in dbs_update_util_handler(), - * but that really doesn't matter. If the read returns - * a value that's too big, the sample will be skipped, - * but the next invocation of dbs_update_util_handler() - * (when the update has been completed) will take a - * sample. If the returned value is too small, the - * sample will be taken immediately, but that isn't a - * problem, as we want the new rate to take effect - * immediately anyway. - * - * If this runs in parallel with dbs_work_handler(), we - * may end up overwriting the sample_delay_ns value that - * it has just written, but the difference should not be - * too big and it will be corrected next time a sample - * is taken, so it shouldn't be significant. - */ - gov_update_sample_delay(policy_dbs, new_rate); - mutex_unlock(&policy_dbs->timer_mutex); - } + gov_update_sample_delay(policy_dbs, new_rate); + mutex_unlock(&policy_dbs->timer_mutex); } - - mutex_unlock(&dbs_data_mutex); } static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, From 69cee7147b4a4ea02085d571cd2d9974d4a4d8d5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:11 +0530 Subject: [PATCH 23/61] cpufreq: Merge cpufreq_offline_prepare/finish routines Commit 1aee40ac9c86 (cpufreq: Invoke __cpufreq_remove_dev_finish() after releasing cpu_hotplug.lock) split the cpufreq's CPU offline routine in two pieces, one of them to be run with CPU offline/online locked and the other to be called later. The reason for that split was a possible deadlock scenario involving cpufreq sysfs attributes and CPU offline. However, the handling of CPU offline in cpufreq has changed since then. Policy sysfs attributes are never removed during CPU offline, so there's no need to worry about accessing them during CPU offline, because that can't lead to any deadlocks now. 
Governor sysfs attributes are still removed in __cpufreq_governor(_EXIT), but there is a new kobject type for them now and its show/store callbacks don't lock CPU offline/online (they don't need to do that). This means that the CPU offline code in cpufreq doesn't need to be split any more, so combine cpufreq_offline_prepare() with cpufreq_offline_finish(). Signed-off-by: Viresh Kumar [ rjw: Changelog ] Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e92e9eab7c6c9..f65553dc48c99 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1362,9 +1362,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return ret; } -static void cpufreq_offline_prepare(unsigned int cpu) +static void cpufreq_offline(unsigned int cpu) { struct cpufreq_policy *policy; + int ret; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1375,7 +1376,7 @@ static void cpufreq_offline_prepare(unsigned int cpu) } if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } @@ -1398,34 +1399,23 @@ static void cpufreq_offline_prepare(unsigned int cpu) /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); if (ret) pr_err("%s: Failed to start governor\n", __func__); } - } else if (cpufreq_driver->stop_cpu) { - cpufreq_driver->stop_cpu(policy); - } -} -static void cpufreq_offline_finish(unsigned int cpu) -{ - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - 
- if (!policy) { - pr_debug("%s: No cpu_data found\n", __func__); return; } - /* Only proceed for inactive policies */ - if (!policy_is_inactive(policy)) - return; + if (cpufreq_driver->stop_cpu) + cpufreq_driver->stop_cpu(policy); /* If cpu is last user of policy, free policy */ if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1454,10 +1444,8 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (!policy) return; - if (cpu_online(cpu)) { - cpufreq_offline_prepare(cpu); - cpufreq_offline_finish(cpu); - } + if (cpu_online(cpu)) + cpufreq_offline(cpu); cpumask_clear_cpu(cpu, policy->real_cpus); remove_cpu_dev_symlink(policy, cpu); @@ -2305,11 +2293,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - cpufreq_offline_prepare(cpu); - break; - - case CPU_POST_DEAD: - cpufreq_offline_finish(cpu); + cpufreq_offline(cpu); break; case CPU_DOWN_FAILED: From 49f18560f8bac5315047edfb673dd13d56cbcbc9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:12 +0530 Subject: [PATCH 24/61] cpufreq: Call __cpufreq_governor() with policy->rwsem held The cpufreq core code is not consistent with respect to invoking __cpufreq_governor() under policy->rwsem. Changing all code to always hold policy->rwsem around __cpufreq_governor() invocations will allow us to remove cpufreq_governor_lock that is used today because we can't guarantee that __cpufreq_governor() isn't executed twice in parallel for the same policy. We should also ensure that policy->rwsem is held across governor state changes. For example, while adding a CPU to the policy in the CPU online path, we need to stop the governor, change policy->cpus, start the governor and then refresh its limits. 
The complete sequence must be guaranteed to complete without interruptions by concurrent governor state updates. That can be achieved by holding policy->rwsem around those sequences of operations. Also note that after this patch cpufreq_driver->stop_cpu() and ->exit() will get called under policy->rwsem which wasn't the case earlier. That shouldn't have any side effects, though. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 49 ++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f65553dc48c99..6928768924570 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1049,30 +1049,29 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp if (cpumask_test_cpu(cpu, policy->cpus)) return 0; + down_write(&policy->rwsem); if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) { pr_err("%s: Failed to stop governor\n", __func__); - return ret; + goto unlock; } } - down_write(&policy->rwsem); cpumask_set_cpu(cpu, policy->cpus); - up_write(&policy->rwsem); if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); - if (ret) { + if (ret) pr_err("%s: Failed to start governor\n", __func__); - return ret; - } } - return 0; +unlock: + up_write(&policy->rwsem); + return ret; } static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) @@ -1375,13 +1374,13 @@ static void cpufreq_offline(unsigned int cpu) return; } + down_write(&policy->rwsem); if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } - down_write(&policy->rwsem); cpumask_clear_cpu(cpu, policy->cpus); if (policy_is_inactive(policy)) { @@ -1394,7 +1393,6 @@ static 
void cpufreq_offline(unsigned int cpu) /* Nominate new CPU */ policy->cpu = cpumask_any(policy->cpus); } - up_write(&policy->rwsem); /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { @@ -1407,7 +1405,7 @@ static void cpufreq_offline(unsigned int cpu) pr_err("%s: Failed to start governor\n", __func__); } - return; + goto unlock; } if (cpufreq_driver->stop_cpu) @@ -1429,6 +1427,9 @@ static void cpufreq_offline(unsigned int cpu) cpufreq_driver->exit(policy); policy->freq_table = NULL; } + +unlock: + up_write(&policy->rwsem); } /** @@ -1625,6 +1626,7 @@ EXPORT_SYMBOL(cpufreq_generic_suspend); void cpufreq_suspend(void) { struct cpufreq_policy *policy; + int ret; if (!cpufreq_driver) return; @@ -1635,7 +1637,11 @@ void cpufreq_suspend(void) pr_debug("%s: Suspending Governors\n", __func__); for_each_active_policy(policy) { - if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP)) + down_write(&policy->rwsem); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + up_write(&policy->rwsem); + + if (ret) pr_err("%s: Failed to stop governor for policy: %p\n", __func__, policy); else if (cpufreq_driver->suspend @@ -1657,6 +1663,7 @@ void cpufreq_suspend(void) void cpufreq_resume(void) { struct cpufreq_policy *policy; + int ret; if (!cpufreq_driver) return; @@ -1669,13 +1676,20 @@ void cpufreq_resume(void) pr_debug("%s: Resuming Governors\n", __func__); for_each_active_policy(policy) { - if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) + if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) { pr_err("%s: Failed to resume driver: %p\n", __func__, policy); - else if (__cpufreq_governor(policy, CPUFREQ_GOV_START) - || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)) - pr_err("%s: Failed to start governor for policy: %p\n", - __func__, policy); + } else { + down_write(&policy->rwsem); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (!ret) + __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + up_write(&policy->rwsem); + + if 
(ret) + pr_err("%s: Failed to start governor for policy: %p\n", + __func__, policy); + } } /* @@ -2326,8 +2340,11 @@ static int cpufreq_boost_set_sw(int state) __func__); break; } + + down_write(&policy->rwsem); policy->user_policy.max = policy->max; __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + up_write(&policy->rwsem); } } From 99522fe6788f5bf627dce7c20ed9484c933511a3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:13 +0530 Subject: [PATCH 25/61] cpufreq: Remove cpufreq_governor_lock We used to drop policy->rwsem just before calling __cpufreq_governor() in some cases earlier and so it was possible that __cpufreq_governor() ran concurrently via separate threads for the same policy. In order to guarantee valid state transitions for governors, 'governor_enabled' was required to be protected using some locking and cpufreq_governor_lock was added for that. But now __cpufreq_governor() is always called under policy->rwsem, and 'governor_enabled' is protected against races even without cpufreq_governor_lock. Get rid of the extra lock now. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw : Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 8 -------- drivers/cpufreq/cpufreq_governor.h | 1 - 2 files changed, 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6928768924570..bc93272b4a129 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -147,8 +147,6 @@ void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) rcu_read_unlock(); } -DEFINE_MUTEX(cpufreq_governor_lock); - /* Flag to suspend/resume CPUFreq governors */ static bool cpufreq_suspended; @@ -2015,11 +2013,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); - mutex_lock(&cpufreq_governor_lock); if ((policy->governor_enabled && event == CPUFREQ_GOV_START) || (!policy->governor_enabled && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { - mutex_unlock(&cpufreq_governor_lock); return -EBUSY; } @@ -2028,8 +2024,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, else if (event == CPUFREQ_GOV_START) policy->governor_enabled = true; - mutex_unlock(&cpufreq_governor_lock); - ret = policy->governor->governor(policy, event); if (!ret) { @@ -2039,12 +2033,10 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->governor->initialized--; } else { /* Restore original values */ - mutex_lock(&cpufreq_governor_lock); if (event == CPUFREQ_GOV_STOP) policy->governor_enabled = true; else if (event == CPUFREQ_GOV_START) policy->governor_enabled = false; - mutex_unlock(&cpufreq_governor_lock); } if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 8bf4775ce03c7..e9ec411042c34 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -232,7 +232,6 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) } extern struct mutex dbs_data_mutex; -extern struct mutex cpufreq_governor_lock; 
void dbs_check_cpu(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) From 581c214b21e4faba06d913952e38e80635d9ada5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:14 +0530 Subject: [PATCH 26/61] cpufreq: governor: No need to manage state machine now The cpufreq core now guarantees that policy->rwsem won't be dropped while running the ->governor callback for the CPUFREQ_GOV_POLICY_EXIT event and will be held acquired until the complete sequence of governor state changes has finished. This allows governor state machine checks to be dropped from multiple functions in cpufreq_governor.c. This also means that policy_dbs->policy can be initialized upfront, so the entire initialization of struct policy_dbs can be carried out in one place. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 2f35270fbd437..a34de9d10cbc8 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -233,8 +233,10 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) synchronize_rcu(); } -static void gov_cancel_work(struct policy_dbs_info *policy_dbs) +static void gov_cancel_work(struct cpufreq_policy *policy) { + struct policy_dbs_info *policy_dbs = policy->governor_data; + /* Tell dbs_update_util_handler() to skip queuing up work items. 
*/ atomic_inc(&policy_dbs->work_count); /* @@ -331,6 +333,7 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli if (!policy_dbs) return NULL; + policy_dbs->policy = policy; mutex_init(&policy_dbs->timer_mutex); atomic_set(&policy_dbs->work_count, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); @@ -458,10 +461,6 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy_dbs->dbs_data; int count; - /* State should be equivalent to INIT */ - if (policy_dbs->policy) - return -EBUSY; - mutex_lock(&dbs_data->mutex); list_del(&policy_dbs->list); count = --dbs_data->usage_count; @@ -497,10 +496,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (!policy->cur) return -EINVAL; - /* State should be equivalent to INIT */ - if (policy_dbs->policy) - return -EBUSY; - sampling_rate = dbs_data->sampling_rate; ignore_nice = dbs_data->ignore_nice_load; @@ -525,7 +520,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } - policy_dbs->policy = policy; if (gov->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -548,14 +542,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { - struct policy_dbs_info *policy_dbs = policy->governor_data; - - /* State should be equivalent to START */ - if (!policy_dbs->policy) - return -EBUSY; - - gov_cancel_work(policy_dbs); - policy_dbs->policy = NULL; + gov_cancel_work(policy); return 0; } @@ -564,10 +551,6 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; - /* State should be equivalent to START */ - if (!policy_dbs->policy) - return -EBUSY; - mutex_lock(&policy_dbs->timer_mutex); if (policy->max < policy->cur) __cpufreq_driver_target(policy, policy->max, 
CPUFREQ_RELATION_H); From aded387b94b69aeab10e1d112bab7f82c9241527 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:15 +0530 Subject: [PATCH 27/61] cpufreq: conservative: Update sample_delay_ns immediately The ondemand governor already updates sample_delay_ns immediately on updates to the sampling rate, but conservative doesn't do that. It was left out earlier as the code was really too complex to get that done easily. Things are sorted out very well now, however, and the conservative governor can be modified to follow ondemand in that respect. Moreover, since the code needed to implement that in the conservative governor would be identical to the corresponding ondemand governor's code, make that code common and change both governors to use it. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 14 ------ drivers/cpufreq/cpufreq_governor.c | 63 +++++++++++++++++++++++ drivers/cpufreq/cpufreq_governor.h | 2 + drivers/cpufreq/cpufreq_ondemand.c | 69 -------------------------- 4 files changed, 65 insertions(+), 83 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index ed081dbce00cd..6243502ce24db 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -136,20 +136,6 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, return count; } -static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, - size_t count) -{ - unsigned int input; - int ret; - ret = sscanf(buf, "%u", &input); - - if (ret != 1) - return -EINVAL; - - dbs_data->sampling_rate = max(input, dbs_data->min_sampling_rate); - return count; -} - static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, size_t count) { diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 
a34de9d10cbc8..d41db19a9bb77 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -25,6 +25,69 @@ DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex); +/* Common sysfs tunables */ +/** + * store_sampling_rate - update sampling rate effective immediately if needed. + * + * If new rate is smaller than the old, simply updating + * dbs.sampling_rate might not be appropriate. For example, if the + * original sampling_rate was 1 second and the requested new sampling rate is 10 + * ms because the user needs immediate reaction from ondemand governor, but not + * sure if higher frequency will be required or not, then, the governor may + * change the sampling rate too late; up to 1 second later. Thus, if we are + * reducing the sampling rate, we need to make the new value effective + * immediately. + * + * On the other hand, if new rate is larger than the old, then we may evaluate + * the load too soon, and it might we worth updating sample_delay_ns then as + * well. + * + * This must be called with dbs_data->mutex held, otherwise traversing + * policy_dbs_list isn't safe. + */ +ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct policy_dbs_info *policy_dbs; + unsigned int rate; + int ret; + ret = sscanf(buf, "%u", &rate); + if (ret != 1) + return -EINVAL; + + dbs_data->sampling_rate = max(rate, dbs_data->min_sampling_rate); + + /* + * We are operating under dbs_data->mutex and so the list and its + * entries can't be freed concurrently. + */ + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + mutex_lock(&policy_dbs->timer_mutex); + /* + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), but that + * really doesn't matter. 
If the read returns a value that's + * too big, the sample will be skipped, but the next invocation + * of dbs_update_util_handler() (when the update has been + * completed) will take a sample. If the returned value is too + * small, the sample will be taken immediately, but that isn't a + * problem, as we want the new rate to take effect immediately + * anyway. + * + * If this runs in parallel with dbs_work_handler(), we may end + * up overwriting the sample_delay_ns value that it has just + * written, but the difference should not be too big and it will + * be corrected next time a sample is taken, so it shouldn't be + * significant. + */ + gov_update_sample_delay(policy_dbs, dbs_data->sampling_rate); + mutex_unlock(&policy_dbs->timer_mutex); + } + + return count; +} +EXPORT_SYMBOL_GPL(store_sampling_rate); + static inline struct dbs_data *to_dbs_data(struct kobject *kobj) { return container_of(kobj, struct dbs_data, kobj); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index e9ec411042c34..8138eff5e25bb 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -238,4 +238,6 @@ void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); void od_unregister_powersave_bias_handler(void); +ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 38301c6b31c78..12213823cc93e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -221,75 +221,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) /************************** sysfs interface ************************/ static struct dbs_governor od_dbs_gov; -/** - * update_sampling_rate - update sampling rate effective immediately if needed. 
- * @new_rate: new sampling rate - * - * If new rate is smaller than the old, simply updating - * dbs.sampling_rate might not be appropriate. For example, if the - * original sampling_rate was 1 second and the requested new sampling rate is 10 - * ms because the user needs immediate reaction from ondemand governor, but not - * sure if higher frequency will be required or not, then, the governor may - * change the sampling rate too late; up to 1 second later. Thus, if we are - * reducing the sampling rate, we need to make the new value effective - * immediately. - * - * On the other hand, if new rate is larger than the old, then we may evaluate - * the load too soon, and it might we worth updating sample_delay_ns then as - * well. - * - * This must be called with dbs_data->mutex held, otherwise traversing - * policy_dbs_list isn't safe. - */ -static void update_sampling_rate(struct dbs_data *dbs_data, - unsigned int new_rate) -{ - struct policy_dbs_info *policy_dbs; - - dbs_data->sampling_rate = new_rate = max(new_rate, - dbs_data->min_sampling_rate); - - /* - * We are operating under dbs_data->mutex and so the list and its - * entries can't be freed concurrently. - */ - list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { - mutex_lock(&policy_dbs->timer_mutex); - /* - * On 32-bit architectures this may race with the - * sample_delay_ns read in dbs_update_util_handler(), but that - * really doesn't matter. If the read returns a value that's - * too big, the sample will be skipped, but the next invocation - * of dbs_update_util_handler() (when the update has been - * completed) will take a sample. If the returned value is too - * small, the sample will be taken immediately, but that isn't a - * problem, as we want the new rate to take effect immediately - * anyway. 
- * - * If this runs in parallel with dbs_work_handler(), we may end - * up overwriting the sample_delay_ns value that it has just - * written, but the difference should not be too big and it will - * be corrected next time a sample is taken, so it shouldn't be - * significant. - */ - gov_update_sample_delay(policy_dbs, new_rate); - mutex_unlock(&policy_dbs->timer_mutex); - } -} - -static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, - size_t count) -{ - unsigned int input; - int ret; - ret = sscanf(buf, "%u", &input); - if (ret != 1) - return -EINVAL; - - update_sampling_rate(dbs_data, input); - return count; -} - static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, size_t count) { From a23d6d180914dd91e320283c81e4f84f028e24f4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:16 +0530 Subject: [PATCH 28/61] cpufreq: ondemand: Rearrange od_dbs_timer() to avoid updating delay Avoid extra checks in od_dbs_timer() by rearranging updates to the local delay variable in it. Signed-off-by: Viresh Kumar [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_ondemand.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 12213823cc93e..0b79f1488be43 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -194,7 +194,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - int delay = 0, sample_type = dbs_info->sample_type; + int delay, sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; @@ -208,13 +208,12 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; delay = dbs_info->freq_hi_jiffies; + } else { + delay = delay_for_sampling_rate(dbs_data->sampling_rate + * dbs_info->rate_mult); } } - if (!delay) - delay = delay_for_sampling_rate(dbs_data->sampling_rate - * dbs_info->rate_mult); - return delay; } From b9db42730aeb23f91d7585786de25a260ab04098 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 22:15:34 +0100 Subject: [PATCH 29/61] cpufreq: governor: Avoid irq_work_queue_on() crash on non-SMP ARM As it turns out, irq_work_queue_on() will crash if invoked on non-SMP ARM platforms, but in fact it is not necessary to use that function in the cpufreq governor code (as it doesn't matter to that code which CPU will handle the irq_work), so change it to always use irq_work_queue(). Fixes: 8fb47ff100af (cpufreq: governor: Replace timers with utilization update callbacks) Reported-and-tested-by: Guenter Roeck Reported-and-tested-by: Tony Lindgren Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_governor.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index d41db19a9bb77..580b692d6df4c 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -350,15 +350,6 @@ static void dbs_irq_work(struct irq_work *irq_work) schedule_work(&policy_dbs->work); } -static inline void gov_queue_irq_work(struct policy_dbs_info *policy_dbs) -{ -#ifdef CONFIG_SMP - irq_work_queue_on(&policy_dbs->irq_work, smp_processor_id()); -#else - irq_work_queue(&policy_dbs->irq_work); -#endif -} - static void dbs_update_util_handler(struct update_util_data *data, u64 time, unsigned long util, unsigned long max) { @@ -378,7 +369,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, delta_ns = time - policy_dbs->last_sample_time; if ((s64)delta_ns >= policy_dbs->sample_delay_ns) { policy_dbs->last_sample_time = time; - gov_queue_irq_work(policy_dbs); + irq_work_queue(&policy_dbs->irq_work); return; } } From f62b93740c30d0a3f50258d45415f00b763dd70a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:12:56 +0100 Subject: [PATCH 30/61] cpufreq: governor: Simplify gov_cancel_work() slightly The atomic work counter incrementation in gov_cancel_work() is not necessary any more, because work items won't be queued up after gov_clear_update_util() anyway, so drop it along with the comment about how it may be missed by the gov_clear_update_util(). Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 580b692d6df4c..c78af11a51f03 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -300,13 +300,6 @@ static void gov_cancel_work(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; - /* Tell dbs_update_util_handler() to skip queuing up work items. */ - atomic_inc(&policy_dbs->work_count); - /* - * If dbs_update_util_handler() is already running, it may not notice - * the incremented work_count, so wait for it to complete to prevent its - * work item from being queued up after the cancel_work_sync() below. - */ gov_clear_update_util(policy_dbs->policy); irq_work_sync(&policy_dbs->irq_work); cancel_work_sync(&policy_dbs->work); @@ -360,7 +353,6 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * The work may not be allowed to be queued up right now. * Possible reasons: * - Work has already been queued up or is in progress. - * - The governor is being stopped. * - It is too early (too little time from the previous sample). */ if (atomic_inc_return(&policy_dbs->work_count) == 1) { From e4db2813d2e558b6b6bee464308678a57732b390 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:13:42 +0100 Subject: [PATCH 31/61] cpufreq: governor: Avoid atomic operations in hot paths Rework the handling of work items by dbs_update_util_handler() and dbs_work_handler() so the former (which is executed in scheduler paths) only uses atomic operations when absolutely necessary. That is, when the policy is shared and dbs_update_util_handler() has already decided that this is the time to queue up a work item. In particular, this avoids the atomic ops entirely on platforms where policy objects are never shared. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 51 ++++++++++++++++++++---------- drivers/cpufreq/cpufreq_governor.h | 3 ++ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index c78af11a51f03..e5a08a13ca84f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -304,6 +304,7 @@ static void gov_cancel_work(struct cpufreq_policy *policy) irq_work_sync(&policy_dbs->irq_work); cancel_work_sync(&policy_dbs->work); atomic_set(&policy_dbs->work_count, 0); + policy_dbs->work_in_progress = false; } static void dbs_work_handler(struct work_struct *work) @@ -326,13 +327,15 @@ static void dbs_work_handler(struct work_struct *work) policy_dbs->sample_delay_ns = jiffies_to_nsecs(delay); mutex_unlock(&policy_dbs->timer_mutex); + /* Allow the utilization update handler to queue up more work. */ + atomic_set(&policy_dbs->work_count, 0); /* - * If the atomic operation below is reordered with respect to the - * sample delay modification, the utilization update handler may end - * up using a stale sample delay value. + * If the update below is reordered with respect to the sample delay + * modification, the utilization update handler may end up using a stale + * sample delay value. */ - smp_mb__before_atomic(); - atomic_dec(&policy_dbs->work_count); + smp_wmb(); + policy_dbs->work_in_progress = false; } static void dbs_irq_work(struct irq_work *irq_work) @@ -348,6 +351,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; + u64 delta_ns; /* * The work may not be allowed to be queued up right now. @@ -355,17 +359,30 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * - Work has already been queued up or is in progress. 
* - It is too early (too little time from the previous sample). */ - if (atomic_inc_return(&policy_dbs->work_count) == 1) { - u64 delta_ns; - - delta_ns = time - policy_dbs->last_sample_time; - if ((s64)delta_ns >= policy_dbs->sample_delay_ns) { - policy_dbs->last_sample_time = time; - irq_work_queue(&policy_dbs->irq_work); - return; - } - } - atomic_dec(&policy_dbs->work_count); + if (policy_dbs->work_in_progress) + return; + + /* + * If the reads below are reordered before the check above, the value + * of sample_delay_ns used in the computation may be stale. + */ + smp_rmb(); + delta_ns = time - policy_dbs->last_sample_time; + if ((s64)delta_ns < policy_dbs->sample_delay_ns) + return; + + /* + * If the policy is not shared, the irq_work may be queued up right away + * at this point. Otherwise, we need to ensure that only one of the + * CPUs sharing the policy will do that. + */ + if (policy_dbs->is_shared && + !atomic_add_unless(&policy_dbs->work_count, 1, 1)) + return; + + policy_dbs->last_sample_time = time; + policy_dbs->work_in_progress = true; + irq_work_queue(&policy_dbs->irq_work); } static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, @@ -542,6 +559,8 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (!policy->cur) return -EINVAL; + policy_dbs->is_shared = policy_is_shared(policy); + sampling_rate = dbs_data->sampling_rate; ignore_nice = dbs_data->ignore_nice_load; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 8138eff5e25bb..521daac38ba51 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -130,6 +130,9 @@ struct policy_dbs_info { /* dbs_data may be shared between multiple policy objects */ struct dbs_data *dbs_data; struct list_head list; + /* Status indicators */ + bool is_shared; /* This object is used by multiple CPUs */ + bool work_in_progress; /* Work is being queued up or in progress */ }; static inline void 
gov_update_sample_delay(struct policy_dbs_info *policy_dbs, From 679b8fe43a6b723787cae1d9599ed776d7ce238b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:15:50 +0100 Subject: [PATCH 32/61] cpufreq: governor: Fix nice contribution computation in dbs_check_cpu() The contribution of the CPU nice time to the idle time in dbs_check_cpu() is computed in a bogus way, as the code may subtract current and previous nice values for different CPUs. That doesn't matter for cases when cpufreq policies are not shared, but may lead to problems otherwise. Fix the computation and simplify it to avoid taking unnecessary steps. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e5a08a13ca84f..c5469701a3ef8 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -198,22 +198,10 @@ void dbs_check_cpu(struct cpufreq_policy *policy) j_cdbs->prev_cpu_idle = cur_idle_time; if (ignore_nice) { - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); - u64 cur_nice; - unsigned long cur_nice_jiffies; + u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - - cdbs->prev_cpu_nice; - /* - * Assumption: nice time between sampling periods will - * be less than 2^32 jiffies for 32 bit sys - */ - cur_nice_jiffies = (unsigned long) - cputime64_to_jiffies64(cur_nice); - - cdbs->prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - idle_time += jiffies_to_usecs(cur_nice_jiffies); + idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice); + j_cdbs->prev_cpu_nice = cur_nice; } if (unlikely(!wall_time || wall_time < idle_time)) From 57eb832f90e645dcb97d651ad052c0537cc1b3a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 16 Feb 2016 00:58:47 +0100 Subject: [PATCH 33/61] cpufreq: governor: Clean up load-related computations Clean up some load-related computations in dbs_check_cpu() and cpufreq_governor_start() to get rid of unnecessary operations and type casts and make the code easier to read. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index c5469701a3ef8..1f580cb62902d 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -186,16 +186,15 @@ void dbs_check_cpu(struct cpufreq_policy *policy) io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); - wall_time = (unsigned int) - (cur_wall_time - j_cdbs->prev_cpu_wall); + wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; j_cdbs->prev_cpu_wall = cur_wall_time; - if (cur_idle_time < j_cdbs->prev_cpu_idle) - cur_idle_time = j_cdbs->prev_cpu_idle; - - idle_time = (unsigned int) - (cur_idle_time - j_cdbs->prev_cpu_idle); - j_cdbs->prev_cpu_idle = cur_idle_time; + if (cur_idle_time <= j_cdbs->prev_cpu_idle) { + idle_time = 0; + } else { + idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; + j_cdbs->prev_cpu_idle = cur_idle_time; + } if (ignore_nice) { u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -562,13 +561,10 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); unsigned int prev_load; - j_cdbs->prev_cpu_idle = - get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); - prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - - j_cdbs->prev_cpu_idle); - j_cdbs->prev_load = 100 * prev_load / - (unsigned int)j_cdbs->prev_cpu_wall; + prev_load = 
j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle; + j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall; if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; From 4cccf7555770b787fa80791a1407a27301f03920 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:19:31 +0100 Subject: [PATCH 34/61] cpufreq: governor: Get rid of the ->gov_check_cpu callback The way the ->gov_check_cpu governor callback is used by the ondemand and conservative governors is not really straightforward. Namely, the governor calls dbs_check_cpu() that updates the load information for the policy and then invokes ->gov_check_cpu() for the governor. To get rid of that entanglement, notice that cpufreq_governor_limits() doesn't need to call dbs_check_cpu() directly. Instead, it can simply reset the sample delay to 0 which will cause a sample to be taken immediately. The result of that is practically equivalent to calling dbs_check_cpu() except that it will trigger a full update of governor internal state and not just the ->gov_check_cpu() part. Following that observation, make cpufreq_governor_limits() reset the sample delay and turn dbs_check_cpu() into a function called dbs_update() that will simply evaluate the load and return the result. That function can now be called by governors from the routines that previously were pointed to by ->gov_check_cpu and those routines can be called directly by each governor instead of dbs_check_cpu(). This way ->gov_check_cpu becomes unnecessary, so drop it. Signed-off-by: Rafael J.
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 26 +++++++++----------------- drivers/cpufreq/cpufreq_governor.c | 15 ++++++++------- drivers/cpufreq/cpufreq_governor.h | 3 +-- drivers/cpufreq/cpufreq_ondemand.c | 15 +++++++++------ 4 files changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 6243502ce24db..2e9040e8640c4 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -44,20 +44,20 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, * Any frequency increase takes it to the maximum frequency. Frequency reduction * happens at minimum steps of 5% (default) of maximum frequency */ -static void cs_check_cpu(int cpu, unsigned int load) +static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { - struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; + struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, policy->cpu); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int load = dbs_update(policy); /* * break out if we 'cannot' reduce the speed as the user might * want freq_step to be zero */ if (cs_tuners->freq_step == 0) - return; + goto out; /* Check for frequency increase */ if (load > dbs_data->up_threshold) { @@ -65,7 +65,7 @@ static void cs_check_cpu(int cpu, unsigned int load) /* if we are already at full speed then break out early */ if (dbs_info->requested_freq == policy->max) - return; + goto out; dbs_info->requested_freq += get_freq_target(cs_tuners, policy); @@ -74,12 +74,12 @@ static void cs_check_cpu(int cpu, unsigned int load) __cpufreq_driver_target(policy, dbs_info->requested_freq, CPUFREQ_RELATION_H); - return; + goto out; } /* if 
sampling_down_factor is active break out early */ if (++dbs_info->down_skip < dbs_data->sampling_down_factor) - return; + goto out; dbs_info->down_skip = 0; /* Check for frequency decrease */ @@ -89,7 +89,7 @@ static void cs_check_cpu(int cpu, unsigned int load) * if we cannot reduce the frequency anymore, break out early */ if (policy->cur == policy->min) - return; + goto out; freq_target = get_freq_target(cs_tuners, policy); if (dbs_info->requested_freq > freq_target) @@ -99,16 +99,9 @@ static void cs_check_cpu(int cpu, unsigned int load) __cpufreq_driver_target(policy, dbs_info->requested_freq, CPUFREQ_RELATION_L); - return; } -} - -static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) -{ - struct policy_dbs_info *policy_dbs = policy->governor_data; - struct dbs_data *dbs_data = policy_dbs->dbs_data; - dbs_check_cpu(policy); + out: return delay_for_sampling_rate(dbs_data->sampling_rate); } @@ -300,7 +293,6 @@ static struct dbs_governor cs_dbs_gov = { .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, - .gov_check_cpu = cs_check_cpu, .init = cs_init, .exit = cs_exit, }; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 1f580cb62902d..99d25af6485b5 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -140,9 +140,8 @@ static const struct sysfs_ops governor_sysfs_ops = { .store = governor_store, }; -void dbs_check_cpu(struct cpufreq_policy *policy) +unsigned int dbs_update(struct cpufreq_policy *policy) { - int cpu = policy->cpu; struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; @@ -154,7 +153,7 @@ void dbs_check_cpu(struct cpufreq_policy *policy) if (gov->governor == GOV_ONDEMAND) { struct od_cpu_dbs_info_s *od_dbs_info = - gov->get_cpu_dbs_info_s(cpu); + gov->get_cpu_dbs_info_s(policy->cpu); /* * Sometimes, the 
ondemand governor uses an additional @@ -250,10 +249,9 @@ void dbs_check_cpu(struct cpufreq_policy *policy) if (load > max_load) max_load = load; } - - gov->gov_check_cpu(cpu, max_load); + return max_load; } -EXPORT_SYMBOL_GPL(dbs_check_cpu); +EXPORT_SYMBOL_GPL(dbs_update); void gov_set_update_util(struct policy_dbs_info *policy_dbs, unsigned int delay_us) @@ -601,11 +599,14 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; mutex_lock(&policy_dbs->timer_mutex); + if (policy->max < policy->cur) __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); else if (policy->min > policy->cur) __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(policy); + + gov_update_sample_delay(policy_dbs, 0); + mutex_unlock(&policy_dbs->timer_mutex); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 521daac38ba51..38b9512820b0d 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -202,7 +202,6 @@ struct dbs_governor { struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); - void (*gov_check_cpu)(int cpu, unsigned int load); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); @@ -235,7 +234,7 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) } extern struct mutex dbs_data_mutex; -void dbs_check_cpu(struct cpufreq_policy *policy); +unsigned int dbs_update(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 0b79f1488be43..707c017f4e67f 100644 --- 
a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -150,13 +150,13 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) * (default), then we try to increase frequency. Else, we adjust the frequency * proportional to load. */ -static void od_check_cpu(int cpu, unsigned int load) +static void od_update(struct cpufreq_policy *policy) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); struct policy_dbs_info *policy_dbs = dbs_info->cdbs.policy_dbs; - struct cpufreq_policy *policy = policy_dbs->policy; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int load = dbs_update(policy); dbs_info->freq_lo = 0; @@ -198,12 +198,16 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; - if (sample_type == OD_SUB_SAMPLE) { + /* + * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore + * it then. + */ + if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { delay = dbs_info->freq_lo_jiffies; __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); } else { - dbs_check_cpu(policy); + od_update(policy); if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; @@ -428,7 +432,6 @@ static struct dbs_governor od_dbs_gov = { .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = od_dbs_timer, - .gov_check_cpu = od_check_cpu, .gov_ops = &od_ops, .init = od_init, .exit = od_exit, From 78347cdb89065f9d40ea28596ef2bd8058eb6d12 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 15 Feb 2016 02:20:11 +0100 Subject: [PATCH 35/61] cpufreq: governor: Reset sample delay in store_sampling_rate() If store_sampling_rate() updates the sample delay when the ondemand governor is in the middle of its high/low dance (OD_SUB_SAMPLE sample type is set), the governor will still do the bottom half of the previous sample which may take too much time. To prevent that from happening, change store_sampling_rate() to always reset the sample delay to 0 which also is consistent with the new behavior of cpufreq_governor_limits(). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 99d25af6485b5..fd4cdc2db238b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -38,10 +38,6 @@ EXPORT_SYMBOL_GPL(dbs_data_mutex); * reducing the sampling rate, we need to make the new value effective * immediately. * - * On the other hand, if new rate is larger than the old, then we may evaluate - * the load too soon, and it might we worth updating sample_delay_ns then as - * well. - * * This must be called with dbs_data->mutex held, otherwise traversing * policy_dbs_list isn't safe. */ @@ -69,18 +65,14 @@ ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, * really doesn't matter. If the read returns a value that's * too big, the sample will be skipped, but the next invocation * of dbs_update_util_handler() (when the update has been - * completed) will take a sample. If the returned value is too - * small, the sample will be taken immediately, but that isn't a - * problem, as we want the new rate to take effect immediately - * anyway. + * completed) will take a sample. 
* * If this runs in parallel with dbs_work_handler(), we may end * up overwriting the sample_delay_ns value that it has just - * written, but the difference should not be too big and it will - * be corrected next time a sample is taken, so it shouldn't be - * significant. + * written, but it will be corrected next time a sample is + * taken, so it shouldn't be significant. */ - gov_update_sample_delay(policy_dbs, dbs_data->sampling_rate); + gov_update_sample_delay(policy_dbs, 0); mutex_unlock(&policy_dbs->timer_mutex); } From 57dc3bcd454eb420ddf25d89852993b61b351327 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:20:51 +0100 Subject: [PATCH 36/61] cpufreq: governor: Move rate_mult to struct policy_dbs The rate_mult field in struct od_cpu_dbs_info_s is used by the code shared with the conservative governor and to access it that code has to do an ugly governor type check. However, first of all, it is only ever used for policy->cpu, so it is per-policy rather than per-CPU, and second, it is initialized to 1 by cpufreq_governor_start(), so if the conservative governor never modifies it, it will have no effect on the results of any computations. For these reasons, move rate_mult to struct policy_dbs_info (as a common field). Signed-off-by: Rafael J.
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 25 +++++++++---------------- drivers/cpufreq/cpufreq_governor.h | 3 ++- drivers/cpufreq/cpufreq_ondemand.c | 23 +++++++++++++++-------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index fd4cdc2db238b..b002c0d626ea9 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -138,24 +138,17 @@ unsigned int dbs_update(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; - unsigned int sampling_rate = dbs_data->sampling_rate; unsigned int ignore_nice = dbs_data->ignore_nice_load; unsigned int max_load = 0; - unsigned int j; + unsigned int sampling_rate, j; - if (gov->governor == GOV_ONDEMAND) { - struct od_cpu_dbs_info_s *od_dbs_info = - gov->get_cpu_dbs_info_s(policy->cpu); - - /* - * Sometimes, the ondemand governor uses an additional - * multiplier to give long delays. So apply this multiplier to - * the 'sampling_rate', so as to keep the wake-up-from-idle - * detection logic a bit conservative. - */ - sampling_rate *= od_dbs_info->rate_mult; - - } + /* + * Sometimes governors may use an additional multiplier to increase + * sample delays temporarily. Apply that multiplier to sampling_rate + * so as to keep the wake-up-from-idle detection logic a bit + * conservative. 
+ */ + sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult; /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { @@ -537,6 +530,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) return -EINVAL; policy_dbs->is_shared = policy_is_shared(policy); + policy_dbs->rate_mult = 1; sampling_rate = dbs_data->sampling_rate; ignore_nice = dbs_data->ignore_nice_load; @@ -570,7 +564,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct od_ops *od_ops = gov->gov_ops; struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu); - od_dbs_info->rate_mult = 1; od_dbs_info->sample_type = OD_NORMAL_SAMPLE; od_ops->powersave_bias_init_cpu(cpu); } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 38b9512820b0d..f21d1e125cba6 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -130,6 +130,8 @@ struct policy_dbs_info { /* dbs_data may be shared between multiple policy objects */ struct dbs_data *dbs_data; struct list_head list; + /* Multiplier for increasing sample delay temporarily. 
*/ + unsigned int rate_mult; /* Status indicators */ bool is_shared; /* This object is used by multiple CPUs */ bool work_in_progress; /* Work is being queued up or in progress */ @@ -163,7 +165,6 @@ struct od_cpu_dbs_info_s { unsigned int freq_lo; unsigned int freq_lo_jiffies; unsigned int freq_hi_jiffies; - unsigned int rate_mult; unsigned int sample_type:1; }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 707c017f4e67f..812d9949a0c49 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -164,7 +164,7 @@ static void od_update(struct cpufreq_policy *policy) if (load > dbs_data->up_threshold) { /* If switching to max speed, apply sampling_down_factor */ if (policy->cur < policy->max) - dbs_info->rate_mult = dbs_data->sampling_down_factor; + policy_dbs->rate_mult = dbs_data->sampling_down_factor; dbs_freq_increase(policy, policy->max); } else { /* Calculate the next frequency proportional to load */ @@ -175,7 +175,7 @@ static void od_update(struct cpufreq_policy *policy) freq_next = min_f + load * (max_f - min_f) / 100; /* No longer fully busy, reset rate_mult */ - dbs_info->rate_mult = 1; + policy_dbs->rate_mult = 1; if (!od_tuners->powersave_bias) { __cpufreq_driver_target(policy, freq_next, @@ -214,7 +214,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) delay = dbs_info->freq_hi_jiffies; } else { delay = delay_for_sampling_rate(dbs_data->sampling_rate - * dbs_info->rate_mult); + * policy_dbs->rate_mult); } } @@ -266,20 +266,27 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - unsigned int input, j; + struct policy_dbs_info *policy_dbs; + unsigned int input; int ret; ret = sscanf(buf, "%u", &input); if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; + dbs_data->sampling_down_factor = input; /* Reset 
down sampling multiplier in case it was active */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - j); - dbs_info->rate_mult = 1; + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + /* + * Doing this without locking might lead to using different + * rate_mult values in od_update() and od_dbs_timer(). + */ + mutex_lock(&policy_dbs->timer_mutex); + policy_dbs->rate_mult = 1; + mutex_unlock(&policy_dbs->timer_mutex); } + return count; } From 6e96c5b3ac5181d4b787590e54c4af99d3fa5f2e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:21:35 +0100 Subject: [PATCH 37/61] cpufreq: ondemand: Simplify conditionals in od_dbs_timer() Reduce the indentation level in the conditionals in od_dbs_timer() and drop the delay variable from it. No functional changes. Signed-off-by: Rafael J. Wysocki Reviewed-by: Viresh Kumar --- drivers/cpufreq/cpufreq_ondemand.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 812d9949a0c49..cb5a097c19ea9 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -194,7 +194,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - int delay, sample_type = dbs_info->sample_type; + int sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; @@ -203,22 +203,20 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) * it then. 
*/ if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { - delay = dbs_info->freq_lo_jiffies; __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); - } else { - od_update(policy); - if (dbs_info->freq_lo) { - /* Setup timer for SUB_SAMPLE */ - dbs_info->sample_type = OD_SUB_SAMPLE; - delay = dbs_info->freq_hi_jiffies; - } else { - delay = delay_for_sampling_rate(dbs_data->sampling_rate - * policy_dbs->rate_mult); - } + return dbs_info->freq_lo_jiffies; + } + + od_update(policy); + + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = OD_SUB_SAMPLE; + return dbs_info->freq_hi_jiffies; } - return delay; + return delay_for_sampling_rate(dbs_data->sampling_rate * policy_dbs->rate_mult); } /************************** sysfs interface ************************/ From 07aa4402a009bc83194860e7869c491bab854d1c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:22:13 +0100 Subject: [PATCH 38/61] cpufreq: governor: Use microseconds in sample delay computations Do not convert microseconds to jiffies and the other way around in governor computations related to the sampling rate and sample delay and drop delay_for_sampling_rate() which isn't of any use then. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 4 +--- drivers/cpufreq/cpufreq_governor.h | 15 ++------------ drivers/cpufreq/cpufreq_ondemand.c | 28 ++++++++++++-------------- 4 files changed, 17 insertions(+), 32 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 2e9040e8640c4..4a6f8e1ed72e2 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -102,7 +102,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) } out: - return delay_for_sampling_rate(dbs_data->sampling_rate); + return dbs_data->sampling_rate; } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b002c0d626ea9..56dba71d1788f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -282,7 +282,6 @@ static void dbs_work_handler(struct work_struct *work) struct policy_dbs_info *policy_dbs; struct cpufreq_policy *policy; struct dbs_governor *gov; - unsigned int delay; policy_dbs = container_of(work, struct policy_dbs_info, work); policy = policy_dbs->policy; @@ -293,8 +292,7 @@ static void dbs_work_handler(struct work_struct *work) * ondemand governor isn't updating the sampling rate in parallel. */ mutex_lock(&policy_dbs->timer_mutex); - delay = gov->gov_dbs_timer(policy); - policy_dbs->sample_delay_ns = jiffies_to_nsecs(delay); + gov_update_sample_delay(policy_dbs, gov->gov_dbs_timer(policy)); mutex_unlock(&policy_dbs->timer_mutex); /* Allow the utilization update handler to queue up more work. 
*/ diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index f21d1e125cba6..7ae0c71143fa2 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -163,8 +163,8 @@ struct od_cpu_dbs_info_s { struct cpu_dbs_info cdbs; struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; - unsigned int freq_lo_jiffies; - unsigned int freq_hi_jiffies; + unsigned int freq_lo_delay_us; + unsigned int freq_hi_delay_us; unsigned int sample_type:1; }; @@ -223,17 +223,6 @@ struct od_ops { void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); }; -static inline int delay_for_sampling_rate(unsigned int sampling_rate) -{ - int delay = usecs_to_jiffies(sampling_rate); - - /* We want all CPUs to do sampling nearly on same jiffy */ - if (num_online_cpus() > 1) - delay -= jiffies % delay; - - return delay; -} - extern struct mutex dbs_data_mutex; unsigned int dbs_update(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index cb5a097c19ea9..a3ee74577404f 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -66,8 +66,8 @@ static int should_io_be_busy(void) /* * Find right freq to be set now with powersave_bias on. - * Returns the freq_hi to be used right now and will set freq_hi_jiffies, - * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. + * Returns the freq_hi to be used right now and will set freq_hi_delay_us, + * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs. 
*/ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation) @@ -75,7 +75,7 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; unsigned int index = 0; - unsigned int jiffies_total, jiffies_hi, jiffies_lo; + unsigned int delay_hi_us; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -84,7 +84,7 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, if (!dbs_info->freq_table) { dbs_info->freq_lo = 0; - dbs_info->freq_lo_jiffies = 0; + dbs_info->freq_lo_delay_us = 0; return freq_next; } @@ -107,17 +107,15 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, /* Find out how long we have to be in hi and lo freqs */ if (freq_hi == freq_lo) { dbs_info->freq_lo = 0; - dbs_info->freq_lo_jiffies = 0; + dbs_info->freq_lo_delay_us = 0; return freq_lo; } - jiffies_total = usecs_to_jiffies(dbs_data->sampling_rate); - jiffies_hi = (freq_avg - freq_lo) * jiffies_total; - jiffies_hi += ((freq_hi - freq_lo) / 2); - jiffies_hi /= (freq_hi - freq_lo); - jiffies_lo = jiffies_total - jiffies_hi; + delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate; + delay_hi_us += (freq_hi - freq_lo) / 2; + delay_hi_us /= freq_hi - freq_lo; + dbs_info->freq_hi_delay_us = delay_hi_us; dbs_info->freq_lo = freq_lo; - dbs_info->freq_lo_jiffies = jiffies_lo; - dbs_info->freq_hi_jiffies = jiffies_hi; + dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us; return freq_hi; } @@ -205,7 +203,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); - return dbs_info->freq_lo_jiffies; + return 
dbs_info->freq_lo_delay_us; } od_update(policy); @@ -213,10 +211,10 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; - return dbs_info->freq_hi_jiffies; + return dbs_info->freq_hi_delay_us; } - return delay_for_sampling_rate(dbs_data->sampling_rate * policy_dbs->rate_mult); + return dbs_data->sampling_rate * policy_dbs->rate_mult; } /************************** sysfs interface ************************/ From a7f35cffb980f3aec75f74559a4320974c845b78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 16 Feb 2016 21:02:24 +0100 Subject: [PATCH 39/61] cpufreq: ondemand: Simplify od_update() slightly Drop some lines of code from od_update() by arranging the statements in there in a more logical way. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_ondemand.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a3ee74577404f..34e3a1be9971c 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -175,14 +175,11 @@ static void od_update(struct cpufreq_policy *policy) /* No longer fully busy, reset rate_mult */ policy_dbs->rate_mult = 1; - if (!od_tuners->powersave_bias) { - __cpufreq_driver_target(policy, freq_next, - CPUFREQ_RELATION_C); - return; - } - - freq_next = od_ops.powersave_bias_target(policy, freq_next, - CPUFREQ_RELATION_L); + if (od_tuners->powersave_bias) + freq_next = od_ops.powersave_bias_target(policy, + freq_next, + CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); } } From 8eb055d3f53e52805907ea54e2eec0885be91a50 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 16 Feb 2016 21:02:32 +0100 Subject: [PATCH 40/61] cpufreq: ondemand: Drop unused callback from struct od_ops The ->freq_increase callback in struct od_ops is never invoked, so drop it. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.h | 1 - drivers/cpufreq/cpufreq_ondemand.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 7ae0c71143fa2..675e1cdbb46c0 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -220,7 +220,6 @@ struct od_ops { void (*powersave_bias_init_cpu)(int cpu); unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation); - void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); }; extern struct mutex dbs_data_mutex; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 34e3a1be9971c..375fdcfbc02ed 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -417,7 +417,6 @@ define_get_cpu_dbs_routines(od_cpu_dbs_info); static struct od_ops od_ops = { .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, .powersave_bias_target = generic_powersave_bias_target, - .freq_increase = dbs_freq_increase, }; static struct dbs_governor od_dbs_gov = { From 574ef14d5dbcd2743326cc1b28e61a1e7733162a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:19:00 +0100 Subject: [PATCH 41/61] cpufreq: governor: Close dbs_data update race condition It is possible for a dbs_data object to be updated after its usage counter has become 0. That may happen if governor_store() runs (via a governor tunable sysfs attribute write) in parallel with cpufreq_governor_exit() called for the last cpufreq policy associated with the dbs_data in question.
In that case, if governor_store() acquires dbs_data->mutex right after cpufreq_governor_exit() has released it, the ->store() callback invoked by it may operate on dbs_data with no users. Although sysfs will cause the kobject_put() in cpufreq_governor_exit() to block until governor_store() has returned, that situation may lead to some unexpected results, depending on the implementation of the ->store callback, and therefore it should be avoided. To that end, modify governor_store() to check the dbs_data's usage count before invoking the ->store() callback and return an error if it is 0 at that point. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 56dba71d1788f..65ed859030bad 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -112,7 +112,7 @@ static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, mutex_lock(&dbs_data->mutex); - if (gattr->store) + if (dbs_data->usage_count && gattr->store) ret = gattr->store(dbs_data, buf, count); mutex_unlock(&dbs_data->mutex); From 8847e038c1d19c20dda0d7a590e31ffa528da8a5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:20:13 +0100 Subject: [PATCH 42/61] cpufreq: governor: Move io_is_busy to struct dbs_data The io_is_busy governor tunable is only used by the ondemand governor and is located in the ondemand-specific data structure, but it is looked at by the common governor code that has to do ugly things to get to that value, so move it to struct dbs_data and modify ondemand accordingly. Since the conservative governor never touches that field, it will be always 0 for that governor and it won't have any effect on the results of computations in that case. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 27 +++++++++------------------ drivers/cpufreq/cpufreq_governor.h | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 12 +++++------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 65ed859030bad..60268160e0ada 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -137,10 +137,9 @@ unsigned int dbs_update(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int ignore_nice = dbs_data->ignore_nice_load; unsigned int max_load = 0; - unsigned int sampling_rate, j; + unsigned int sampling_rate, io_busy, j; /* * Sometimes governors may use an additional multiplier to increase @@ -149,6 +148,12 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * conservative. */ sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult; + /* + * For the purpose of ondemand, waiting for disk IO is an indication + * that you're performance critical, and not that the system is actually + * idle, so do not add the iowait time to the CPU idle time then. + */ + io_busy = dbs_data->io_is_busy; /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { @@ -156,18 +161,9 @@ unsigned int dbs_update(struct cpufreq_policy *policy) u64 cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; unsigned int load; - int io_busy = 0; j_cdbs = gov->get_cpu_cdbs(j); - /* - * For the purpose of ondemand, waiting for disk IO is - * an indication that you're performance critical, and - * not that the system is actually idle. So do not add - * the iowait time to the cpu idle time. 
- */ - if (gov->governor == GOV_ONDEMAND) - io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; @@ -522,7 +518,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - int io_busy = 0; + unsigned int io_busy; if (!policy->cur) return -EINVAL; @@ -532,12 +528,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) sampling_rate = dbs_data->sampling_rate; ignore_nice = dbs_data->ignore_nice_load; - - if (gov->governor == GOV_ONDEMAND) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - - io_busy = od_tuners->io_is_busy; - } + io_busy = dbs_data->io_is_busy; for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 675e1cdbb46c0..7b36393280661 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -71,6 +71,7 @@ struct dbs_data { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; + unsigned int io_is_busy; struct kobject kobj; struct list_head policy_dbs_list; @@ -177,7 +178,6 @@ struct cs_cpu_dbs_info_s { /* Per policy Governors sysfs tunables */ struct od_dbs_tuners { unsigned int powersave_bias; - unsigned int io_is_busy; }; struct cs_dbs_tuners { diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 375fdcfbc02ed..330b5884b99b4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -220,7 +220,6 @@ static struct dbs_governor od_dbs_gov; static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int 
ret; unsigned int j; @@ -228,14 +227,14 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; - od_tuners->io_is_busy = !!input; + dbs_data->io_is_busy = !!input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); + &dbs_info->cdbs.prev_cpu_wall, dbs_data->io_is_busy); } return count; } @@ -286,7 +285,6 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; @@ -309,7 +307,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, struct od_cpu_dbs_info_s *dbs_info; dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); + &dbs_info->cdbs.prev_cpu_wall, dbs_data->io_is_busy); if (dbs_data->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -342,7 +340,7 @@ gov_show_one_common(up_threshold); gov_show_one_common(sampling_down_factor); gov_show_one_common(ignore_nice_load); gov_show_one_common(min_sampling_rate); -gov_show_one(od, io_is_busy); +gov_show_one_common(io_is_busy); gov_show_one(od, powersave_bias); gov_attr_rw(sampling_rate); @@ -401,7 +399,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; dbs_data->ignore_nice_load = 0; tuners->powersave_bias = default_powersave_bias; - tuners->io_is_busy = should_io_be_busy(); + dbs_data->io_is_busy = should_io_be_busy(); dbs_data->tuners = tuners; return 0; From 702c9e542a25cf95683c08c56e711eddb80020ac Mon Sep 17 00:00:00 2001 From: 
"Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:21:21 +0100 Subject: [PATCH 43/61] cpufreq: governor: Add a ->start callback for governors To avoid having to check the governor type explicitly in the common code in order to initialize data structures specific to the governor type properly, add a ->start callback to struct dbs_governor and use it to initialize those data structures for the ondemand and conservative governors. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 9 +++++++++ drivers/cpufreq/cpufreq_governor.c | 16 ++-------------- drivers/cpufreq/cpufreq_governor.h | 1 + drivers/cpufreq/cpufreq_ondemand.c | 10 ++++++++++ 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4a6f8e1ed72e2..c11fe95152dea 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -279,6 +279,14 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) kfree(dbs_data->tuners); } +static void cs_start(struct cpufreq_policy *policy) +{ + struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, policy->cpu); + + dbs_info->down_skip = 0; + dbs_info->requested_freq = policy->cur; +} + define_get_cpu_dbs_routines(cs_cpu_dbs_info); static struct dbs_governor cs_dbs_gov = { @@ -295,6 +303,7 @@ static struct dbs_governor cs_dbs_gov = { .gov_dbs_timer = cs_dbs_timer, .init = cs_init, .exit = cs_exit, + .start = cs_start, }; #define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 60268160e0ada..badbd467e5e26 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -517,7 +517,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data 
*dbs_data = policy_dbs->dbs_data; - unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; + unsigned int sampling_rate, ignore_nice, j; unsigned int io_busy; if (!policy->cur) @@ -543,19 +543,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } - if (gov->governor == GOV_CONSERVATIVE) { - struct cs_cpu_dbs_info_s *cs_dbs_info = - gov->get_cpu_dbs_info_s(cpu); - - cs_dbs_info->down_skip = 0; - cs_dbs_info->requested_freq = policy->cur; - } else { - struct od_ops *od_ops = gov->gov_ops; - struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu); - - od_dbs_info->sample_type = OD_NORMAL_SAMPLE; - od_ops->powersave_bias_init_cpu(cpu); - } + gov->start(policy); gov_set_update_util(policy_dbs, sampling_rate); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 7b36393280661..2ae0ad50ca3d3 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -205,6 +205,7 @@ struct dbs_governor { unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); + void (*start)(struct cpufreq_policy *policy); /* Governor specific ops, see below */ void *gov_ops; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 330b5884b99b4..de069f80b6197 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -410,6 +410,15 @@ static void od_exit(struct dbs_data *dbs_data, bool notify) kfree(dbs_data->tuners); } +static void od_start(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + dbs_info->sample_type = OD_NORMAL_SAMPLE; + od_ops.powersave_bias_init_cpu(cpu); +} + define_get_cpu_dbs_routines(od_cpu_dbs_info); static struct od_ops od_ops = { @@ 
-432,6 +441,7 @@ static struct dbs_governor od_dbs_gov = { .gov_ops = &od_ops, .init = od_init, .exit = od_exit, + .start = od_start, }; #define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) From 8434dadbb457813a127f56d9f0fb7d22035027b9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:22:42 +0100 Subject: [PATCH 44/61] cpufreq: governor: Drop unused governor callback and data fields After some previous changes, the ->get_cpu_dbs_info_s governor callback and the "governor" field in struct dbs_governor (whose value represents the governor type) are not used any more, so drop them. Also drop the unused gov_ops field from struct dbs_governor. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 2 -- drivers/cpufreq/cpufreq_governor.h | 15 +-------------- drivers/cpufreq/cpufreq_ondemand.c | 3 --- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c11fe95152dea..cdc7531398615 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -296,10 +296,8 @@ static struct dbs_governor cs_dbs_gov = { .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }, - .governor = GOV_CONSERVATIVE, .kobj_type = { .default_attrs = cs_attributes }, .get_cpu_cdbs = get_cpu_cdbs, - .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, .init = cs_init, .exit = cs_exit, diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 2ae0ad50ca3d3..ee46f34f04d70 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -46,11 +46,6 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ { \ return &per_cpu(_dbs_info, cpu).cdbs; \ -} \ - \ -static void *get_cpu_dbs_info_s(int cpu) \ -{ \ - return &per_cpu(_dbs_info, cpu); \ } /* @@ 
-188,10 +183,6 @@ struct cs_dbs_tuners { /* Common Governor data across policies */ struct dbs_governor { struct cpufreq_governor gov; - - #define GOV_ONDEMAND 0 - #define GOV_CONSERVATIVE 1 - int governor; struct kobj_type kobj_type; /* @@ -201,14 +192,10 @@ struct dbs_governor { struct dbs_data *gdbs_data; struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); - void *(*get_cpu_dbs_info_s)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); void (*start)(struct cpufreq_policy *policy); - - /* Governor specific ops, see below */ - void *gov_ops; }; static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) @@ -216,7 +203,7 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy return container_of(policy->governor, struct dbs_governor, gov); } -/* Governor specific ops, will be passed to dbs_data->gov_ops */ +/* Governor specific operations */ struct od_ops { void (*powersave_bias_init_cpu)(int cpu); unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index de069f80b6197..41d239c8dbf69 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -433,12 +433,9 @@ static struct dbs_governor od_dbs_gov = { .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }, - .governor = GOV_ONDEMAND, .kobj_type = { .default_attrs = od_attributes }, .get_cpu_cdbs = get_cpu_cdbs, - .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = od_dbs_timer, - .gov_ops = &od_ops, .init = od_init, .exit = od_exit, .start = od_start, From 76c5f66aa10720a377dfe8beebd39a0b2a938965 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 18 Feb 2016 02:24:32 +0100 Subject: [PATCH 45/61] cpufreq: ondemand: Drop one more callback from struct od_ops The ->powersave_bias_init_cpu callback in struct od_ops is only used in one place and that invocation may be replaced with a direct call to the function pointed to by that callback, so change the code accordingly and drop the callback. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.h | 1 - drivers/cpufreq/cpufreq_ondemand.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ee46f34f04d70..ec98065dc30dd 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -205,7 +205,6 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy /* Governor specific operations */ struct od_ops { - void (*powersave_bias_init_cpu)(int cpu); unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation); }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 41d239c8dbf69..393fcf13a2b6b 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -416,13 +416,12 @@ static void od_start(struct cpufreq_policy *policy) struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); dbs_info->sample_type = OD_NORMAL_SAMPLE; - od_ops.powersave_bias_init_cpu(cpu); + ondemand_powersave_bias_init_cpu(cpu); } define_get_cpu_dbs_routines(od_cpu_dbs_info); static struct od_ops od_ops = { - .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, .powersave_bias_target = generic_powersave_bias_target, }; From a33cce1c6cc3268d8b4843bf1e4ac1e70b27d107 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 18 Feb 2016 02:26:55 +0100 Subject: [PATCH 46/61] cpufreq: governor: Fix CPU load information updates via ->store The ->store() callbacks of some tunable sysfs attributes of the ondemand and conservative governors trigger immediate updates of the CPU load information for all CPUs "governed" by the given dbs_data by walking the cpu_dbs_info structures for all online CPUs in the system and updating them. This is questionable for two reasons. First, it may lead to a lot of extra overhead on a system with many CPUs if the given dbs_data is only associated with a few of them. Second, if governor tunables are per-policy, the CPUs associated with the other sets of governor tunables should not be updated. To address this issue, use the observation that in all of the places in question the update operation may be carried out in the same way (because all of the tunables involved are now located in struct dbs_data and readily available to the common code) and make the code in those places invoke the same (new) helper function that will carry out the update correctly. That new function always checks the ignore_nice_load tunable value and updates the CPUs' prev_cpu_nice data fields if that's set, which wasn't done by the original code in store_io_is_busy(), but it should have been done in there too. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 15 +++++-------- drivers/cpufreq/cpufreq_governor.c | 30 ++++++++++++++++++++++++++ drivers/cpufreq/cpufreq_governor.h | 1 + drivers/cpufreq/cpufreq_ondemand.c | 22 ++++--------------- 4 files changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index cdc7531398615..876984c842b1e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -23,6 +23,8 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); +static struct dbs_governor cs_dbs_gov; + static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -164,7 +166,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - unsigned int input, j; + unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -180,15 +182,8 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct cs_cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(cs_cpu_dbs_info, j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, 0); - if (dbs_data->ignore_nice_load) - dbs_info->cdbs.prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - } + gov_update_cpu_data(&cs_dbs_gov, dbs_data); + return count; } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index badbd467e5e26..4b14f04daa41f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -80,6 +80,36 @@ ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, } EXPORT_SYMBOL_GPL(store_sampling_rate); +/** + * gov_update_cpu_data - Update CPU load data. 
+ * @gov: Governor whose data is to be updated. + * @dbs_data: Top-level governor data pointer. + * + * Update CPU load data for all CPUs in the domain governed by @dbs_data + * (that may be a single policy or a bunch of them if governor tunables are + * system-wide). + * + * Call under the @dbs_data mutex. + */ +void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data) +{ + struct policy_dbs_info *policy_dbs; + + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + unsigned int j; + + for_each_cpu(j, policy_dbs->policy->cpus) { + struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, + dbs_data->io_is_busy); + if (dbs_data->ignore_nice_load) + j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + } +} +EXPORT_SYMBOL_GPL(gov_update_cpu_data); + static inline struct dbs_data *to_dbs_data(struct kobject *kobj) { return container_of(kobj, struct dbs_data, kobj); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ec98065dc30dd..5c7d1ea96fff0 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -218,4 +218,5 @@ void od_register_powersave_bias_handler(unsigned int (*f) void od_unregister_powersave_bias_handler(void); ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, size_t count); +void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 393fcf13a2b6b..216ea442b835f 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -29,6 +29,7 @@ static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); +static struct dbs_governor od_dbs_gov; static struct od_ops od_ops; static unsigned int default_powersave_bias; @@ -222,7 +223,6 @@ static ssize_t store_io_is_busy(struct 
dbs_data *dbs_data, const char *buf, { unsigned int input; int ret; - unsigned int j; ret = sscanf(buf, "%u", &input); if (ret != 1) @@ -230,12 +230,8 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, dbs_data->io_is_busy = !!input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, dbs_data->io_is_busy); - } + gov_update_cpu_data(&od_dbs_gov, dbs_data); + return count; } @@ -288,8 +284,6 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, unsigned int input; int ret; - unsigned int j; - ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; @@ -303,16 +297,8 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(od_cpu_dbs_info, j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, dbs_data->io_is_busy); - if (dbs_data->ignore_nice_load) - dbs_info->cdbs.prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + gov_update_cpu_data(&od_dbs_gov, dbs_data); - } return count; } From d1db75fffc22504c586c3fae8d602384ea899340 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:28:24 +0100 Subject: [PATCH 47/61] cpufreq: ondemand: Rework the handling of powersave bias updates The ondemand_powersave_bias_init() function used for resetting data fields related to the powersave bias tunable of the ondemand governor works by walking all of the online CPUs in the system and updating the od_cpu_dbs_info_s structures for all of them. However, if governor tunables are per policy, the update should not touch the CPUs that are not associated with the given dbs_data. 
Moreover, since the data fields in question are only ever used for policy->cpu in each policy governed by ondemand, the update can be limited to those specific CPUs. Rework the code to take the above observations into account. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_ondemand.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 216ea442b835f..43d89f6af206c 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -34,14 +34,6 @@ static struct od_ops od_ops; static unsigned int default_powersave_bias; -static void ondemand_powersave_bias_init_cpu(int cpu) -{ - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - - dbs_info->freq_table = cpufreq_frequency_get_table(cpu); - dbs_info->freq_lo = 0; -} - /* * Not all CPUs want IO time to be accounted as busy; this depends on how * efficient idling at a higher frequency/voltage is. 
@@ -120,12 +112,13 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_hi; } -static void ondemand_powersave_bias_init(void) +static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { - int i; - for_each_online_cpu(i) { - ondemand_powersave_bias_init_cpu(i); - } + unsigned int cpu = policy->cpu; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; } static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) @@ -306,6 +299,7 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, size_t count) { struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct policy_dbs_info *policy_dbs; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -317,7 +311,10 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, input = 1000; od_tuners->powersave_bias = input; - ondemand_powersave_bias_init(); + + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) + ondemand_powersave_bias_init(policy_dbs->policy); + return count; } @@ -398,11 +395,10 @@ static void od_exit(struct dbs_data *dbs_data, bool notify) static void od_start(struct cpufreq_policy *policy) { - unsigned int cpu = policy->cpu; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); dbs_info->sample_type = OD_NORMAL_SAMPLE; - ondemand_powersave_bias_init_cpu(cpu); + ondemand_powersave_bias_init(policy); } define_get_cpu_dbs_routines(od_cpu_dbs_info); From 7d5a9956af4ccf7d5cc0cd1f8d27d1691321bfc6 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 18 Feb 2016 18:40:14 +0100 Subject: [PATCH 48/61] cpufreq: governor: Make governor private data per-policy Some fields in struct od_cpu_dbs_info_s and struct cs_cpu_dbs_info_s are only used for a limited set of CPUs. Namely, if a policy is shared between multiple CPUs, those fields will only be used for one of them (policy->cpu). This means that they really are per-policy rather than per-CPU and holding room for them in per-CPU data structures is generally wasteful. Also moving those fields into per-policy data structures will allow some significant simplifications to be made going forward. For this reason, introduce struct cs_policy_dbs_info and struct od_policy_dbs_info to hold those fields. Define each of the new structures as an extension of struct policy_dbs_info (such that struct policy_dbs_info is embedded in each of them) and introduce new ->alloc and ->free governor callbacks to allocate and free those structures, respectively, such that ->alloc() will return a pointer to the struct policy_dbs_info embedded in the allocated data structure and ->free() will take that pointer as its argument. With that, modify the code accessing the data fields in question in per-CPU data objects to look for them in the new structures via the struct policy_dbs_info pointer available to it and drop them from struct od_cpu_dbs_info_s and struct cs_cpu_dbs_info_s. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/amd_freq_sensitivity.c | 5 ++-- drivers/cpufreq/cpufreq_conservative.c | 34 +++++++++++++++++++++++--- drivers/cpufreq/cpufreq_governor.c | 7 +++--- drivers/cpufreq/cpufreq_governor.h | 9 ++----- drivers/cpufreq/cpufreq_ondemand.c | 34 ++++++++++++++++++-------- drivers/cpufreq/cpufreq_ondemand.h | 26 ++++++++++++++++++++ 6 files changed, 87 insertions(+), 28 deletions(-) create mode 100644 drivers/cpufreq/cpufreq_ondemand.h diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 82ae1002def18..404360cad25c4 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -21,7 +21,7 @@ #include #include -#include "cpufreq_governor.h" +#include "cpufreq_ondemand.h" #define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL 0xc0010080 #define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE 0xc0010081 @@ -48,8 +48,7 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *od_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = od_data->tuners; - struct od_cpu_dbs_info_s *od_info = - dbs_governor_of(policy)->get_cpu_dbs_info_s(policy->cpu); + struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs); if (!od_info->freq_table) return freq_next; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 876984c842b1e..ffffda2dcbfcd 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -14,6 +14,17 @@ #include #include "cpufreq_governor.h" +struct cs_policy_dbs_info { + struct policy_dbs_info policy_dbs; + unsigned int down_skip; + unsigned int requested_freq; +}; + +static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) +{ + return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs); +} + /* Conservative governor macros */ #define 
DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) @@ -48,8 +59,8 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, */ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { - struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, policy->cpu); struct policy_dbs_info *policy_dbs = policy->governor_data; + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int load = dbs_update(policy); @@ -238,6 +249,19 @@ static struct attribute *cs_attributes[] = { /************************** sysfs end ************************/ +static struct policy_dbs_info *cs_alloc(void) +{ + struct cs_policy_dbs_info *dbs_info; + + dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); + return dbs_info ? &dbs_info->policy_dbs : NULL; +} + +static void cs_free(struct policy_dbs_info *policy_dbs) +{ + kfree(to_dbs_info(policy_dbs)); +} + static int cs_init(struct dbs_data *dbs_data, bool notify) { struct cs_dbs_tuners *tuners; @@ -276,7 +300,7 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) static void cs_start(struct cpufreq_policy *policy) { - struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, policy->cpu); + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); dbs_info->down_skip = 0; dbs_info->requested_freq = policy->cur; @@ -294,6 +318,8 @@ static struct dbs_governor cs_dbs_gov = { .kobj_type = { .default_attrs = cs_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .gov_dbs_timer = cs_dbs_timer, + .alloc = cs_alloc, + .free = cs_free, .init = cs_init, .exit = cs_exit, .start = cs_start, @@ -305,9 +331,8 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - struct cs_cpu_dbs_info_s *dbs_info = - &per_cpu(cs_cpu_dbs_info, freq->cpu); struct cpufreq_policy *policy = 
cpufreq_cpu_get_raw(freq->cpu); + struct cs_policy_dbs_info *dbs_info; if (!policy) return 0; @@ -316,6 +341,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) return 0; + dbs_info = to_dbs_info(policy->governor_data); /* * we only care if our internally tracked freq moves outside the 'valid' * ranges of frequency available to us otherwise we do not change it diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 4b14f04daa41f..6cbc846e39816 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -385,8 +385,8 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli struct policy_dbs_info *policy_dbs; int j; - /* Allocate memory for the common information for policy->cpus */ - policy_dbs = kzalloc(sizeof(*policy_dbs), GFP_KERNEL); + /* Allocate memory for per-policy governor data. */ + policy_dbs = gov->alloc(); if (!policy_dbs) return NULL; @@ -421,7 +421,7 @@ static void free_policy_dbs_info(struct cpufreq_policy *policy, j_cdbs->policy_dbs = NULL; j_cdbs->update_util.func = NULL; } - kfree(policy_dbs); + gov->free(policy_dbs); } static int cpufreq_governor_init(struct cpufreq_policy *policy) @@ -582,7 +582,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { gov_cancel_work(policy); - return 0; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 5c7d1ea96fff0..354e0d306ff53 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -157,17 +157,10 @@ struct cpu_dbs_info { struct od_cpu_dbs_info_s { struct cpu_dbs_info cdbs; - struct cpufreq_frequency_table *freq_table; - unsigned int freq_lo; - unsigned int freq_lo_delay_us; - unsigned int freq_hi_delay_us; - unsigned int sample_type:1; }; struct cs_cpu_dbs_info_s { struct 
cpu_dbs_info cdbs; - unsigned int down_skip; - unsigned int requested_freq; }; /* Per policy Governors sysfs tunables */ @@ -193,6 +186,8 @@ struct dbs_governor { struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); + struct policy_dbs_info *(*alloc)(void); + void (*free)(struct policy_dbs_info *policy_dbs); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); void (*start)(struct cpufreq_policy *policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 43d89f6af206c..cdf431696c40b 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -16,7 +16,8 @@ #include #include #include -#include "cpufreq_governor.h" + +#include "cpufreq_ondemand.h" /* On-demand governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) @@ -69,9 +70,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_hi, freq_lo; unsigned int index = 0; unsigned int delay_hi_us; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - policy->cpu); struct policy_dbs_info *policy_dbs = policy->governor_data; + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; @@ -114,10 +114,9 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { - unsigned int cpu = policy->cpu; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); - dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu); dbs_info->freq_lo = 0; } @@ -144,8 +143,8 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int 
freq) */ static void od_update(struct cpufreq_policy *policy) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - struct policy_dbs_info *policy_dbs = dbs_info->cdbs.policy_dbs; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int load = dbs_update(policy); @@ -182,7 +181,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); int sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ @@ -347,6 +346,19 @@ static struct attribute *od_attributes[] = { /************************** sysfs end ************************/ +static struct policy_dbs_info *od_alloc(void) +{ + struct od_policy_dbs_info *dbs_info; + + dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); + return dbs_info ? 
&dbs_info->policy_dbs : NULL; +} + +static void od_free(struct policy_dbs_info *policy_dbs) +{ + kfree(to_dbs_info(policy_dbs)); +} + static int od_init(struct dbs_data *dbs_data, bool notify) { struct od_dbs_tuners *tuners; @@ -395,7 +407,7 @@ static void od_exit(struct dbs_data *dbs_data, bool notify) static void od_start(struct cpufreq_policy *policy) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); dbs_info->sample_type = OD_NORMAL_SAMPLE; ondemand_powersave_bias_init(policy); @@ -417,6 +429,8 @@ static struct dbs_governor od_dbs_gov = { .kobj_type = { .default_attrs = od_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .gov_dbs_timer = od_dbs_timer, + .alloc = od_alloc, + .free = od_free, .init = od_init, .exit = od_exit, .start = od_start, diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h new file mode 100644 index 0000000000000..22403e4e0cb07 --- /dev/null +++ b/drivers/cpufreq/cpufreq_ondemand.h @@ -0,0 +1,26 @@ +/* + * Header file for CPUFreq ondemand governor and related code. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "cpufreq_governor.h" + +struct od_policy_dbs_info { + struct policy_dbs_info policy_dbs; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_delay_us; + unsigned int freq_hi_delay_us; + unsigned int sample_type:1; +}; + +static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) +{ + return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs); +} From 8c8f77fd0719a079450f59debed4f69ede825adb Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 21 Feb 2016 00:51:27 +0100 Subject: [PATCH 49/61] cpufreq: governor: Move per-CPU data to the common code After previous changes there is only one piece of code in the ondemand governor making references to per-CPU data structures, but it can be easily modified to avoid doing that, so modify it accordingly and move the definition of per-CPU data used by the ondemand and conservative governors to the common code. Next, change that code to access the per-CPU data structures directly rather than via a governor callback. This causes the ->get_cpu_cdbs governor callback to become unnecessary, so drop it along with the macro and function definitions related to it. Finally, drop the definitions of struct od_cpu_dbs_info_s and struct cs_cpu_dbs_info_s that aren't necessary any more. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 9 +------- drivers/cpufreq/cpufreq_governor.c | 31 +++++++++++--------------- drivers/cpufreq/cpufreq_governor.h | 18 +-------------- drivers/cpufreq/cpufreq_ondemand.c | 26 +++++++++------------ 4 files changed, 25 insertions(+), 59 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index ffffda2dcbfcd..5d1edc55aa636 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -32,10 +32,6 @@ static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *pol #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) -static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); - -static struct dbs_governor cs_dbs_gov; - static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -193,7 +189,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - gov_update_cpu_data(&cs_dbs_gov, dbs_data); + 
gov_update_cpu_data(dbs_data); return count; } @@ -306,8 +302,6 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } -define_get_cpu_dbs_routines(cs_cpu_dbs_info); - static struct dbs_governor cs_dbs_gov = { .gov = { .name = "conservative", @@ -316,7 +310,6 @@ static struct dbs_governor cs_dbs_gov = { .owner = THIS_MODULE, }, .kobj_type = { .default_attrs = cs_attributes }, - .get_cpu_cdbs = get_cpu_cdbs, .gov_dbs_timer = cs_dbs_timer, .alloc = cs_alloc, .free = cs_free, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6cbc846e39816..75217b850d7bd 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,6 +22,8 @@ #include "cpufreq_governor.h" +static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); + DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex); @@ -82,7 +84,6 @@ EXPORT_SYMBOL_GPL(store_sampling_rate); /** * gov_update_cpu_data - Update CPU load data. - * @gov: Governor whose data is to be updated. * @dbs_data: Top-level governor data pointer. * * Update CPU load data for all CPUs in the domain governed by @dbs_data @@ -91,7 +92,7 @@ EXPORT_SYMBOL_GPL(store_sampling_rate); * * Call under the @dbs_data mutex. 
*/ -void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data) +void gov_update_cpu_data(struct dbs_data *dbs_data) { struct policy_dbs_info *policy_dbs; @@ -99,7 +100,7 @@ void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data) unsigned int j; for_each_cpu(j, policy_dbs->policy->cpus) { - struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, dbs_data->io_is_busy); @@ -164,7 +165,6 @@ static const struct sysfs_ops governor_sysfs_ops = { unsigned int dbs_update(struct cpufreq_policy *policy) { - struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; unsigned int ignore_nice = dbs_data->ignore_nice_load; @@ -187,13 +187,11 @@ unsigned int dbs_update(struct cpufreq_policy *policy) /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info *j_cdbs; + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); u64 cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; unsigned int load; - j_cdbs = gov->get_cpu_cdbs(j); - cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; @@ -268,14 +266,13 @@ void gov_set_update_util(struct policy_dbs_info *policy_dbs, unsigned int delay_us) { struct cpufreq_policy *policy = policy_dbs->policy; - struct dbs_governor *gov = dbs_governor_of(policy); int cpu; gov_update_sample_delay(policy_dbs, delay_us); policy_dbs->last_sample_time = 0; for_each_cpu(cpu, policy->cpus) { - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); cpufreq_set_update_util_data(cpu, &cdbs->update_util); } @@ -398,7 +395,7 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli /* Set policy_dbs for all CPUs, online+offline */ 
for_each_cpu(j, policy->related_cpus) { - struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); j_cdbs->policy_dbs = policy_dbs; j_cdbs->update_util.func = dbs_update_util_handler; @@ -406,17 +403,15 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli return policy_dbs; } -static void free_policy_dbs_info(struct cpufreq_policy *policy, +static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, struct dbs_governor *gov) { - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; int j; mutex_destroy(&policy_dbs->timer_mutex); - for_each_cpu(j, policy->related_cpus) { - struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + for_each_cpu(j, policy_dbs->policy->related_cpus) { + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); j_cdbs->policy_dbs = NULL; j_cdbs->update_util.func = NULL; @@ -507,7 +502,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) kfree(dbs_data); free_policy_dbs_info: - free_policy_dbs_info(policy, gov); + free_policy_dbs_info(policy_dbs, gov); return ret; } @@ -538,7 +533,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; } - free_policy_dbs_info(policy, gov); + free_policy_dbs_info(policy_dbs, gov); return 0; } @@ -561,7 +556,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) io_busy = dbs_data->io_is_busy; for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); unsigned int prev_load; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 354e0d306ff53..58749da97099a 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -41,13 +41,6 @@ /* Ondemand Sampling types */ enum 
{OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; -/* create helper routines */ -#define define_get_cpu_dbs_routines(_dbs_info) \ -static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ -{ \ - return &per_cpu(_dbs_info, cpu).cdbs; \ -} - /* * Abbreviations: * dbs: used as a shortform for demand based switching It helps to keep variable @@ -155,14 +148,6 @@ struct cpu_dbs_info { struct policy_dbs_info *policy_dbs; }; -struct od_cpu_dbs_info_s { - struct cpu_dbs_info cdbs; -}; - -struct cs_cpu_dbs_info_s { - struct cpu_dbs_info cdbs; -}; - /* Per policy Governors sysfs tunables */ struct od_dbs_tuners { unsigned int powersave_bias; @@ -184,7 +169,6 @@ struct dbs_governor { */ struct dbs_data *gdbs_data; - struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); struct policy_dbs_info *(*alloc)(void); void (*free)(struct policy_dbs_info *policy_dbs); @@ -213,5 +197,5 @@ void od_register_powersave_bias_handler(unsigned int (*f) void od_unregister_powersave_bias_handler(void); ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, size_t count); -void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data); +void gov_update_cpu_data(struct dbs_data *dbs_data); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index cdf431696c40b..acd80272ded67 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -28,9 +28,6 @@ #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); - -static struct dbs_governor od_dbs_gov; static struct od_ops od_ops; static unsigned int default_powersave_bias; @@ -222,7 +219,7 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, dbs_data->io_is_busy = !!input; /* we need to re-evaluate prev_cpu_idle */ - gov_update_cpu_data(&od_dbs_gov, dbs_data); + 
gov_update_cpu_data(dbs_data); return count; } @@ -289,7 +286,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - gov_update_cpu_data(&od_dbs_gov, dbs_data); + gov_update_cpu_data(dbs_data); return count; } @@ -413,8 +410,6 @@ static void od_start(struct cpufreq_policy *policy) ondemand_powersave_bias_init(policy); } -define_get_cpu_dbs_routines(od_cpu_dbs_info); - static struct od_ops od_ops = { .powersave_bias_target = generic_powersave_bias_target, }; @@ -427,7 +422,6 @@ static struct dbs_governor od_dbs_gov = { .owner = THIS_MODULE, }, .kobj_type = { .default_attrs = od_attributes }, - .get_cpu_cdbs = get_cpu_cdbs, .gov_dbs_timer = od_dbs_timer, .alloc = od_alloc, .free = od_free, @@ -440,9 +434,6 @@ static struct dbs_governor od_dbs_gov = { static void od_set_powersave_bias(unsigned int powersave_bias) { - struct cpufreq_policy *policy; - struct dbs_data *dbs_data; - struct od_dbs_tuners *od_tuners; unsigned int cpu; cpumask_t done; @@ -451,21 +442,24 @@ static void od_set_powersave_bias(unsigned int powersave_bias) get_online_cpus(); for_each_online_cpu(cpu) { + struct cpufreq_policy *policy; struct policy_dbs_info *policy_dbs; + struct dbs_data *dbs_data; + struct od_dbs_tuners *od_tuners; if (cpumask_test_cpu(cpu, &done)) continue; - policy_dbs = per_cpu(od_cpu_dbs_info, cpu).cdbs.policy_dbs; + policy = cpufreq_cpu_get_raw(cpu); + if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) + continue; + + policy_dbs = policy->governor_data; if (!policy_dbs) continue; - policy = policy_dbs->policy; cpumask_or(&done, &done, policy->cpus); - if (policy->governor != CPU_FREQ_GOV_ONDEMAND) - continue; - dbs_data = policy_dbs->dbs_data; od_tuners = dbs_data->tuners; od_tuners->powersave_bias = default_powersave_bias; From 47ebaac1f32dc606262be48a72f9cea6af376414 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 18 Feb 2016 18:41:36 +0100 Subject: [PATCH 50/61] cpufreq: governor: Relocate definitions of tuners structures Move the definitions of struct od_dbs_tuners and struct cs_dbs_tuners from the common governor header to the ondemand and conservative governor code, respectively, as they don't need to be in the common header any more. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 5 +++++ drivers/cpufreq/cpufreq_governor.h | 10 ---------- drivers/cpufreq/cpufreq_ondemand.h | 4 ++++ 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 5d1edc55aa636..bf4913f6453b1 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -25,6 +25,11 @@ static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *pol return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs); } +struct cs_dbs_tuners { + unsigned int down_threshold; + unsigned int freq_step; +}; + /* Conservative governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 58749da97099a..ece70ab6bbfc0 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -148,16 +148,6 @@ struct cpu_dbs_info { struct policy_dbs_info *policy_dbs; }; -/* Per policy Governors sysfs tunables */ -struct od_dbs_tuners { - unsigned int powersave_bias; -}; - -struct cs_dbs_tuners { - unsigned int down_threshold; - unsigned int freq_step; -}; - /* Common Governor data across policies */ struct dbs_governor { struct cpufreq_governor gov; diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h index 22403e4e0cb07..f0121db3cd9ed 100644 --- a/drivers/cpufreq/cpufreq_ondemand.h +++ 
b/drivers/cpufreq/cpufreq_ondemand.h @@ -24,3 +24,7 @@ static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *pol { return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs); } + +struct od_dbs_tuners { + unsigned int powersave_bias; +}; From e3f5ed9393042188a1716d3873415ef44161addf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:33:43 +0100 Subject: [PATCH 51/61] cpufreq: governor: Make dbs_data_mutex static That mutex is only used by cpufreq_governor_dbs() and it doesn't need to be exported to modules, so make it static and drop the export incantation. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 3 +-- drivers/cpufreq/cpufreq_governor.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 75217b850d7bd..4f0bd482b59ee 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -24,8 +24,7 @@ static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); -DEFINE_MUTEX(dbs_data_mutex); -EXPORT_SYMBOL_GPL(dbs_data_mutex); +static DEFINE_MUTEX(dbs_data_mutex); /* Common sysfs tunables */ /** diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ece70ab6bbfc0..61ff82fe06132 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -178,7 +178,6 @@ struct od_ops { unsigned int freq_next, unsigned int relation); }; -extern struct mutex dbs_data_mutex; unsigned int dbs_update(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) From 1112e9d83e5cd153b35dfbb52721f8b3d3163016 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 21 Feb 2016 00:53:06 +0100 Subject: [PATCH 52/61] cpufreq: governor: Narrow down the dbs_data_mutex coverage Since cpufreq_governor_dbs() is now always called with policy->rwsem held, it cannot be executed twice in parallel for the same policy. Thus it is not necessary to hold dbs_data_mutex around the invocations of cpufreq_governor_start/stop/limits() from it as those functions never modify any data that can be shared between different policies. However, cpufreq_governor_dbs() may be executed twice in parallel for different policies using the same gov->gdbs_data object and dbs_data_mutex is still necessary to protect that object against concurrent updates. For this reason, narrow down the dbs_data_mutex locking to cpufreq_governor_init/exit() where it is needed and rename the mutex to gov_dbs_data_mutex to reflect its purpose. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 46 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 4f0bd482b59ee..542c9caf88153 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -24,7 +24,7 @@ static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); -static DEFINE_MUTEX(dbs_data_mutex); +static DEFINE_MUTEX(gov_dbs_data_mutex); /* Common sysfs tunables */ /** @@ -421,10 +421,10 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, static int cpufreq_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); - struct dbs_data *dbs_data = gov->gdbs_data; + struct dbs_data *dbs_data; struct policy_dbs_info *policy_dbs; unsigned int latency; - int ret; + int ret = 0; /* State should be equivalent to EXIT */ if (policy->governor_data) @@ -434,6 +434,10 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!policy_dbs) return -ENOMEM; + /* 
Protect gov->gdbs_data against concurrent updates. */ + mutex_lock(&gov_dbs_data_mutex); + + dbs_data = gov->gdbs_data; if (dbs_data) { if (WARN_ON(have_governor_per_policy())) { ret = -EINVAL; @@ -446,8 +450,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) dbs_data->usage_count++; list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); mutex_unlock(&dbs_data->mutex); - - return 0; + goto out; } dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); @@ -488,7 +491,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) get_governor_parent_kobj(policy), "%s", gov->gov.name); if (!ret) - return 0; + goto out; /* Failure, so roll back. */ pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); @@ -502,6 +505,9 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) free_policy_dbs_info: free_policy_dbs_info(policy_dbs, gov); + +out: + mutex_unlock(&gov_dbs_data_mutex); return ret; } @@ -512,6 +518,9 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy_dbs->dbs_data; int count; + /* Protect gov->gdbs_data against concurrent updates. 
*/ + mutex_lock(&gov_dbs_data_mutex); + mutex_lock(&dbs_data->mutex); list_del(&policy_dbs->list); count = --dbs_data->usage_count; @@ -533,6 +542,8 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) } free_policy_dbs_info(policy_dbs, gov); + + mutex_unlock(&gov_dbs_data_mutex); return 0; } @@ -599,31 +610,20 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - int ret = -EINVAL; - - /* Lock governor to block concurrent initialization of governor */ - mutex_lock(&dbs_data_mutex); - if (event == CPUFREQ_GOV_POLICY_INIT) { - ret = cpufreq_governor_init(policy); + return cpufreq_governor_init(policy); } else if (policy->governor_data) { switch (event) { case CPUFREQ_GOV_POLICY_EXIT: - ret = cpufreq_governor_exit(policy); - break; + return cpufreq_governor_exit(policy); case CPUFREQ_GOV_START: - ret = cpufreq_governor_start(policy); - break; + return cpufreq_governor_start(policy); case CPUFREQ_GOV_STOP: - ret = cpufreq_governor_stop(policy); - break; + return cpufreq_governor_stop(policy); case CPUFREQ_GOV_LIMITS: - ret = cpufreq_governor_limits(policy); - break; + return cpufreq_governor_limits(policy); } } - - mutex_unlock(&dbs_data_mutex); - return ret; + return -EINVAL; } EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); From 94ab5e030fe10cfcc700050cc21535b824943077 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 21 Feb 2016 03:15:34 +0100 Subject: [PATCH 53/61] cpufreq: governor: Make gov_set_update_util() static The gov_set_update_util() routine is only used internally by the common governor code and it doesn't need to be exported, so make it static. No functional changes. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 542c9caf88153..c9a571fd79ac2 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -261,8 +261,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(dbs_update); -void gov_set_update_util(struct policy_dbs_info *policy_dbs, - unsigned int delay_us) +static void gov_set_update_util(struct policy_dbs_info *policy_dbs, + unsigned int delay_us) { struct cpufreq_policy *policy = policy_dbs->policy; int cpu; @@ -276,7 +276,6 @@ void gov_set_update_util(struct policy_dbs_info *policy_dbs, cpufreq_set_update_util_data(cpu, &cdbs->update_util); } } -EXPORT_SYMBOL_GPL(gov_set_update_util); static inline void gov_clear_update_util(struct cpufreq_policy *policy) { From 27de34823984e844f5dc042d39bb43f5dc98966f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 22 Feb 2016 14:14:34 +0100 Subject: [PATCH 54/61] cpufreq: governor: Fix race in dbs_update_util_handler() There is a scenario that may lead to undesired results in dbs_update_util_handler(). Namely, if two CPUs sharing a policy enter the function at the same time, pass the sample delay check and then one of them is stalled until dbs_work_handler() (queued up by the other CPU) clears the work counter, it may update the work counter and queue up another work item prematurely. To prevent that from happening, use the observation that the CPU queuing up a work item in dbs_update_util_handler() updates the last sample time. This means that if another CPU was stalling after passing the sample delay check and now successfully updated the work counter as a result of the race described above, it will see the new value of the last sample time which is different from what it used in the sample delay check before. 
If that happens, the sample delay check passed previously is not valid any more, so the CPU should not continue. Fixes: f17cbb53783c (cpufreq: governor: Avoid atomic operations in hot paths) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index c9a571fd79ac2..064582aa5a0d6 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -340,7 +340,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; - u64 delta_ns; + u64 delta_ns, lst; /* * The work may not be allowed to be queued up right now. @@ -356,7 +356,8 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * of sample_delay_ns used in the computation may be stale. */ smp_rmb(); - delta_ns = time - policy_dbs->last_sample_time; + lst = READ_ONCE(policy_dbs->last_sample_time); + delta_ns = time - lst; if ((s64)delta_ns < policy_dbs->sample_delay_ns) return; @@ -365,9 +366,19 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * at this point. Otherwise, we need to ensure that only one of the * CPUs sharing the policy will do that. */ - if (policy_dbs->is_shared && - !atomic_add_unless(&policy_dbs->work_count, 1, 1)) - return; + if (policy_dbs->is_shared) { + if (!atomic_add_unless(&policy_dbs->work_count, 1, 1)) + return; + + /* + * If another CPU updated last_sample_time in the meantime, we + * shouldn't be here, so clear the work counter and bail out. 
+ */ + if (unlikely(lst != READ_ONCE(policy_dbs->last_sample_time))) { + atomic_set(&policy_dbs->work_count, 0); + return; + } + } policy_dbs->last_sample_time = time; policy_dbs->work_in_progress = true; From f737236b128cac7c355d0650a98c42ae4313f3f1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 14:18:20 +0530 Subject: [PATCH 55/61] cpufreq: governor: Drop unnecessary checks from show() and store() The show() and store() routines in the cpufreq-governor core don't need to check if the struct governor_attr they want to use really provides the callbacks they need as expected (if that's not the case, it means a bug in the code anyway), so change them to avoid doing that. Also change the error value to -EBUSY, if the governor is getting removed and we aren't allowed to store any more changes. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 064582aa5a0d6..70079e21fa2d5 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -125,12 +125,8 @@ static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, { struct dbs_data *dbs_data = to_dbs_data(kobj); struct governor_attr *gattr = to_gov_attr(attr); - int ret = -EIO; - if (gattr->show) - ret = gattr->show(dbs_data, buf); - - return ret; + return gattr->show(dbs_data, buf); } static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, @@ -138,11 +134,11 @@ static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, { struct dbs_data *dbs_data = to_dbs_data(kobj); struct governor_attr *gattr = to_gov_attr(attr); - int ret = -EIO; + int ret = -EBUSY; mutex_lock(&dbs_data->mutex); - if (dbs_data->usage_count && gattr->store) + if (dbs_data->usage_count) ret = gattr->store(dbs_data, buf, count); 
mutex_unlock(&dbs_data->mutex); From 11eb69b984aae216ae43c79d2d43441ee68a63ca Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:42 +0530 Subject: [PATCH 56/61] cpufreq: Relocate handle_update() to kill its declaration handle_update() is declared at the top of the file as its user appears before its definition. Relocate the routine to get rid of this. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bc93272b4a129..316beffc960ad 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -159,7 +159,6 @@ static inline bool has_target(void) static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); -static void handle_update(struct work_struct *work); /** * Two notifier lists: the "policy" list is involved in the @@ -1072,6 +1071,15 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp return ret; } +static void handle_update(struct work_struct *work) +{ + struct cpufreq_policy *policy = + container_of(work, struct cpufreq_policy, update); + unsigned int cpu = policy->cpu; + pr_debug("handle_update for cpu %u called\n", cpu); + cpufreq_update_policy(cpu); +} + static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); @@ -1453,15 +1461,6 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) cpufreq_policy_free(policy, true); } -static void handle_update(struct work_struct *work) -{ - struct cpufreq_policy *policy = - container_of(work, struct cpufreq_policy, update); - unsigned int cpu = policy->cpu; - pr_debug("handle_update for cpu %u called\n", cpu); - cpufreq_update_policy(cpu); -} - /** * cpufreq_out_of_sync - If actual and saved CPU frequency 
differs, we're * in deep trouble. From a1317e091ab1386812ee8ab4e3bbd89f2811bc74 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:43 +0530 Subject: [PATCH 57/61] cpufreq: Rename __cpufreq_governor() to cpufreq_governor() The __ at the beginning of the routine aren't really necessary at all. Rename it to cpufreq_governor() instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 44 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 316beffc960ad..b3d05a905034d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -156,8 +156,7 @@ static inline bool has_target(void) } /* internal prototypes */ -static int __cpufreq_governor(struct cpufreq_policy *policy, - unsigned int event); +static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); /** @@ -1048,7 +1047,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp down_write(&policy->rwsem); if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) { pr_err("%s: Failed to stop governor\n", __func__); goto unlock; @@ -1058,9 +1057,9 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp cpumask_set_cpu(cpu, policy->cpus); if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); if (ret) pr_err("%s: Failed to start governor\n", __func__); @@ -1382,7 +1381,7 @@ static void cpufreq_offline(unsigned int cpu) down_write(&policy->rwsem); if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = 
cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } @@ -1403,9 +1402,9 @@ static void cpufreq_offline(unsigned int cpu) /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); if (ret) pr_err("%s: Failed to start governor\n", __func__); @@ -1419,7 +1418,7 @@ static void cpufreq_offline(unsigned int cpu) /* If cpu is last user of policy, free policy */ if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1635,7 +1634,7 @@ void cpufreq_suspend(void) for_each_active_policy(policy) { down_write(&policy->rwsem); - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); up_write(&policy->rwsem); if (ret) @@ -1678,9 +1677,9 @@ void cpufreq_resume(void) policy); } else { down_write(&policy->rwsem); - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) - __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); up_write(&policy->rwsem); if (ret) @@ -1977,8 +1976,7 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void) return NULL; } -static int __cpufreq_governor(struct cpufreq_policy *policy, - unsigned int event) +static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) { int ret; @@ -2190,7 +2188,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, old_gov = policy->governor; /* end old governor */ if (old_gov) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, 
CPUFREQ_GOV_STOP); if (ret) { /* This can happen due to race with other operations */ pr_debug("%s: Failed to Stop Governor: %s (%d)\n", @@ -2198,7 +2196,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2208,30 +2206,30 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, /* start new governor */ policy->governor = new_policy->governor; - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); if (!ret) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) goto out; - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); } /* new governor failed, so re-start old one */ pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) policy->governor = NULL; else - __cpufreq_governor(policy, CPUFREQ_GOV_START); + cpufreq_governor(policy, CPUFREQ_GOV_START); } return ret; out: pr_debug("governor: change or update limits\n"); - return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); } /** @@ -2334,7 +2332,7 @@ static int cpufreq_boost_set_sw(int state) down_write(&policy->rwsem); policy->user_policy.max = policy->max; - __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); up_write(&policy->rwsem); } } From 242aa883a64d8c54cfeee47f3603b21bc705e081 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:44 +0530 Subject: [PATCH 58/61] cpufreq: Remove 
'policy->governor_enabled' The entire sequence of events (like INIT/START or STOP/EXIT) for which cpufreq_governor() is called, is guaranteed to be protected by policy->rwsem now. The additional checks that were added earlier (as we were forced to drop policy->rwsem before calling cpufreq_governor() for EXIT event), aren't required anymore. Over that, they weren't sufficient really. They just take care of START/STOP events, but not INIT/EXIT and the state machine was never maintained properly by them. Kill the unnecessary checks and policy->governor_enabled field. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 17 ----------------- include/linux/cpufreq.h | 1 - 2 files changed, 18 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b3d05a905034d..dd568aaf27280 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2010,17 +2010,6 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); - if ((policy->governor_enabled && event == CPUFREQ_GOV_START) - || (!policy->governor_enabled - && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { - return -EBUSY; - } - - if (event == CPUFREQ_GOV_STOP) - policy->governor_enabled = false; - else if (event == CPUFREQ_GOV_START) - policy->governor_enabled = true; - ret = policy->governor->governor(policy, event); if (!ret) { @@ -2028,12 +2017,6 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) policy->governor->initialized++; else if (event == CPUFREQ_GOV_POLICY_EXIT) policy->governor->initialized--; - } else { - /* Restore original values */ - if (event == CPUFREQ_GOV_STOP) - policy->governor_enabled = true; - else if (event == CPUFREQ_GOV_START) - policy->governor_enabled = false; } if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h 
index cac3d1ba82002..a50c5b2e3bf2d 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,7 +80,6 @@ struct cpufreq_policy { unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; - bool governor_enabled; /* governor start/stop flag */ char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ struct work_struct update; /* if update_policy() needs to be From e6f036571e1f65021a442ec7aad087a6a239ecfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 28 Feb 2016 02:33:29 +0100 Subject: [PATCH 59/61] cpufreq: Select IRQ_WORK if CPU_FREQ_GOV_COMMON is set Commit 0eb463be3436 (cpufreq: governor: Replace timers with utilization update callbacks) made CPU_FREQ select IRQ_WORK, but that's not necessary, as it is sufficient for IRQ_WORK to be selected by CPU_FREQ_GOV_COMMON, so modify the cpufreq Kconfig to that effect. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index dcb972a38fbc8..aa403aa2b927f 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -3,7 +3,6 @@ menu "CPU Frequency scaling" config CPU_FREQ bool "CPU Frequency scaling" select SRCU - select IRQ_WORK help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. This is a nice method to save power, because @@ -20,6 +19,7 @@ config CPU_FREQ if CPU_FREQ config CPU_FREQ_GOV_COMMON + select IRQ_WORK bool config CPU_FREQ_BOOST_SW From 08f511fd41c3afe303eb9b41bff0570f7c1b6937 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 4 Mar 2016 03:58:22 +0100 Subject: [PATCH 60/61] cpufreq: Reduce cpufreq_update_util() overhead a bit Use the observation that cpufreq_update_util() is only called by the scheduler with rq->lock held, so the callers of cpufreq_set_update_util_data() can use synchronize_sched() instead of synchronize_rcu() to wait for cpufreq_update_util() to complete. Moreover, if they are updated to do that, rcu_read_(un)lock() calls in cpufreq_update_util() might be replaced with rcu_read_(un)lock_sched(), respectively, but those aren't really necessary, because the scheduler calls that function from RCU-sched read-side critical sections already. In addition to that, if cpufreq_set_update_util_data() checks the func field in the struct update_util_data before setting the per-CPU pointer to it, the data->func check may be dropped from cpufreq_update_util() as well. Make the above changes to reduce the overhead from cpufreq_update_util() in the scheduler paths invoking it and to make the cleanup after removing its callbacks less heavy-weight somewhat. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) --- drivers/cpufreq/cpufreq.c | 25 +++++++++++++++++-------- drivers/cpufreq/cpufreq_governor.c | 2 +- drivers/cpufreq/intel_pstate.c | 4 ++-- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index dd568aaf27280..6eca12ab71d7a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -115,12 +115,15 @@ static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); * to call from cpufreq_update_util(). That function will be called from an RCU * read-side critical section, so it must not sleep. * - * Callers must use RCU callbacks to free any memory that might be accessed - * via the old update_util_data pointer or invoke synchronize_rcu() right after - * this function to avoid use-after-free. 
+ * Callers must use RCU-sched callbacks to free any memory that might be + * accessed via the old update_util_data pointer or invoke synchronize_sched() + * right after this function to avoid use-after-free. */ void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) { + if (WARN_ON(data && !data->func)) + return; + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); } EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); @@ -133,18 +136,24 @@ EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); * * This function is called by the scheduler on every invocation of * update_load_avg() on the CPU whose utilization is being updated. + * + * It can only be called from RCU-sched read-side critical sections. */ void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) { struct update_util_data *data; - rcu_read_lock(); +#ifdef CONFIG_LOCKDEP + WARN_ON(debug_locks && !rcu_read_lock_sched_held()); +#endif - data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data)); - if (data && data->func) + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + /* + * If this isn't inside of an RCU-sched read-side critical section, data + * may become NULL after the check below. 
+ */ + if (data) data->func(data, time, util, max); - - rcu_read_unlock(); } /* Flag to suspend/resume CPUFreq governors */ diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 70079e21fa2d5..db46190bb246a 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -280,7 +280,7 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) for_each_cpu(i, policy->cpus) cpufreq_set_update_util_data(i, NULL); - synchronize_rcu(); + synchronize_sched(); } static void gov_cancel_work(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f4d85c2ae7b18..2165d2b2fc359 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1168,7 +1168,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); cpufreq_set_update_util_data(cpu_num, NULL); - synchronize_rcu(); + synchronize_sched(); if (hwp_active) return; @@ -1426,7 +1426,7 @@ static int __init intel_pstate_init(void) for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) { cpufreq_set_update_util_data(cpu, NULL); - synchronize_rcu(); + synchronize_sched(); kfree(all_cpu_data[cpu]); } } From adaf9fcd136970e480d7ca834c0cf25ce922ea74 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Mar 2016 20:44:47 +0100 Subject: [PATCH 61/61] cpufreq: Move scheduler-related code to the sched directory Create cpufreq.c under kernel/sched/ and move the cpufreq code related to the scheduler to that file and to sched.h. Redefine cpufreq_update_util() as a static inline function to avoid function calls at its call sites in the scheduler code (as suggested by Peter Zijlstra). Also move the definition of struct update_util_data and declaration of cpufreq_set_update_util_data() from include/linux/cpufreq.h to include/linux/sched.h. Signed-off-by: Rafael J. 
Wysocki Acked-by: Peter Zijlstra (Intel) --- drivers/cpufreq/cpufreq.c | 53 ------------------------------ drivers/cpufreq/cpufreq_governor.c | 1 + include/linux/cpufreq.h | 34 ------------------- include/linux/sched.h | 9 +++++ kernel/sched/Makefile | 1 + kernel/sched/cpufreq.c | 37 +++++++++++++++++++++ kernel/sched/sched.h | 49 ++++++++++++++++++++++++++- 7 files changed, 96 insertions(+), 88 deletions(-) create mode 100644 kernel/sched/cpufreq.c diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6eca12ab71d7a..58e1a39b4d22a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -103,59 +103,6 @@ static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); -static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); - -/** - * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. - * @cpu: The CPU to set the pointer for. - * @data: New pointer value. - * - * Set and publish the update_util_data pointer for the given CPU. That pointer - * points to a struct update_util_data object containing a callback function - * to call from cpufreq_update_util(). That function will be called from an RCU - * read-side critical section, so it must not sleep. - * - * Callers must use RCU-sched callbacks to free any memory that might be - * accessed via the old update_util_data pointer or invoke synchronize_sched() - * right after this function to avoid use-after-free. - */ -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) -{ - if (WARN_ON(data && !data->func)) - return; - - rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); -} -EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); - -/** - * cpufreq_update_util - Take a note about CPU utilization changes. - * @time: Current time. - * @util: Current utilization. - * @max: Utilization ceiling. 
- * - * This function is called by the scheduler on every invocation of - * update_load_avg() on the CPU whose utilization is being updated. - * - * It can only be called from RCU-sched read-side critical sections. - */ -void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) -{ - struct update_util_data *data; - -#ifdef CONFIG_LOCKDEP - WARN_ON(debug_locks && !rcu_read_lock_sched_held()); -#endif - - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); - /* - * If this isn't inside of an RCU-sched read-side critical section, data - * may become NULL after the check below. - */ - if (data) - data->func(data, time, util, max); -} - /* Flag to suspend/resume CPUFreq governors */ static bool cpufreq_suspended; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index db46190bb246a..1c25ef4056164 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "cpufreq_governor.h" diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a50c5b2e3bf2d..a5ea52f793f34 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -146,36 +146,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ -void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. - * @time: Current time. - * - * The way cpufreq is currently arranged requires it to evaluate the CPU - * performance state (frequency/voltage) on a regular basis to prevent it from - * being stuck in a completely inadequate performance level for too long. 
- * That is not guaranteed to happen if the updates are only triggered from CFS, - * though, because they may not be coming in if RT or deadline tasks are active - * all the time (or there are RT and DL tasks only). - * - * As a workaround for that issue, this function is called by the RT and DL - * sched classes to trigger extra cpufreq updates to prevent it from stalling, - * but that really is a band-aid. Going forward it should be replaced with - * solutions targeted more specifically at RT and DL tasks. - */ -static inline void cpufreq_trigger_update(u64 time) -{ - cpufreq_update_util(time, ULONG_MAX, 0); -} - -struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); -}; - -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); - unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -187,10 +157,6 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else -static inline void cpufreq_update_util(u64 time, unsigned long util, - unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} - static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc30..913e755ef7b8c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3207,4 +3207,13 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); +#endif /* CONFIG_CPU_FREQ */ + #endif diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 
67687973ce80d..9507522164ac6 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -19,3 +19,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o +obj-$(CONFIG_CPU_FREQ) += cpufreq.o diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c new file mode 100644 index 0000000000000..928c4ba32f683 --- /dev/null +++ b/kernel/sched/cpufreq.c @@ -0,0 +1,37 @@ +/* + * Scheduler code and data structures related to cpufreq. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "sched.h" + +DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. + * @cpu: The CPU to set the pointer for. + * @data: New pointer value. + * + * Set and publish the update_util_data pointer for the given CPU. That pointer + * points to a struct update_util_data object containing a callback function + * to call from cpufreq_update_util(). That function will be called from an + * RCU-sched read-side critical section, so it must not sleep. + * + * Callers must use RCU-sched callbacks to free any memory that might be + * accessed via the old update_util_data pointer or invoke synchronize_sched() + * right after this function to avoid use-after-free. 
+ */ +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) +{ + if (WARN_ON(data && !data->func)) + return; + + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); +} +EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f042190c80021..faf7e2758dd03 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -9,7 +9,6 @@ #include #include #include -#include #include "cpupri.h" #include "cpudeadline.h" @@ -1739,3 +1738,51 @@ static inline u64 irq_time_read(int cpu) } #endif /* CONFIG_64BIT */ #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ + +#ifdef CONFIG_CPU_FREQ +DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_update_util - Take a note about CPU utilization changes. + * @time: Current time. + * @util: Current utilization. + * @max: Utilization ceiling. + * + * This function is called by the scheduler on every invocation of + * update_load_avg() on the CPU whose utilization is being updated. + * + * It can only be called from RCU-sched read-side critical sections. + */ +static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) +{ + struct update_util_data *data; + + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data) + data->func(data, time, util, max); +} + +/** + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. + * @time: Current time. + * + * The way cpufreq is currently arranged requires it to evaluate the CPU + * performance state (frequency/voltage) on a regular basis to prevent it from + * being stuck in a completely inadequate performance level for too long. + * That is not guaranteed to happen if the updates are only triggered from CFS, + * though, because they may not be coming in if RT or deadline tasks are active + * all the time (or there are RT and DL tasks only). 
+ * + * As a workaround for that issue, this function is called by the RT and DL + * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT and DL tasks. + */ +static inline void cpufreq_trigger_update(u64 time) +{ + cpufreq_update_util(time, ULONG_MAX, 0); +} +#else +static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} +static inline void cpufreq_trigger_update(u64 time) {} +#endif /* CONFIG_CPU_FREQ */