diff --git a/[refs] b/[refs]
index 419d7f405500..855046522e63 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: eaf729c8a8bfc9c7a5ff5659e3b2584bf2ef22e1
+refs/heads/master: 8f40a9f5325cdceddb1610cb3dfd8cb532f5a618
diff --git a/trunk/.gitignore b/trunk/.gitignore
index a232295b99ac..8d15830b883d 100644
--- a/trunk/.gitignore
+++ b/trunk/.gitignore
@@ -22,7 +22,6 @@ tags
 TAGS
 vmlinux*
-!vmlinux.lds.S
 System.map
 Module.symvers
diff --git a/trunk/arch/powerpc/configs/cell_defconfig b/trunk/arch/powerpc/configs/cell_defconfig
index d9ac24e8de16..74f83f4a4e5e 100644
--- a/trunk/arch/powerpc/configs/cell_defconfig
+++ b/trunk/arch/powerpc/configs/cell_defconfig
@@ -1455,8 +1455,7 @@ CONFIG_HAS_DMA=y
 # Instrumentation Support
 #
 CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
-CONFIG_OPROFILE_CELL=y
+CONFIG_OPROFILE=y
 # CONFIG_KPROBES is not set
 
 #
diff --git a/trunk/arch/powerpc/kernel/crash.c b/trunk/arch/powerpc/kernel/crash.c
index 37658ea417fa..d3f2080d2eee 100644
--- a/trunk/arch/powerpc/kernel/crash.c
+++ b/trunk/arch/powerpc/kernel/crash.c
@@ -219,72 +219,6 @@ void crash_kexec_secondary(struct pt_regs *regs)
 	cpus_in_sr = CPU_MASK_NONE;
 }
 #endif
-#ifdef CONFIG_SPU_BASE
-
-#include <asm/spu.h>
-#include <asm/spu_priv1.h>
-
-struct crash_spu_info {
-	struct spu *spu;
-	u32 saved_spu_runcntl_RW;
-	u32 saved_spu_status_R;
-	u32 saved_spu_npc_RW;
-	u64 saved_mfc_sr1_RW;
-	u64 saved_mfc_dar;
-	u64 saved_mfc_dsisr;
-};
-
-#define CRASH_NUM_SPUS	16	/* Enough for current hardware */
-static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
-
-static void crash_kexec_stop_spus(void)
-{
-	struct spu *spu;
-	int i;
-	u64 tmp;
-
-	for (i = 0; i < CRASH_NUM_SPUS; i++) {
-		if (!crash_spu_info[i].spu)
-			continue;
-
-		spu = crash_spu_info[i].spu;
-
-		crash_spu_info[i].saved_spu_runcntl_RW =
-			in_be32(&spu->problem->spu_runcntl_RW);
-		crash_spu_info[i].saved_spu_status_R =
-			in_be32(&spu->problem->spu_status_R);
-		crash_spu_info[i].saved_spu_npc_RW =
-			in_be32(&spu->problem->spu_npc_RW);
-
-		crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
-		crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
-		tmp = spu_mfc_sr1_get(spu);
-		crash_spu_info[i].saved_mfc_sr1_RW = tmp;
-
-		tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
-		spu_mfc_sr1_set(spu, tmp);
-
-		__delay(200);
-	}
-}
-
-void crash_register_spus(struct list_head *list)
-{
-	struct spu *spu;
-
-	list_for_each_entry(spu, list, full_list) {
-		if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
-			continue;
-
-		crash_spu_info[spu->number].spu = spu;
-	}
-}
-
-#else
-static inline void crash_kexec_stop_spus(void)
-{
-}
-#endif /* CONFIG_SPU_BASE */
 
 void default_machine_crash_shutdown(struct pt_regs *regs)
 {
@@ -320,7 +254,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	crash_save_cpu(regs, crashing_cpu);
 	crash_kexec_prepare_cpus(crashing_cpu);
 	cpu_set(crashing_cpu, cpus_in_crash);
-	crash_kexec_stop_spus();
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(1, 0);
 }
diff --git a/trunk/arch/powerpc/kernel/time.c b/trunk/arch/powerpc/kernel/time.c
index 727a6699f2f4..e5df167f7824 100644
--- a/trunk/arch/powerpc/kernel/time.c
+++ b/trunk/arch/powerpc/kernel/time.c
@@ -122,7 +122,6 @@ extern struct timezone sys_tz;
 static long timezone_offset;
 
 unsigned long ppc_proc_freq;
-EXPORT_SYMBOL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 
 static u64 tb_last_jiffy __cacheline_aligned_in_smp;
diff --git a/trunk/arch/powerpc/oprofile/Kconfig b/trunk/arch/powerpc/oprofile/Kconfig
index 7089e79689b9..eb2dece76a54 100644
--- a/trunk/arch/powerpc/oprofile/Kconfig
+++ b/trunk/arch/powerpc/oprofile/Kconfig
@@ -15,10 +15,3 @@ config OPROFILE
 
 	  If unsure, say N.
 
-config OPROFILE_CELL
-	bool "OProfile for Cell Broadband Engine"
-	depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
-	default y
-	help
-	  Profiling of Cell BE SPUs requires special support enabled
-	  by this option.
diff --git a/trunk/arch/powerpc/oprofile/Makefile b/trunk/arch/powerpc/oprofile/Makefile
index c5f64c3bd668..4b5f9528218c 100644
--- a/trunk/arch/powerpc/oprofile/Makefile
+++ b/trunk/arch/powerpc/oprofile/Makefile
@@ -11,9 +11,7 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
 		timer_int.o )
 
 oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
-oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
-					cell/spu_profiler.o cell/vma_map.o \
-					cell/spu_task_sync.o
+oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
 oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
 oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
 oprofile-$(CONFIG_6xx) += op_model_7450.o
diff --git a/trunk/arch/powerpc/oprofile/cell/pr_util.h b/trunk/arch/powerpc/oprofile/cell/pr_util.h
deleted file mode 100644
index e5704f00c8b4..000000000000
--- a/trunk/arch/powerpc/oprofile/cell/pr_util.h
+++ /dev/null
@@ -1,97 +0,0 @@
- /*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Author: Maynard Johnson <maynardj@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef PR_UTIL_H
-#define PR_UTIL_H
-
-#include <linux/cpumask.h>
-#include <linux/oprofile.h>
-#include <asm/cell-pmu.h>
-#include <asm/spu.h>
-
-#include "../../platforms/cell/cbe_regs.h"
-
-/* Defines used for sync_start */
-#define SKIP_GENERIC_SYNC 0
-#define SYNC_START_ERROR -1
-#define DO_GENERIC_SYNC 1
-
-struct spu_overlay_info {	/* map of sections within an SPU overlay */
-	unsigned int vma;	/* SPU virtual memory address from elf */
-	unsigned int size;	/* size of section from elf */
-	unsigned int offset;	/* offset of section into elf file */
-	unsigned int buf;
-};
-
-struct vma_to_fileoffset_map {	/* map of sections within an SPU program */
-	struct vma_to_fileoffset_map *next;	/* list pointer */
-	unsigned int vma;	/* SPU virtual memory address from elf */
-	unsigned int size;	/* size of section from elf */
-	unsigned int offset;	/* offset of section into elf file */
-	unsigned int guard_ptr;
-	unsigned int guard_val;
-	/*
-	 * The guard pointer is an entry in the _ovly_buf_table,
-	 * computed using ovly.buf as the index into the table.  Since
-	 * ovly.buf values begin at '1' to reference the first (or 0th)
-	 * entry in the _ovly_buf_table, the computation subtracts 1
-	 * from ovly.buf.
-	 * The guard value is stored in the _ovly_buf_table entry and
-	 * is an index (starting at 1) back to the _ovly_table entry
-	 * that is pointing at this _ovly_buf_table entry.  So, for
-	 * example, for an overlay scenario with one overlay segment
-	 * and two overlay sections:
-	 *	- Section 1 points to the first entry of the
-	 *	  _ovly_buf_table, which contains a guard value
-	 *	  of '1', referencing the first (index=0) entry of
-	 *	  _ovly_table.
-	 *	- Section 2 points to the second entry of the
-	 *	  _ovly_buf_table, which contains a guard value
-	 *	  of '2', referencing the second (index=1) entry of
-	 *	  _ovly_table.
-	 */
-
-};
-
-/* The three functions below are for maintaining and accessing
- * the vma-to-fileoffset map.
- */
-struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
-					     u64 objectid);
-unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
-			    unsigned int vma, const struct spu *aSpu,
-			    int *grd_val);
-void vma_map_free(struct vma_to_fileoffset_map *map);
-
-/*
- * Entry point for SPU profiling.
- * cycles_reset is the SPU_CYCLES count value specified by the user.
- */
-int start_spu_profiling(unsigned int cycles_reset);
-
-void stop_spu_profiling(void);
-
-
-/* add the necessary profiling hooks */
-int spu_sync_start(void);
-
-/* remove the hooks */
-int spu_sync_stop(void);
-
-/* Record SPU program counter samples to the oprofile event buffer. */
-void spu_sync_buffer(int spu_num, unsigned int *samples,
-		     int num_samples);
-
-void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
-
-#endif	  /* PR_UTIL_H */
diff --git a/trunk/arch/powerpc/oprofile/cell/spu_profiler.c b/trunk/arch/powerpc/oprofile/cell/spu_profiler.c
deleted file mode 100644
index 380d7e217531..000000000000
--- a/trunk/arch/powerpc/oprofile/cell/spu_profiler.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Authors: Maynard Johnson <maynardj@us.ibm.com>
- *	    Carl Love <carll@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/hrtimer.h>
-#include <linux/smp.h>
-#include <linux/slab.h>
-#include <asm/cell-pmu.h>
-#include "pr_util.h"
-
-#define TRACE_ARRAY_SIZE 1024
-#define SCALE_SHIFT 14
-
-static u32 *samples;
-
-static int spu_prof_running;
-static unsigned int profiling_interval;
-
-#define NUM_SPU_BITS_TRBUF 16
-#define SPUS_PER_TB_ENTRY   4
-#define SPUS_PER_NODE	    8
-
-#define SPU_PC_MASK	     0xFFFF
-
-static DEFINE_SPINLOCK(sample_array_lock);
-unsigned long sample_array_lock_flags;
-
-void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
-{
-	unsigned long ns_per_cyc;
-
-	if (!freq_khz)
-		freq_khz = ppc_proc_freq/1000;
-
-	/* To calculate a timeout in nanoseconds, the basic
-	 * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
-	 * To avoid floating point math, we use the scale math
-	 * technique as described in linux/jiffies.h.  We use
-	 * a scale factor of SCALE_SHIFT, which provides 4 decimal places
-	 * of precision.  This is close enough for the purpose at hand.
-	 *
-	 * The value of the timeout should be small enough that the hw
-	 * trace buffer will not get more then about 1/3 full for the
-	 * maximum user specified (the LFSR value) hw sampling frequency.
-	 * This is to ensure the trace buffer will never fill even if the
-	 * kernel thread scheduling varies under a heavy system load.
-	 */
-
-	ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
-	profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
-
-}
-
-/*
- * Extract SPU PC from trace buffer entry
- */
-static void spu_pc_extract(int cpu, int entry)
-{
-	/* the trace buffer is 128 bits */
-	u64 trace_buffer[2];
-	u64 spu_mask;
-	int spu;
-
-	spu_mask = SPU_PC_MASK;
-
-	/* Each SPU PC is 16 bits; hence, four spus in each of
-	 * the two 64-bit buffer entries that make up the
-	 * 128-bit trace_buffer entry.	Process two 64-bit values
-	 * simultaneously.
-	 * trace[0] SPU PC contents are: 0 1 2 3
-	 * trace[1] SPU PC contents are: 4 5 6 7
-	 */
-
-	cbe_read_trace_buffer(cpu, trace_buffer);
-
-	for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
-		/* spu PC trace entry is upper 16 bits of the
-		 * 18 bit SPU program counter
-		 */
-		samples[spu * TRACE_ARRAY_SIZE + entry]
-			= (spu_mask & trace_buffer[0]) << 2;
-		samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
-			= (spu_mask & trace_buffer[1]) << 2;
-
-		trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
-		trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
-	}
-}
-
-static int cell_spu_pc_collection(int cpu)
-{
-	u32 trace_addr;
-	int entry;
-
-	/* process the collected SPU PC for the node */
-
-	entry = 0;
-
-	trace_addr = cbe_read_pm(cpu, trace_address);
-	while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
-		/* there is data in the trace buffer to process */
-		spu_pc_extract(cpu, entry);
-
-		entry++;
-
-		if (entry >= TRACE_ARRAY_SIZE)
-			/* spu_samples is full */
-			break;
-
-		trace_addr = cbe_read_pm(cpu, trace_address);
-	}
-
-	return entry;
-}
-
-
-static enum hrtimer_restart profile_spus(struct hrtimer *timer)
-{
-	ktime_t kt;
-	int cpu, node, k, num_samples, spu_num;
-
-	if (!spu_prof_running)
-		goto stop;
-
-	for_each_online_cpu(cpu) {
-		if (cbe_get_hw_thread_id(cpu))
-			continue;
-
-		node = cbe_cpu_to_node(cpu);
-
-		/* There should only be one kernel thread at a time processing
-		 * the samples.	 In the very unlikely case that the processing
-		 * is taking a very long time and multiple kernel threads are
-		 * started to process the samples.  Make sure only one kernel
-		 * thread is working on the samples array at a time.  The
-		 * sample array must be loaded and then processed for a given
-		 * cpu.	 The sample array is not per cpu.
-		 */
-		spin_lock_irqsave(&sample_array_lock,
-				  sample_array_lock_flags);
-		num_samples = cell_spu_pc_collection(cpu);
-
-		if (num_samples == 0) {
-			spin_unlock_irqrestore(&sample_array_lock,
-					       sample_array_lock_flags);
-			continue;
-		}
-
-		for (k = 0; k < SPUS_PER_NODE; k++) {
-			spu_num = k + (node * SPUS_PER_NODE);
-			spu_sync_buffer(spu_num,
-					samples + (k * TRACE_ARRAY_SIZE),
-					num_samples);
-		}
-
-		spin_unlock_irqrestore(&sample_array_lock,
-				       sample_array_lock_flags);
-
-	}
-	smp_wmb();	/* insure spu event buffer updates are written */
-			/* don't want events intermingled... */
-
-	kt = ktime_set(0, profiling_interval);
-	if (!spu_prof_running)
-		goto stop;
-	hrtimer_forward(timer, timer->base->get_time(), kt);
-	return HRTIMER_RESTART;
-
- stop:
-	printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
-	return HRTIMER_NORESTART;
-}
-
-static struct hrtimer timer;
-/*
- * Entry point for SPU profiling.
- * NOTE: SPU profiling is done system-wide, not per-CPU.
- *
- * cycles_reset is the count value specified by the user when
- * setting up OProfile to count SPU_CYCLES.
- */
-int start_spu_profiling(unsigned int cycles_reset)
-{
-	ktime_t kt;
-
-	pr_debug("timer resolution: %lu\n", TICK_NSEC);
-	kt = ktime_set(0, profiling_interval);
-	hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	timer.expires = kt;
-	timer.function = profile_spus;
-
-	/* Allocate arrays for collecting SPU PC samples */
-	samples = kzalloc(SPUS_PER_NODE *
-			  TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
-
-	if (!samples)
-		return -ENOMEM;
-
-	spu_prof_running = 1;
-	hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
-
-	return 0;
-}
-
-void stop_spu_profiling(void)
-{
-	spu_prof_running = 0;
-	hrtimer_cancel(&timer);
-	kfree(samples);
-	pr_debug("SPU_PROF: stop_spu_profiling issued\n");
-}
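For reference, the set_spu_profiling_frequency() routine deleted above avoids floating point by carrying nanoseconds-per-cycle as a fixed-point value scaled by 2^SCALE_SHIFT, the scale-math technique it credits to linux/jiffies.h. A minimal user-space sketch of the same arithmetic; timeout_ns and the 3.2 GHz / 100000-cycle inputs are illustrative only, not kernel symbols:

	#include <stdio.h>
	#include <stdint.h>

	#define SCALE_SHIFT 14	/* same scale factor as the deleted code */

	/* Compute scaled ns-per-cycle once, then multiply by the
	 * cycle count and shift the scale back out. */
	static uint64_t timeout_ns(uint64_t freq_khz, uint64_t cycles_reset)
	{
		/* USEC_PER_SEC (1000000) us/s divided by kHz gives ns/cycle */
		uint64_t ns_per_cyc = (1000000ULL << SCALE_SHIFT) / freq_khz;
		return (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
	}

	int main(void)
	{
		/* e.g. a hypothetical 3.2 GHz Cell (3200000 kHz),
		 * sampling every 100000 cycles -> 31250 ns */
		printf("%llu ns\n",
		       (unsigned long long)timeout_ns(3200000, 100000));
		return 0;
	}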
diff --git a/trunk/arch/powerpc/oprofile/cell/spu_task_sync.c b/trunk/arch/powerpc/oprofile/cell/spu_task_sync.c
deleted file mode 100644
index 133665754a75..000000000000
--- a/trunk/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ /dev/null
@@ -1,484 +0,0 @@
-/*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Author: Maynard Johnson <maynardj@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* The purpose of this file is to handle SPU event task switching
- * and to record SPU context information into the OProfile
- * event buffer.
- *
- * Additionally, the spu_sync_buffer function is provided as a helper
- * for recoding actual SPU program counter samples to the event buffer.
- */
-#include <linux/dcookies.h>
-#include <linux/kref.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/numa.h>
-#include <linux/oprofile.h>
-#include <linux/spinlock.h>
-#include "pr_util.h"
-
-#define RELEASE_ALL 9999
-
-static DEFINE_SPINLOCK(buffer_lock);
-static DEFINE_SPINLOCK(cache_lock);
-static int num_spu_nodes;
-int spu_prof_num_nodes;
-int last_guard_val[MAX_NUMNODES * 8];
-
-/* Container for caching information about an active SPU task. */
-struct cached_info {
-	struct vma_to_fileoffset_map *map;
-	struct spu *the_spu;	/* needed to access pointer to local_store */
-	struct kref cache_ref;
-};
-
-static struct cached_info *spu_info[MAX_NUMNODES * 8];
-
-static void destroy_cached_info(struct kref *kref)
-{
-	struct cached_info *info;
-
-	info = container_of(kref, struct cached_info, cache_ref);
-	vma_map_free(info->map);
-	kfree(info);
-	module_put(THIS_MODULE);
-}
-
-/* Return the cached_info for the passed SPU number.
- * ATTENTION: Callers are responsible for obtaining the
- * cache_lock if needed prior to invoking this function.
- */
-static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
-{
-	struct kref *ref;
-	struct cached_info *ret_info;
-
-	if (spu_num >= num_spu_nodes) {
-		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: Invalid index %d into spu info cache\n",
-		       __FUNCTION__, __LINE__, spu_num);
-		ret_info = NULL;
-		goto out;
-	}
-	if (!spu_info[spu_num] && the_spu) {
-		ref = spu_get_profile_private_kref(the_spu->ctx);
-		if (ref) {
-			spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
-			kref_get(&spu_info[spu_num]->cache_ref);
-		}
-	}
-
-	ret_info = spu_info[spu_num];
- out:
-	return ret_info;
-}
-
-
-/* Looks for cached info for the passed spu. If not found, the
- * cached info is created for the passed spu.
- * Returns 0 for success; otherwise, -1 for error.
- */
-static int
-prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
-{
-	unsigned long flags;
-	struct vma_to_fileoffset_map *new_map;
-	int retval = 0;
-	struct cached_info *info;
-
-	/* We won't bother getting cache_lock here since
-	 * don't do anything with the cached_info that's returned.
-	 */
-	info = get_cached_info(spu, spu->number);
-
-	if (info) {
-		pr_debug("Found cached SPU info.\n");
-		goto out;
-	}
-
-	/* Create cached_info and set spu_info[spu->number] to point to it.
-	 * spu->number is a system-wide value, not a per-node value.
-	 */
-	info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
-	if (!info) {
-		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: create vma_map failed\n",
-		       __FUNCTION__, __LINE__);
-		retval = -ENOMEM;
-		goto err_alloc;
-	}
-	new_map = create_vma_map(spu, objectId);
-	if (!new_map) {
-		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: create vma_map failed\n",
-		       __FUNCTION__, __LINE__);
-		retval = -ENOMEM;
-		goto err_alloc;
-	}
-
-	pr_debug("Created vma_map\n");
-	info->map = new_map;
-	info->the_spu = spu;
-	kref_init(&info->cache_ref);
-	spin_lock_irqsave(&cache_lock, flags);
-	spu_info[spu->number] = info;
-	/* Increment count before passing off ref to SPUFS. */
-	kref_get(&info->cache_ref);
-
-	/* We increment the module refcount here since SPUFS is
-	 * responsible for the final destruction of the cached_info,
-	 * and it must be able to access the destroy_cached_info()
-	 * function defined in the OProfile module.  We decrement
-	 * the module refcount in destroy_cached_info.
-	 */
-	try_module_get(THIS_MODULE);
-	spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
-				     destroy_cached_info);
-	spin_unlock_irqrestore(&cache_lock, flags);
-	goto out;
-
-err_alloc:
-	kfree(info);
-out:
-	return retval;
-}
-
-/*
- * NOTE: The caller is responsible for locking the
- *	 cache_lock prior to calling this function.
- */
-static int release_cached_info(int spu_index)
-{
-	int index, end;
-
-	if (spu_index == RELEASE_ALL) {
-		end = num_spu_nodes;
-		index = 0;
-	} else {
-		if (spu_index >= num_spu_nodes) {
-			printk(KERN_ERR "SPU_PROF: "
-			       "%s, line %d: "
-			       "Invalid index %d into spu info cache\n",
-			       __FUNCTION__, __LINE__, spu_index);
-			goto out;
-		}
-		end = spu_index + 1;
-		index = spu_index;
-	}
-	for (; index < end; index++) {
-		if (spu_info[index]) {
-			kref_put(&spu_info[index]->cache_ref,
-				 destroy_cached_info);
-			spu_info[index] = NULL;
-		}
-	}
-
-out:
-	return 0;
-}
-
-/* The source code for fast_get_dcookie was "borrowed"
- * from drivers/oprofile/buffer_sync.c.
- */
-
-/* Optimisation. We can manage without taking the dcookie sem
- * because we cannot reach this code without at least one
- * dcookie user still being registered (namely, the reader
- * of the event buffer).
- */
-static inline unsigned long fast_get_dcookie(struct dentry *dentry,
-					     struct vfsmount *vfsmnt)
-{
-	unsigned long cookie;
-
-	if (dentry->d_cookie)
-		return (unsigned long)dentry;
-	get_dcookie(dentry, vfsmnt, &cookie);
-	return cookie;
-}
-
-/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
- * which corresponds loosely to "application name". Also, determine
- * the offset for the SPU ELF object.  If computed offset is
- * non-zero, it implies an embedded SPU object; otherwise, it's a
- * separate SPU binary, in which case we retrieve it's dcookie.
- * For the embedded case, we must determine if SPU ELF is embedded
- * in the executable application or another file (i.e., shared lib).
- * If embedded in a shared lib, we must get the dcookie and return
- * that to the caller.
- */
-static unsigned long
-get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
-			    unsigned long *spu_bin_dcookie,
-			    unsigned long spu_ref)
-{
-	unsigned long app_cookie = 0;
-	unsigned int my_offset = 0;
-	struct file *app = NULL;
-	struct vm_area_struct *vma;
-	struct mm_struct *mm = spu->mm;
-
-	if (!mm)
-		goto out;
-
-	down_read(&mm->mmap_sem);
-
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (!vma->vm_file)
-			continue;
-		if (!(vma->vm_flags & VM_EXECUTABLE))
-			continue;
-		app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
-					      vma->vm_file->f_vfsmnt);
-		pr_debug("got dcookie for %s\n",
-			 vma->vm_file->f_dentry->d_name.name);
-		app = vma->vm_file;
-		break;
-	}
-
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
-			continue;
-		my_offset = spu_ref - vma->vm_start;
-		if (!vma->vm_file)
-			goto fail_no_image_cookie;
-
-		pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
-			 my_offset, spu_ref,
-			 vma->vm_file->f_dentry->d_name.name);
-		*offsetp = my_offset;
-		break;
-	}
-
-	*spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
-					    vma->vm_file->f_vfsmnt);
-	pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
-
-	up_read(&mm->mmap_sem);
-
-out:
-	return app_cookie;
-
-fail_no_image_cookie:
-	up_read(&mm->mmap_sem);
-
-	printk(KERN_ERR "SPU_PROF: "
-	       "%s, line %d: Cannot find dcookie for SPU binary\n",
-	       __FUNCTION__, __LINE__);
-	goto out;
-}
-
-
-
-/* This function finds or creates cached context information for the
- * passed SPU and records SPU context information into the OProfile
- * event buffer.
- */
-static int process_context_switch(struct spu *spu, unsigned long objectId)
-{
-	unsigned long flags;
-	int retval;
-	unsigned int offset = 0;
-	unsigned long spu_cookie = 0, app_dcookie;
-
-	retval = prepare_cached_spu_info(spu, objectId);
-	if (retval)
-		goto out;
-
-	/* Get dcookie first because a mutex_lock is taken in that
-	 * code path, so interrupts must not be disabled.
-	 */
-	app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
-	if (!app_dcookie || !spu_cookie) {
-		retval = -ENOENT;
-		goto out;
-	}
-
-	/* Record context info in event buffer */
-	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_CTX_SWITCH_CODE);
-	add_event_entry(spu->number);
-	add_event_entry(spu->pid);
-	add_event_entry(spu->tgid);
-	add_event_entry(app_dcookie);
-	add_event_entry(spu_cookie);
-	add_event_entry(offset);
-	spin_unlock_irqrestore(&buffer_lock, flags);
-	smp_wmb();	/* insure spu event buffer updates are written */
-			/* don't want entries intermingled... */
-out:
-	return retval;
-}
-
-/*
- * This function is invoked on either a bind_context or unbind_context.
- * If called for an unbind_context, the val arg is 0; otherwise,
- * it is the object-id value for the spu context.
- * The data arg is of type 'struct spu *'.
- */
-static int spu_active_notify(struct notifier_block *self, unsigned long val,
-			     void *data)
-{
-	int retval;
-	unsigned long flags;
-	struct spu *the_spu = data;
-
-	pr_debug("SPU event notification arrived\n");
-	if (!val) {
-		spin_lock_irqsave(&cache_lock, flags);
-		retval = release_cached_info(the_spu->number);
-		spin_unlock_irqrestore(&cache_lock, flags);
-	} else {
-		retval = process_context_switch(the_spu, val);
-	}
-	return retval;
-}
-
-static struct notifier_block spu_active = {
-	.notifier_call = spu_active_notify,
-};
-
-static int number_of_online_nodes(void)
-{
-	u32 cpu; u32 tmp;
-	int nodes = 0;
-	for_each_online_cpu(cpu) {
-		tmp = cbe_cpu_to_node(cpu) + 1;
-		if (tmp > nodes)
-			nodes++;
-	}
-	return nodes;
-}
-
-/* The main purpose of this function is to synchronize
- * OProfile with SPUFS by registering to be notified of
- * SPU task switches.
- *
- * NOTE: When profiling SPUs, we must ensure that only
- * spu_sync_start is invoked and not the generic sync_start
- * in drivers/oprofile/oprof.c.	 A return value of
- * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
- * accomplish this.
- */
-int spu_sync_start(void)
-{
-	int k;
-	int ret = SKIP_GENERIC_SYNC;
-	int register_ret;
-	unsigned long flags = 0;
-
-	spu_prof_num_nodes = number_of_online_nodes();
-	num_spu_nodes = spu_prof_num_nodes * 8;
-
-	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_PROFILING_CODE);
-	add_event_entry(num_spu_nodes);
-	spin_unlock_irqrestore(&buffer_lock, flags);
-
-	/* Register for SPU events  */
-	register_ret = spu_switch_event_register(&spu_active);
-	if (register_ret) {
-		ret = SYNC_START_ERROR;
-		goto out;
-	}
-
-	for (k = 0; k < (MAX_NUMNODES * 8); k++)
-		last_guard_val[k] = 0;
-	pr_debug("spu_sync_start -- running.\n");
-out:
-	return ret;
-}
-
-/* Record SPU program counter samples to the oprofile event buffer. */
-void spu_sync_buffer(int spu_num, unsigned int *samples,
-		     int num_samples)
-{
-	unsigned long long file_offset;
-	unsigned long flags;
-	int i;
-	struct vma_to_fileoffset_map *map;
-	struct spu *the_spu;
-	unsigned long long spu_num_ll = spu_num;
-	unsigned long long spu_num_shifted = spu_num_ll << 32;
-	struct cached_info *c_info;
-
-	/* We need to obtain the cache_lock here because it's
-	 * possible that after getting the cached_info, the SPU job
-	 * corresponding to this cached_info may end, thus resulting
-	 * in the destruction of the cached_info.
-	 */
-	spin_lock_irqsave(&cache_lock, flags);
-	c_info = get_cached_info(NULL, spu_num);
-	if (!c_info) {
-		/* This legitimately happens when the SPU task ends before all
-		 * samples are recorded.
-		 * No big deal -- so we just drop a few samples.
-		 */
-		pr_debug("SPU_PROF: No cached SPU contex "
-			 "for SPU #%d. Dropping samples.\n", spu_num);
-		goto out;
-	}
-
-	map = c_info->map;
-	the_spu = c_info->the_spu;
-	spin_lock(&buffer_lock);
-	for (i = 0; i < num_samples; i++) {
-		unsigned int sample = *(samples+i);
-		int grd_val = 0;
-		file_offset = 0;
-		if (sample == 0)
-			continue;
-		file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
-
-		/* If overlays are used by this SPU application, the guard
-		 * value is non-zero, indicating which overlay section is in
-		 * use.	 We need to discard samples taken during the time
-		 * period which an overlay occurs (i.e., guard value changes).
-		 */
-		if (grd_val && grd_val != last_guard_val[spu_num]) {
-			last_guard_val[spu_num] = grd_val;
-			/* Drop the rest of the samples. */
-			break;
-		}
-
-		add_event_entry(file_offset | spu_num_shifted);
-	}
-	spin_unlock(&buffer_lock);
-out:
-	spin_unlock_irqrestore(&cache_lock, flags);
-}
-
-
-int spu_sync_stop(void)
-{
-	unsigned long flags = 0;
-	int ret = spu_switch_event_unregister(&spu_active);
-	if (ret) {
-		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: spu_switch_event_unregister returned %d\n",
-		       __FUNCTION__, __LINE__, ret);
-		goto out;
-	}
-
-	spin_lock_irqsave(&cache_lock, flags);
-	ret = release_cached_info(RELEASE_ALL);
-	spin_unlock_irqrestore(&cache_lock, flags);
-out:
-	pr_debug("spu_sync_stop -- done.\n");
-	return ret;
-}
-
-
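The spu_sync_buffer() routine deleted above emits each sample as a single 64-bit event-buffer word: SPU number in the upper 32 bits, file offset in the lower 32 (file_offset | spu_num_shifted). A stand-alone sketch of that packing; pack_spu_sample and the values are illustrative, not part of the OProfile API:

	#include <stdio.h>
	#include <stdint.h>

	/* Same layout as the deleted code's file_offset | spu_num_shifted */
	static uint64_t pack_spu_sample(unsigned spu_num, uint32_t file_offset)
	{
		return ((uint64_t)spu_num << 32) | file_offset;
	}

	int main(void)
	{
		uint64_t entry = pack_spu_sample(5, 0x12a8);	/* made-up inputs */
		printf("spu=%u offset=0x%x\n",
		       (unsigned)(entry >> 32),
		       (unsigned)(entry & 0xffffffffu));
		return 0;
	}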
diff --git a/trunk/arch/powerpc/oprofile/cell/vma_map.c b/trunk/arch/powerpc/oprofile/cell/vma_map.c
deleted file mode 100644
index 76ec1d16aef7..000000000000
--- a/trunk/arch/powerpc/oprofile/cell/vma_map.c
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Author: Maynard Johnson <maynardj@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* The code in this source file is responsible for generating
- * vma-to-fileOffset maps for both overlay and non-overlay SPU
- * applications.
- */
-
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/uaccess.h>
-#include <linux/elf.h>
-#include "pr_util.h"
-
-
-void vma_map_free(struct vma_to_fileoffset_map *map)
-{
-	while (map) {
-		struct vma_to_fileoffset_map *next = map->next;
-		kfree(map);
-		map = next;
-	}
-}
-
-unsigned int
-vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
-	       const struct spu *aSpu, int *grd_val)
-{
-	/*
-	 * Default the offset to the physical address + a flag value.
-	 * Addresses of dynamically generated code can't be found in the vma
-	 * map.	 For those addresses the flagged value will be sent on to
-	 * the user space tools so they can be reported rather than just
-	 * thrown away.
-	 */
-	u32 offset = 0x10000000 + vma;
-	u32 ovly_grd;
-
-	for (; map; map = map->next) {
-		if (vma < map->vma || vma >= map->vma + map->size)
-			continue;
-
-		if (map->guard_ptr) {
-			ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
-			if (ovly_grd != map->guard_val)
-				continue;
-			*grd_val = ovly_grd;
-		}
-		offset = vma - map->vma + map->offset;
-		break;
-	}
-
-	return offset;
-}
-
-static struct vma_to_fileoffset_map *
-vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
-	    unsigned int size, unsigned int offset, unsigned int guard_ptr,
-	    unsigned int guard_val)
-{
-	struct vma_to_fileoffset_map *new =
-		kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
-	if (!new) {
-		printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
-		       __FUNCTION__, __LINE__);
-		vma_map_free(map);
-		return NULL;
-	}
-
-	new->next = map;
-	new->vma = vma;
-	new->size = size;
-	new->offset = offset;
-	new->guard_ptr = guard_ptr;
-	new->guard_val = guard_val;
-
-	return new;
-}
-
-
-/* Parse SPE ELF header and generate a list of vma_maps.
- * A pointer to the first vma_map in the generated list
- * of vma_maps is returned.  */
-struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
-					     unsigned long spu_elf_start)
-{
-	static const unsigned char expected[EI_PAD] = {
-		[EI_MAG0] = ELFMAG0,
-		[EI_MAG1] = ELFMAG1,
-		[EI_MAG2] = ELFMAG2,
-		[EI_MAG3] = ELFMAG3,
-		[EI_CLASS] = ELFCLASS32,
-		[EI_DATA] = ELFDATA2MSB,
-		[EI_VERSION] = EV_CURRENT,
-		[EI_OSABI] = ELFOSABI_NONE
-	};
-
-	int grd_val;
-	struct vma_to_fileoffset_map *map = NULL;
-	struct spu_overlay_info ovly;
-	unsigned int overlay_tbl_offset = -1;
-	unsigned long phdr_start, shdr_start;
-	Elf32_Ehdr ehdr;
-	Elf32_Phdr phdr;
-	Elf32_Shdr shdr, shdr_str;
-	Elf32_Sym sym;
-	int i, j;
-	char name[32];
-
-	unsigned int ovly_table_sym = 0;
-	unsigned int ovly_buf_table_sym = 0;
-	unsigned int ovly_table_end_sym = 0;
-	unsigned int ovly_buf_table_end_sym = 0;
-	unsigned long ovly_table;
-	unsigned int n_ovlys;
-
-	/* Get and validate ELF header.	 */
-
-	if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
-		goto fail;
-
-	if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
-		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
-		       __FUNCTION__, __LINE__);
-		goto fail;
-	}
-	if (ehdr.e_machine != EM_SPU) {
-		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
-		       __FUNCTION__, __LINE__);
-		goto fail;
-	}
-	if (ehdr.e_type != ET_EXEC) {
-		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: Unexpected e_type parsing SPU ELF\n",
-		       __FUNCTION__, __LINE__);
-		goto fail;
-	}
-	phdr_start = spu_elf_start + ehdr.e_phoff;
-	shdr_start = spu_elf_start + ehdr.e_shoff;
-
-	/* Traverse program headers.  */
-	for (i = 0; i < ehdr.e_phnum; i++) {
-		if (copy_from_user(&phdr,
-				   (void *) (phdr_start + i * sizeof(phdr)),
-				   sizeof(phdr)))
-			goto fail;
-
-		if (phdr.p_type != PT_LOAD)
-			continue;
-		if (phdr.p_flags & (1 << 27))
-			continue;
-
-		map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
-				  phdr.p_offset, 0, 0);
-		if (!map)
-			goto fail;
-	}
-
-	pr_debug("SPU_PROF: Created non-overlay maps\n");
-	/* Traverse section table and search for overlay-related symbols.  */
-	for (i = 0; i < ehdr.e_shnum; i++) {
-		if (copy_from_user(&shdr,
-				   (void *) (shdr_start + i * sizeof(shdr)),
-				   sizeof(shdr)))
-			goto fail;
-
-		if (shdr.sh_type != SHT_SYMTAB)
-			continue;
-		if (shdr.sh_entsize != sizeof (sym))
-			continue;
-
-		if (copy_from_user(&shdr_str,
-				   (void *) (shdr_start + shdr.sh_link *
-					     sizeof(shdr)),
-				   sizeof(shdr)))
-			goto fail;
-
-		if (shdr_str.sh_type != SHT_STRTAB)
-			goto fail;;
-
-		for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
-			if (copy_from_user(&sym, (void *) (spu_elf_start +
-							   shdr.sh_offset + j *
-							   sizeof (sym)),
-					   sizeof (sym)))
-				goto fail;
-
-			if (copy_from_user(name, (void *)
-					   (spu_elf_start + shdr_str.sh_offset +
-					    sym.st_name),
-					   20))
-				goto fail;
-
-			if (memcmp(name, "_ovly_table", 12) == 0)
-				ovly_table_sym = sym.st_value;
-			if (memcmp(name, "_ovly_buf_table", 16) == 0)
-				ovly_buf_table_sym = sym.st_value;
-			if (memcmp(name, "_ovly_table_end", 16) == 0)
-				ovly_table_end_sym = sym.st_value;
-			if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
-				ovly_buf_table_end_sym = sym.st_value;
-		}
-	}
-
-	/* If we don't have overlays, we're done.  */
-	if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
-	    || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
-		pr_debug("SPU_PROF: No overlay table found\n");
-		goto out;
-	} else {
-		pr_debug("SPU_PROF: Overlay table found\n");
-	}
-
-	/* The _ovly_table symbol represents a table with one entry
-	 * per overlay section.	 The _ovly_buf_table symbol represents
-	 * a table with one entry per overlay region.
-	 * The struct spu_overlay_info gives the structure of the _ovly_table
-	 * entries.  The structure of _ovly_table_buf is simply one
-	 * u32 word per entry.
-	 */
-	overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
-					    aSpu, &grd_val);
-	if (overlay_tbl_offset < 0) {
-		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: Error finding SPU overlay table\n",
-		       __FUNCTION__, __LINE__);
-		goto fail;
-	}
-	ovly_table = spu_elf_start + overlay_tbl_offset;
-
-	n_ovlys = (ovly_table_end_sym -
-		   ovly_table_sym) / sizeof (ovly);
-
-	/* Traverse overlay table.  */
-	for (i = 0; i < n_ovlys; i++) {
-		if (copy_from_user(&ovly, (void *)
-				   (ovly_table + i * sizeof (ovly)),
-				   sizeof (ovly)))
-			goto fail;
-
-		/* The ovly.vma/size/offset arguments are analogous to the same
-		 * arguments used above for non-overlay maps.  The final two
-		 * args are referred to as the guard pointer and the guard
-		 * value.
-		 * The guard pointer is an entry in the _ovly_buf_table,
-		 * computed using ovly.buf as the index into the table.	 Since
-		 * ovly.buf values begin at '1' to reference the first (or 0th)
-		 * entry in the _ovly_buf_table, the computation subtracts 1
-		 * from ovly.buf.
-		 * The guard value is stored in the _ovly_buf_table entry and
-		 * is an index (starting at 1) back to the _ovly_table entry
-		 * that is pointing at this _ovly_buf_table entry.  So, for
-		 * example, for an overlay scenario with one overlay segment
-		 * and two overlay sections:
-		 *	- Section 1 points to the first entry of the
-		 *	  _ovly_buf_table, which contains a guard value
-		 *	  of '1', referencing the first (index=0) entry of
-		 *	  _ovly_table.
-		 *	- Section 2 points to the second entry of the
-		 *	  _ovly_buf_table, which contains a guard value
-		 *	  of '2', referencing the second (index=1) entry of
-		 *	  _ovly_table.
-		 */
-		map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
-				  ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
-		if (!map)
-			goto fail;
-	}
-	goto out;
-
- fail:
-	map = NULL;
- out:
-	return map;
-}
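The guard-word test in the deleted vma_map_lookup() is what makes overlay samples attributable: a section's map entry only matches while its overlay is actually resident in SPU local store. A toy user-space model of that walk; struct map, lookup() and the constants are simplified stand-ins, not the kernel types:

	#include <stdint.h>
	#include <stdio.h>

	/* Simplified stand-in for struct vma_to_fileoffset_map */
	struct map {
		struct map *next;
		uint32_t vma, size, offset;
		uint32_t guard_ptr;	/* local-store offset of guard word */
		uint32_t guard_val;	/* value the guard word must hold */
	};

	static uint32_t lookup(const struct map *m, uint32_t vma,
			       const uint8_t *ls)
	{
		uint32_t off = 0x10000000 + vma;	/* flag: not in map */

		for (; m; m = m->next) {
			if (vma < m->vma || vma >= m->vma + m->size)
				continue;
			/* overlay section: valid only when its guard word
			 * says this overlay is the one currently loaded */
			if (m->guard_ptr &&
			    *(const uint32_t *)(ls + m->guard_ptr) != m->guard_val)
				continue;
			off = vma - m->vma + m->offset;
			break;
		}
		return off;
	}

	int main(void)
	{
		uint8_t ls[64] = { 0 };		/* toy local store */
		struct map ovly = { NULL, 0x2000, 0x400, 0x9000, 16, 1 };

		*(uint32_t *)(ls + 16) = 1;	/* overlay 1 is resident */
		printf("0x%x\n", (unsigned)lookup(&ovly, 0x2010, ls)); /* 0x9010 */
		return 0;
	}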
diff --git a/trunk/arch/powerpc/oprofile/common.c b/trunk/arch/powerpc/oprofile/common.c
index a28cce1d6c24..1a7ef7e246d2 100644
--- a/trunk/arch/powerpc/oprofile/common.c
+++ b/trunk/arch/powerpc/oprofile/common.c
@@ -29,8 +29,6 @@ static struct op_powerpc_model *model;
 static struct op_counter_config ctr[OP_MAX_COUNTER];
 static struct op_system_config sys;
 
-static int op_per_cpu_rc;
-
 static void op_handle_interrupt(struct pt_regs *regs)
 {
 	model->handle_interrupt(regs, ctr);
@@ -38,41 +36,25 @@ static void op_handle_interrupt(struct pt_regs *regs)
 
 static void op_powerpc_cpu_setup(void *dummy)
 {
-	int ret;
-
-	ret = model->cpu_setup(ctr);
-
-	if (ret != 0)
-		op_per_cpu_rc = ret;
+	model->cpu_setup(ctr);
 }
 
 static int op_powerpc_setup(void)
 {
 	int err;
 
-	op_per_cpu_rc = 0;
-
 	/* Grab the hardware */
 	err = reserve_pmc_hardware(op_handle_interrupt);
 	if (err)
 		return err;
 
 	/* Pre-compute the values to stuff in the hardware registers.  */
-	op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
+	model->reg_setup(ctr, &sys, model->num_counters);
 
-	if (op_per_cpu_rc)
-		goto out;
-
-	/* Configure the registers on all cpus.	 If an error occurs on one
-	 * of the cpus, op_per_cpu_rc will be set to the error */
+	/* Configure the registers on all cpus.	 */
 	on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
 
-out:	if (op_per_cpu_rc) {
-		/* error on setup release the performance counter hardware */
-		release_pmc_hardware();
-	}
-
-	return op_per_cpu_rc;
+	return 0;
 }
 
 static void op_powerpc_shutdown(void)
@@ -82,29 +64,16 @@ static void op_powerpc_shutdown(void)
 
 static void op_powerpc_cpu_start(void *dummy)
 {
-	/* If any of the cpus have return an error, set the
-	 * global flag to the error so it can be returned
-	 * to the generic OProfile caller.
-	 */
-	int ret;
-
-	ret = model->start(ctr);
-	if (ret != 0)
-		op_per_cpu_rc = ret;
+	model->start(ctr);
 }
 
 static int op_powerpc_start(void)
 {
-	op_per_cpu_rc = 0;
-
 	if (model->global_start)
-		return model->global_start(ctr);
-	if (model->start) {
+		model->global_start(ctr);
+	if (model->start)
 		on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
-		return op_per_cpu_rc;
-	}
-	return -EIO; /* No start function is defined for this
-			power architecture */
+	return 0;
 }
 
 static inline void op_powerpc_cpu_stop(void *dummy)
@@ -178,13 +147,11 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
 	switch (cur_cpu_spec->oprofile_type) {
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_OPROFILE_CELL
+#ifdef CONFIG_PPC_CELL_NATIVE
 		case PPC_OPROFILE_CELL:
 			if (firmware_has_feature(FW_FEATURE_LPAR))
 				return -ENODEV;
 			model = &op_model_cell;
-			ops->sync_start = model->sync_start;
-			ops->sync_stop = model->sync_stop;
 			break;
 #endif
 		case PPC_OPROFILE_RS64:
diff --git a/trunk/arch/powerpc/oprofile/op_model_7450.c b/trunk/arch/powerpc/oprofile/op_model_7450.c
index cc599eb8768b..5d1bbaf35ccb 100644
--- a/trunk/arch/powerpc/oprofile/op_model_7450.c
+++ b/trunk/arch/powerpc/oprofile/op_model_7450.c
@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
 
 /* Configures the counters on this CPU based on the global
  * settings */
-static int fsl7450_cpu_setup(struct op_counter_config *ctr)
+static void fsl7450_cpu_setup(struct op_counter_config *ctr)
 {
 	/* freeze all counters */
 	pmc_stop_ctrs();
@@ -89,14 +89,12 @@ static int fsl7450_cpu_setup(struct op_counter_config *ctr)
 	mtspr(SPRN_MMCR0, mmcr0_val);
 	mtspr(SPRN_MMCR1, mmcr1_val);
 	mtspr(SPRN_MMCR2, mmcr2_val);
-
-	return 0;
 }
 
 #define NUM_CTRS 6
 
 /* Configures the global settings for the countes on all CPUs.  */
-static int fsl7450_reg_setup(struct op_counter_config *ctr,
+static void fsl7450_reg_setup(struct op_counter_config *ctr,
 			     struct op_system_config *sys,
 			     int num_ctrs)
 {
@@ -128,12 +126,10 @@ static int fsl7450_reg_setup(struct op_counter_config *ctr,
 		| mmcr1_event6(ctr[5].event);
 
 	mmcr2_val = 0;
-
-	return 0;
 }
 
 /* Sets the counters on this CPU to the chosen values, and starts them */
-static int fsl7450_start(struct op_counter_config *ctr)
+static void fsl7450_start(struct op_counter_config *ctr)
 {
 	int i;
 
@@ -152,8 +148,6 @@ static int fsl7450_start(struct op_counter_config *ctr)
 	pmc_start_ctrs();
 
 	oprofile_running = 1;
-
-	return 0;
 }
 
 /* Stop the counters on this CPU */
@@ -199,7 +193,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
 	/* The freeze bit was set by the interrupt. */
 	/* Clear the freeze bit, and reenable the interrupt.
 	 * The counters won't actually start until the rfi clears
-	 * the PM/M bit */
+	 * the PMM bit */
 	pmc_start_ctrs();
 }
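The op_model_cell.c hunks that follow keep a small RTAS wrapper, rtas_ibm_cbe_perftools(), which hands a 64-bit physical address to firmware as two 32-bit rtas_call() arguments (paddr >> 32 and paddr & 0xffffffff). A stand-alone sketch of just that split; the address value is made up:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t paddr = 0x0000000fdeadbeefULL;	/* illustrative */
		uint32_t hi = (uint32_t)(paddr >> 32);
		uint32_t lo = (uint32_t)(paddr & 0xffffffffULL);

		printf("hi=0x%x lo=0x%x\n", hi, lo);	/* hi=0xf lo=0xdeadbeef */
		return 0;
	}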
-arnd */ static u32 hdw_thread; static u32 virt_cntr_inter_mask; static struct timer_list timer_virt_cntr; -/* - * pm_signal needs to be global since it is initialized in +/* pm_signal needs to be global since it is initialized in * cell_reg_setup at the time when the necessary information * is available. */ static struct pm_signal pm_signal[NR_PHYS_CTRS]; -static int pm_rtas_token; /* token for debug bus setup call */ -static int spu_rtas_token; /* token for SPU cycle profiling */ +static int pm_rtas_token; static u32 reset_value[NR_PHYS_CTRS]; static int num_counters; @@ -181,8 +147,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru, { u64 paddr = __pa(address); - return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, - passthru, paddr >> 32, paddr & 0xffffffff, length); + return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, + paddr >> 32, paddr & 0xffffffff, length); } static void pm_rtas_reset_signals(u32 node) @@ -190,13 +156,12 @@ static void pm_rtas_reset_signals(u32 node) int ret; struct pm_signal pm_signal_local; - /* - * The debug bus is being set to the passthru disable state. - * However, the FW still expects atleast one legal signal routing - * entry or it will return an error on the arguments. If we don't - * supply a valid entry, we must ignore all return values. Ignoring - * all return values means we might miss an error we should be - * concerned about. + /* The debug bus is being set to the passthru disable state. + * However, the FW still expects atleast one legal signal routing + * entry or it will return an error on the arguments. If we don't + * supply a valid entry, we must ignore all return values. Ignoring + * all return values means we might miss an error we should be + * concerned about. */ /* fw expects physical cpu #. */ @@ -210,24 +175,18 @@ static void pm_rtas_reset_signals(u32 node) &pm_signal_local, sizeof(struct pm_signal)); - if (unlikely(ret)) - /* - * Not a fatal error. For Oprofile stop, the oprofile - * functions do not support returning an error for - * failure to stop OProfile. - */ + if (ret) printk(KERN_WARNING "%s: rtas returned: %d\n", __FUNCTION__, ret); } -static int pm_rtas_activate_signals(u32 node, u32 count) +static void pm_rtas_activate_signals(u32 node, u32 count) { int ret; int i, j; struct pm_signal pm_signal_local[NR_PHYS_CTRS]; - /* - * There is no debug setup required for the cycles event. + /* There is no debug setup required for the cycles event. * Note that only events in the same group can be used. * Otherwise, there will be conflicts in correctly routing * the signals on the debug bus. It is the responsiblity @@ -254,14 +213,10 @@ static int pm_rtas_activate_signals(u32 node, u32 count) pm_signal_local, i * sizeof(struct pm_signal)); - if (unlikely(ret)) { + if (ret) printk(KERN_WARNING "%s: rtas returned: %d\n", __FUNCTION__, ret); - return -EIO; - } } - - return 0; } /* @@ -305,12 +260,11 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); - /* - * Some of the islands signal selection is based on 64 bit words. + /* Some of the islands signal selection is based on 64 bit words. * The debug bus words are 32 bits, the input words to the performance * counters are defined as 32 bits. Need to convert the 64 bit island * specification to the appropriate 32 input bit and bus word for the - * performance counter event selection. 
See the CELL Performance + * performance counter event selection. See the CELL Performance * monitoring signals manual and the Perf cntr hardware descriptions * for the details. */ @@ -344,7 +298,6 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) input_bus[j] = i; pm_regs.group_control |= (i << (31 - i)); - break; } } @@ -356,8 +309,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) static void write_pm_cntrl(int cpu) { - /* - * Oprofile will use 32 bit counters, set bits 7:10 to 0 + /* Oprofile will use 32 bit counters, set bits 7:10 to 0 * pmregs.pm_cntrl is a global */ @@ -374,8 +326,7 @@ static void write_pm_cntrl(int cpu) if (pm_regs.pm_cntrl.freeze == 1) val |= CBE_PM_FREEZE_ALL_CTRS; - /* - * Routine set_count_mode must be called previously to set + /* Routine set_count_mode must be called previously to set * the count mode based on the user selection of user and kernel. */ val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); @@ -385,8 +336,7 @@ static void write_pm_cntrl(int cpu) static inline void set_count_mode(u32 kernel, u32 user) { - /* - * The user must specify user and kernel if they want them. If + /* The user must specify user and kernel if they want them. If * neither is specified, OProfile will count in hypervisor mode. * pm_regs.pm_cntrl is a global */ @@ -414,7 +364,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) /* * Oprofile is expected to collect data on all CPUs simultaneously. - * However, there is one set of performance counters per node. There are + * However, there is one set of performance counters per node. There are * two hardware threads or virtual CPUs on each node. Hence, OProfile must * multiplex in time the performance counter collection on the two virtual * CPUs. The multiplexing of the performance counters is done by this @@ -427,19 +377,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) * pair of per-cpu arrays is used for storing the previous and next * pmc values for a given node. * NOTE: We use the per-cpu variable to improve cache performance. - * - * This routine will alternate loading the virtual counters for - * virtual CPUs */ static void cell_virtual_cntr(unsigned long data) { + /* This routine will alternate loading the virtual counters for + * virtual CPUs + */ int i, prev_hdw_thread, next_hdw_thread; u32 cpu; unsigned long flags; - /* - * Make sure that the interrupt_hander and the virt counter are - * not both playing with the counters on the same node. + /* Make sure that the interrupt_hander and + * the virt counter are not both playing with + * the counters on the same node. */ spin_lock_irqsave(&virt_cntr_lock, flags); @@ -450,25 +400,22 @@ static void cell_virtual_cntr(unsigned long data) hdw_thread = 1 ^ hdw_thread; next_hdw_thread = hdw_thread; - /* - * There are some per thread events. Must do the + for (i = 0; i < num_counters; i++) + /* There are some per thread events. Must do the * set event, for the thread that is being started */ - for (i = 0; i < num_counters; i++) set_pm_event(i, pmc_cntrl[next_hdw_thread][i].evnts, pmc_cntrl[next_hdw_thread][i].masks); - /* - * The following is done only once per each node, but + /* The following is done only once per each node, but * we need cpu #, not node #, to pass to the cbe_xxx functions. 
*/ for_each_online_cpu(cpu) { if (cbe_get_hw_thread_id(cpu)) continue; - /* - * stop counters, save counter values, restore counts + /* stop counters, save counter values, restore counts * for previous thread */ cbe_disable_pm(cpu); @@ -481,7 +428,7 @@ static void cell_virtual_cntr(unsigned long data) == 0xFFFFFFFF) /* If the cntr value is 0xffffffff, we must * reset that to 0xfffffff0 when the current - * thread is restarted. This will generate a + * thread is restarted. This will generate a * new interrupt and make sure that we never * restore the counters to the max value. If * the counters were restored to the max value, @@ -497,15 +444,13 @@ static void cell_virtual_cntr(unsigned long data) next_hdw_thread)[i]); } - /* - * Switch to the other thread. Change the interrupt + /* Switch to the other thread. Change the interrupt * and control regs to be scheduled on the CPU * corresponding to the thread to execute. */ for (i = 0; i < num_counters; i++) { if (pmc_cntrl[next_hdw_thread][i].enabled) { - /* - * There are some per thread events. + /* There are some per thread events. * Must do the set event, enable_cntr * for each cpu. */ @@ -537,42 +482,17 @@ static void start_virt_cntrs(void) } /* This function is called once for all cpus combined */ -static int cell_reg_setup(struct op_counter_config *ctr, - struct op_system_config *sys, int num_ctrs) +static void +cell_reg_setup(struct op_counter_config *ctr, + struct op_system_config *sys, int num_ctrs) { int i, j, cpu; - spu_cycle_reset = 0; - - if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { - spu_cycle_reset = ctr[0].count; - - /* - * Each node will need to make the rtas call to start - * and stop SPU profiling. Get the token once and store it. - */ - spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); - - if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { - printk(KERN_ERR - "%s: rtas token ibm,cbe-spu-perftools unknown\n", - __FUNCTION__); - return -EIO; - } - } pm_rtas_token = rtas_token("ibm,cbe-perftools"); - - /* - * For all events excetp PPU CYCLEs, each node will need to make - * the rtas cbe-perftools call to setup and reset the debug bus. - * Make the token lookup call once and store it in the global - * variable pm_rtas_token. - */ - if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { - printk(KERN_ERR - "%s: rtas token ibm,cbe-perftools unknown\n", + if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { + printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", __FUNCTION__); - return -EIO; + goto out; } num_counters = num_ctrs; @@ -600,8 +520,7 @@ static int cell_reg_setup(struct op_counter_config *ctr, per_cpu(pmc_values, j)[i] = 0; } - /* - * Setup the thread 1 events, map the thread 0 event to the + /* Setup the thread 1 events, map the thread 0 event to the * equivalent thread 1 event. */ for (i = 0; i < num_ctrs; ++i) { @@ -625,10 +544,9 @@ static int cell_reg_setup(struct op_counter_config *ctr, for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) input_bus[i] = 0xff; - /* - * Our counters count up, and "count" refers to + /* Our counters count up, and "count" refers to * how much before the next interrupt, and we interrupt - * on overflow. So we calculate the starting value + * on overflow. So we calculate the starting value * which will give us "count" until overflow. * Then we set the events on the enabled counters. 
*/ @@ -651,27 +569,28 @@ static int cell_reg_setup(struct op_counter_config *ctr, for (i = 0; i < num_counters; ++i) { per_cpu(pmc_values, cpu)[i] = reset_value[i]; } - - return 0; +out: + ; } - - /* This function is called once for each cpu */ -static int cell_cpu_setup(struct op_counter_config *cntr) +static void cell_cpu_setup(struct op_counter_config *cntr) { u32 cpu = smp_processor_id(); u32 num_enabled = 0; int i; - if (spu_cycle_reset) - return 0; - /* There is one performance monitor per processor chip (i.e. node), * so we only need to perform this function once per node. */ if (cbe_get_hw_thread_id(cpu)) - return 0; + goto out; + + if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { + printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", + __FUNCTION__); + goto out; + } /* Stop all counters */ cbe_disable_pm(cpu); @@ -690,286 +609,16 @@ static int cell_cpu_setup(struct op_counter_config *cntr) } } - /* - * The pm_rtas_activate_signals will return -EIO if the FW - * call failed. - */ - return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); -} - -#define ENTRIES 303 -#define MAXLFSR 0xFFFFFF - -/* precomputed table of 24 bit LFSR values */ -static int initial_lfsr[] = { - 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424, - 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716, - 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547, - 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392, - 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026, - 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556, - 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769, - 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893, - 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017, - 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756, - 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558, - 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401, - 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720, - 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042, - 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955, - 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934, - 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783, - 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278, - 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051, - 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741, - 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972, - 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302, - 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384, - 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469, - 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697, - 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398, - 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140, - 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214, - 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386, - 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087, - 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 
4741962, 9658130, - 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300, - 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475, - 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950, - 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003, - 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375, - 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426, - 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607 -}; - -/* - * The hardware uses an LFSR counting sequence to determine when to capture - * the SPU PCs. An LFSR sequence is like a pseudo random number sequence - * where each number occurs once in the sequence but the sequence is not in - * numerical order. The SPU PC capture is done when the LFSR sequence reaches - * the last value in the sequence. Hence the user specified value N - * corresponds to the LFSR number that is N from the end of the sequence. - * - * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit - * LFSR sequence is broken into four ranges. The spacing of the precomputed - * values is adjusted in each range so the error between the user specified - * number (N) of events between samples and the actual number of events based - * on the precomputed value will be less than about 6.2%. Note, if the user - * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used. - * This is to prevent the loss of samples because the trace buffer is full. - * - * User specified N Step between Index in - * precomputed values precomputed - * table - * 0 to 2^16-1 ---- 0 - * 2^16 to 2^16+2^19-1 2^12 1 to 128 - * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256 - * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302 - * - * - * For example, the LFSR values in the second range are computed for 2^16, - * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices - * 1, 2,..., 127, 128. - * - * The 24 bit LFSR value for the nth number in the sequence can be - * calculated using the following code: - * - * #define size 24 - * int calculate_lfsr(int n) - * { - * int i; - * unsigned int newlfsr0; - * unsigned int lfsr = 0xFFFFFF; - * unsigned int howmany = n; - * - * for (i = 2; i < howmany + 2; i++) { - * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^ - * ((lfsr >> (size - 1 - 1)) & 1) ^ - * (((lfsr >> (size - 1 - 6)) & 1) ^ - * ((lfsr >> (size - 1 - 23)) & 1))); - * - * lfsr >>= 1; - * lfsr = lfsr | (newlfsr0 << (size - 1)); - * } - * return lfsr; - * } - */ - -#define V2_16 (0x1 << 16) -#define V2_19 (0x1 << 19) -#define V2_22 (0x1 << 22) - -static int calculate_lfsr(int n) -{ - /* - * The ranges and steps are in powers of 2 so the calculations - * can be done using shifts rather than divide. - */ - int index; - - if ((n >> 16) == 0) - index = 0; - else if (((n - V2_16) >> 19) == 0) - index = ((n - V2_16) >> 12) + 1; - else if (((n - V2_16 - V2_19) >> 22) == 0) - index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128; - else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0) - index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256; - else - index = ENTRIES-1; - - /* make sure index is valid */ - if ((index > ENTRIES) || (index < 0)) - index = ENTRIES-1; - - return initial_lfsr[index]; -} - -static int pm_rtas_activate_spu_profiling(u32 node) -{ - int ret, i; - struct pm_signal pm_signal_local[NR_PHYS_CTRS]; - - /* - * Set up the rtas call to configure the debug bus to - * route the SPU PCs.
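The deleted lookup-table comment above is easiest to check with a concrete value. Below is a minimal, self-contained sketch of the same index computation, with the range constants copied from the deleted code; the helper name lfsr_table_index and the main() driver are illustrative only:

    #include <stdio.h>

    #define V2_16 (0x1 << 16)
    #define V2_19 (0x1 << 19)
    #define V2_22 (0x1 << 22)
    #define ENTRIES 303

    /* Same range/step logic as the deleted calculate_lfsr(), but
     * returning the table index rather than the precomputed entry. */
    static int lfsr_table_index(int n)
    {
        if ((n >> 16) == 0)
            return 0;                                   /* 0 .. 2^16-1 */
        if (((n - V2_16) >> 19) == 0)
            return ((n - V2_16) >> 12) + 1;             /* step 2^12 */
        if (((n - V2_16 - V2_19) >> 22) == 0)
            return ((n - V2_16 - V2_19) >> 15) + 1 + 128;   /* step 2^15 */
        if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
            return ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256; /* step 2^18 */
        return ENTRIES - 1;
    }

    int main(void)
    {
        /* Any N below 2^16 maps to index 0; N = 2^16 + 3*2^12 lies three
         * steps into the second range, so it maps to index 4. */
        printf("%d\n", lfsr_table_index(40000));                     /* 0 */
        printf("%d\n", lfsr_table_index((1 << 16) + 3 * (1 << 12))); /* 4 */
        return 0;
    }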
Setup the pm_signal for each SPU - */ - for (i = 0; i < NUM_SPUS_PER_NODE; i++) { - pm_signal_local[i].cpu = node; - pm_signal_local[i].signal_group = 41; - /* spu i on word (i/2) */ - pm_signal_local[i].bus_word = 1 << i / 2; - /* spu i */ - pm_signal_local[i].sub_unit = i; - pm_signal_local[i].bit = 63; - } - - ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, - PASSTHRU_ENABLE, pm_signal_local, - (NUM_SPUS_PER_NODE - * sizeof(struct pm_signal))); - - if (unlikely(ret)) { - printk(KERN_WARNING "%s: rtas returned: %d\n", - __FUNCTION__, ret); - return -EIO; - } - - return 0; -} - -#ifdef CONFIG_CPU_FREQ -static int -oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data) -{ - int ret = 0; - struct cpufreq_freqs *frq = data; - if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) || - (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) || - (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) - set_spu_profiling_frequency(frq->new, spu_cycle_reset); - return ret; -} - -static struct notifier_block cpu_freq_notifier_block = { - .notifier_call = oprof_cpufreq_notify -}; -#endif - -static int cell_global_start_spu(struct op_counter_config *ctr) -{ - int subfunc; - unsigned int lfsr_value; - int cpu; - int ret; - int rtas_error; - unsigned int cpu_khzfreq = 0; - - /* The SPU profiling uses time-based profiling based on - * cpu frequency, so if configured with the CPU_FREQ - * option, we should detect frequency changes and react - * accordingly. - */ -#ifdef CONFIG_CPU_FREQ - ret = cpufreq_register_notifier(&cpu_freq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (ret < 0) - /* this is not a fatal error */ - printk(KERN_ERR "CPU freq change registration failed: %d\n", - ret); - - else - cpu_khzfreq = cpufreq_quick_get(smp_processor_id()); -#endif - - set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset); - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - /* - * Setup SPU cycle-based profiling. - * Set perf_mon_control bit 0 to a zero before - * enabling spu collection hardware. - */ - cbe_write_pm(cpu, pm_control, 0); - - if (spu_cycle_reset > MAX_SPU_COUNT) - /* use largest possible value */ - lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1); - else - lfsr_value = calculate_lfsr(spu_cycle_reset); - - /* must use a non zero value. Zero disables data collection. */ - if (lfsr_value == 0) - lfsr_value = calculate_lfsr(1); - - lfsr_value = lfsr_value << 8; /* shift lfsr to correct - * register location - */ - - /* debug bus setup */ - ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu)); - - if (unlikely(ret)) { - rtas_error = ret; - goto out; - } - - - subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */ - - /* start profiling */ - ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, - cbe_cpu_to_node(cpu), lfsr_value); - - if (unlikely(ret != 0)) { - printk(KERN_ERR - "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", - __FUNCTION__, ret); - rtas_error = -EIO; - goto out; - } - } - - rtas_error = start_spu_profiling(spu_cycle_reset); - if (rtas_error) - goto out_stop; - - oprofile_running = 1; - return 0; - -out_stop: - cell_global_stop_spu(); /* clean up the PMU/debug bus */ + pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); out: - return rtas_error; + ; } -static int cell_global_start_ppu(struct op_counter_config *ctr) +static void cell_global_start(struct op_counter_config *ctr) { - u32 cpu, i; + u32 cpu; u32 interrupt_mask = 0; + u32 i; /* This routine gets called once for the system. 
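For the debug-bus setup removed above, the packing rule is that SPU i is routed onto bus word i/2, two SPUs per word. A standalone sketch that prints the resulting one-hot bus_word masks; the struct here is a pared-down stand-in for the kernel's struct pm_signal:

    #include <stdio.h>

    #define NUM_SPUS_PER_NODE 8

    /* Pared-down stand-in for the kernel's struct pm_signal. */
    struct pm_signal_sketch {
        int sub_unit;   /* which SPU on the node */
        int bus_word;   /* one-hot mask of the debug-bus word used */
    };

    int main(void)
    {
        struct pm_signal_sketch sig[NUM_SPUS_PER_NODE];
        int i;

        for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
            sig[i].sub_unit = i;
            sig[i].bus_word = 1 << (i / 2); /* spu i on word i/2 */
            printf("spu %d -> bus_word 0x%x\n", i, sig[i].bus_word);
        }
        /* prints 0x1, 0x1, 0x2, 0x2, 0x4, 0x4, 0x8, 0x8 */
        return 0;
    }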
* There is one performance monitor per node, so we @@ -1002,79 +651,19 @@ static int cell_global_start_ppu(struct op_counter_config *ctr) oprofile_running = 1; smp_wmb(); - /* - * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being - * executed which manipulates the PMU. We start the "virtual counter" + /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being + * executed which manipulates the PMU. We start the "virtual counter" * here so that we do not need to synchronize access to the PMU in * the above for-loop. */ start_virt_cntrs(); - - return 0; } -static int cell_global_start(struct op_counter_config *ctr) -{ - if (spu_cycle_reset) - return cell_global_start_spu(ctr); - else - return cell_global_start_ppu(ctr); -} - -/* - * Note the generic OProfile stop calls do not support returning - * an error on stop. Hence, will not return an error if the FW - * calls fail on stop. Failure to reset the debug bus is not an issue. - * Failure to disable the SPU profiling is not an issue. The FW calls - * to enable the performance counters and debug bus will work even if - * the hardware was not cleanly reset. - */ -static void cell_global_stop_spu(void) -{ - int subfunc, rtn_value; - unsigned int lfsr_value; - int cpu; - - oprofile_running = 0; - -#ifdef CONFIG_CPU_FREQ - cpufreq_unregister_notifier(&cpu_freq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); -#endif - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - subfunc = 3; /* - * 2 - activate SPU tracing, - * 3 - deactivate - */ - lfsr_value = 0x8f100000; - - rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, - subfunc, cbe_cpu_to_node(cpu), - lfsr_value); - - if (unlikely(rtn_value != 0)) { - printk(KERN_ERR - "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", - __FUNCTION__, rtn_value); - } - - /* Deactivate the signals */ - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); - } - - stop_spu_profiling(); -} - -static void cell_global_stop_ppu(void) +static void cell_global_stop(void) { int cpu; - /* - * This routine will be called once for the system. + /* This routine will be called once for the system. * There is one performance monitor per node, so we * only need to perform this function once per node. */ @@ -1098,16 +687,8 @@ static void cell_global_stop_ppu(void) } } -static void cell_global_stop(void) -{ - if (spu_cycle_reset) - cell_global_stop_spu(); - else - cell_global_stop_ppu(); -} - -static void cell_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) +static void +cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) { u32 cpu; u64 pc; @@ -1118,15 +699,13 @@ static void cell_handle_interrupt(struct pt_regs *regs, cpu = smp_processor_id(); - /* - * Need to make sure the interrupt handler and the virt counter + /* Need to make sure the interrupt handler and the virt counter * routine are not running at the same time. See the * cell_virtual_cntr() routine for additional comments. */ spin_lock_irqsave(&virt_cntr_lock, flags); - /* - * Need to disable and reenable the performance counters + /* Need to disable and reenable the performance counters * to get the desired behavior from the hardware. This * is hardware specific. */ @@ -1135,8 +714,7 @@ static void cell_handle_interrupt(struct pt_regs *regs, interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); - /* - * If the interrupt mask has been cleared, then the virt cntr + /* If the interrupt mask has been cleared, then the virt cntr * has cleared the interrupt. 
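The 0xfffffff0 restore value discussed around these hunks can be sanity-checked in isolation: a counter restored to the full 32-bit maximum would sit right on the overflow boundary, while backing it off by 16 events guarantees a fresh interrupt shortly after the thread is restarted. A minimal sketch of that clamp; the helper is illustrative, the driver performs this check inline when saving per-thread counts:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative helper only; the driver does this inline when it
     * saves the per-thread counter values. */
    static uint32_t clamp_saved_count(uint32_t count)
    {
        /* A count saved at the 32-bit maximum would sit on the overflow
         * boundary when restored; backing off by 16 events makes the
         * hardware regenerate the interrupt shortly after restart. */
        if (count == 0xFFFFFFFFu)
            count = 0xFFFFFFF0u;
        return count;
    }

    int main(void)
    {
        printf("0x%x\n", clamp_saved_count(0xFFFFFFFFu)); /* 0xfffffff0 */
        printf("0x%x\n", clamp_saved_count(0x12345678u)); /* unchanged  */
        return 0;
    }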
When the thread that generated * the interrupt is restored, the data count will be restored to * 0xfffffff0 to cause the interrupt to be regenerated. @@ -1154,20 +732,18 @@ static void cell_handle_interrupt(struct pt_regs *regs, } } - /* - * The counters were frozen by the interrupt. + /* The counters were frozen by the interrupt. * Reenable the interrupt and restart the counters. * If there was a race between the interrupt handler and - * the virtual counter routine, the virtual counter + * the virtual counter routine, the virtual counter * routine may have cleared the interrupts. Hence we must * use the virt_cntr_inter_mask to re-enable the interrupts. */ cbe_enable_pm_interrupts(cpu, hdw_thread, virt_cntr_inter_mask); - /* - * The writes to the various performance counters only write - * to a latch. The new values (interrupt setting bits, reset + /* The writes to the various performance counters only write + * to a latch. The new values (interrupt setting bits, reset * counter value etc.) are not copied to the actual registers * until the performance monitor is enabled. In order to get * this to work as desired, the performance monitor needs to @@ -1179,33 +755,10 @@ static void cell_handle_interrupt(struct pt_regs *regs, spin_unlock_irqrestore(&virt_cntr_lock, flags); } -/* - * This function is called from the generic OProfile - * driver. When profiling PPUs, we need to do the - * generic sync start; otherwise, do spu_sync_start. - */ -static int cell_sync_start(void) -{ - if (spu_cycle_reset) - return spu_sync_start(); - else - return DO_GENERIC_SYNC; -} - -static int cell_sync_stop(void) -{ - if (spu_cycle_reset) - return spu_sync_stop(); - else - return 1; -} - struct op_powerpc_model op_model_cell = { .reg_setup = cell_reg_setup, .cpu_setup = cell_cpu_setup, .global_start = cell_global_start, .global_stop = cell_global_stop, - .sync_start = cell_sync_start, - .sync_stop = cell_sync_stop, .handle_interrupt = cell_handle_interrupt, }; diff --git a/trunk/arch/powerpc/oprofile/op_model_fsl_booke.c b/trunk/arch/powerpc/oprofile/op_model_fsl_booke.c index 183a28bb1812..2267eb8c661b 100644 --- a/trunk/arch/powerpc/oprofile/op_model_fsl_booke.c +++ b/trunk/arch/powerpc/oprofile/op_model_fsl_booke.c @@ -244,7 +244,7 @@ static void dump_pmcs(void) mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); } -static int fsl_booke_cpu_setup(struct op_counter_config *ctr) +static void fsl_booke_cpu_setup(struct op_counter_config *ctr) { int i; @@ -258,11 +258,9 @@ static int fsl_booke_cpu_setup(struct op_counter_config *ctr) set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); } - - return 0; } -static int fsl_booke_reg_setup(struct op_counter_config *ctr, +static void fsl_booke_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { @@ -278,10 +276,9 @@ static int fsl_booke_reg_setup(struct op_counter_config *ctr, for (i = 0; i < num_counters; ++i) reset_value[i] = 0x80000000UL - ctr[i].count; - return 0; } -static int fsl_booke_start(struct op_counter_config *ctr) +static void fsl_booke_start(struct op_counter_config *ctr) { int i; @@ -311,8 +308,6 @@ static int fsl_booke_start(struct op_counter_config *ctr) pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), mfpmr(PMRN_PMGC0)); - - return 0; } static void fsl_booke_stop(void) diff --git a/trunk/arch/powerpc/oprofile/op_model_pa6t.c b/trunk/arch/powerpc/oprofile/op_model_pa6t.c index c40de461fd4e..e8a56b0adadc 100644 --- a/trunk/arch/powerpc/oprofile/op_model_pa6t.c +++ b/trunk/arch/powerpc/oprofile/op_model_pa6t.c @@ -89,7
+89,7 @@ static inline void ctr_write(unsigned int i, u64 val) /* precompute the values to stuff in the hardware registers */ -static int pa6t_reg_setup(struct op_counter_config *ctr, +static void pa6t_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { @@ -135,12 +135,10 @@ static int pa6t_reg_setup(struct op_counter_config *ctr, pr_debug("reset_value for pmc%u inited to 0x%lx\n", pmc, reset_value[pmc]); } - - return 0; } /* configure registers on this cpu */ -static int pa6t_cpu_setup(struct op_counter_config *ctr) +static void pa6t_cpu_setup(struct op_counter_config *ctr) { u64 mmcr0 = mmcr0_val; u64 mmcr1 = mmcr1_val; @@ -156,11 +154,9 @@ static int pa6t_cpu_setup(struct op_counter_config *ctr) mfspr(SPRN_PA6T_MMCR0)); pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), mfspr(SPRN_PA6T_MMCR1)); - - return 0; } -static int pa6t_start(struct op_counter_config *ctr) +static void pa6t_start(struct op_counter_config *ctr) { int i; @@ -178,8 +174,6 @@ static int pa6t_start(struct op_counter_config *ctr) oprofile_running = 1; pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); - - return 0; } static void pa6t_stop(void) diff --git a/trunk/arch/powerpc/oprofile/op_model_power4.c b/trunk/arch/powerpc/oprofile/op_model_power4.c index cddc250a6a5c..a7c206b665af 100644 --- a/trunk/arch/powerpc/oprofile/op_model_power4.c +++ b/trunk/arch/powerpc/oprofile/op_model_power4.c @@ -32,7 +32,7 @@ static u32 mmcr0_val; static u64 mmcr1_val; static u64 mmcra_val; -static int power4_reg_setup(struct op_counter_config *ctr, +static void power4_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { @@ -60,8 +60,6 @@ static int power4_reg_setup(struct op_counter_config *ctr, mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; else mmcr0_val |= MMCR0_PROBLEM_DISABLE; - - return 0; } extern void ppc64_enable_pmcs(void); @@ -86,7 +84,7 @@ static inline int mmcra_must_set_sample(void) return 0; } -static int power4_cpu_setup(struct op_counter_config *ctr) +static void power4_cpu_setup(struct op_counter_config *ctr) { unsigned int mmcr0 = mmcr0_val; unsigned long mmcra = mmcra_val; @@ -113,11 +111,9 @@ static int power4_cpu_setup(struct op_counter_config *ctr) mfspr(SPRN_MMCR1)); dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), mfspr(SPRN_MMCRA)); - - return 0; } -static int power4_start(struct op_counter_config *ctr) +static void power4_start(struct op_counter_config *ctr) { int i; unsigned int mmcr0; @@ -152,7 +148,6 @@ static int power4_start(struct op_counter_config *ctr) oprofile_running = 1; dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); - return 0; } static void power4_stop(void) diff --git a/trunk/arch/powerpc/oprofile/op_model_rs64.c b/trunk/arch/powerpc/oprofile/op_model_rs64.c index a20afe45d936..c731acbfb2a5 100644 --- a/trunk/arch/powerpc/oprofile/op_model_rs64.c +++ b/trunk/arch/powerpc/oprofile/op_model_rs64.c @@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER]; static int num_counters; -static int rs64_reg_setup(struct op_counter_config *ctr, +static void rs64_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { @@ -100,10 +100,9 @@ static int rs64_reg_setup(struct op_counter_config *ctr, reset_value[i] = 0x80000000UL - ctr[i].count; /* XXX setup user and kernel profiling */ - return 0; } -static int rs64_cpu_setup(struct op_counter_config *ctr) +static void rs64_cpu_setup(struct op_counter_config *ctr) { unsigned int mmcr0; @@ -126,11 +125,9 @@ 
static int rs64_cpu_setup(struct op_counter_config *ctr) mfspr(SPRN_MMCR0)); dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), mfspr(SPRN_MMCR1)); - - return 0; } -static int rs64_start(struct op_counter_config *ctr) +static void rs64_start(struct op_counter_config *ctr) { int i; unsigned int mmcr0; @@ -158,7 +155,6 @@ static int rs64_start(struct op_counter_config *ctr) mtspr(SPRN_MMCR0, mmcr0); dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); - return 0; } static void rs64_stop(void) diff --git a/trunk/arch/powerpc/platforms/Kconfig b/trunk/arch/powerpc/platforms/Kconfig index 932538a93c2b..33545d352e92 100644 --- a/trunk/arch/powerpc/platforms/Kconfig +++ b/trunk/arch/powerpc/platforms/Kconfig @@ -272,14 +272,4 @@ config CPM2 you wish to build a kernel for a machine with a CPM2 coprocessor on it (826x, 827x, 8560). -config AXON_RAM - tristate "Axon DDR2 memory device driver" - depends on PPC_IBM_CELL_BLADE - default m - help - It registers one block device per Axon's DDR2 memory bank found - on a system. Block devices are called axonram?, their major and - minor numbers are available in /proc/devices, /proc/partitions or - in /sys/block/axonram?/dev. - endmenu diff --git a/trunk/arch/powerpc/platforms/cell/Kconfig b/trunk/arch/powerpc/platforms/cell/Kconfig index ac8032034fb8..9b2b386ccf48 100644 --- a/trunk/arch/powerpc/platforms/cell/Kconfig +++ b/trunk/arch/powerpc/platforms/cell/Kconfig @@ -73,14 +73,4 @@ config CBE_CPUFREQ For details, take a look at . If you don't have such processor, say N -config CBE_CPUFREQ_PMI - tristate "CBE frequency scaling using PMI interface" - depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL - default n - help - Select this, if you want to use the PMI interface - to switch frequencies. Using PMI, the - processor will not only be able to run at lower speed, - but also at lower core voltage. - endmenu diff --git a/trunk/arch/powerpc/platforms/cell/Makefile b/trunk/arch/powerpc/platforms/cell/Makefile index f88a7c76f296..869af89df6ff 100644 --- a/trunk/arch/powerpc/platforms/cell/Makefile +++ b/trunk/arch/powerpc/platforms/cell/Makefile @@ -4,9 +4,7 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \ obj-$(CONFIG_CBE_RAS) += ras.o obj-$(CONFIG_CBE_THERM) += cbe_thermal.o -obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o -obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o -cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o +obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o @@ -25,5 +23,3 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ $(spu-priv1-y) \ $(spu-manage-y) \ spufs/ - -obj-$(CONFIG_PCI_MSI) += axon_msi.o diff --git a/trunk/arch/powerpc/platforms/cell/axon_msi.c b/trunk/arch/powerpc/platforms/cell/axon_msi.c deleted file mode 100644 index 4c9ab5b70bae..000000000000 --- a/trunk/arch/powerpc/platforms/cell/axon_msi.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright 2007, Michael Ellerman, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
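The axon_msi driver deleted below drains MSIs from a hardware-filled ring buffer: the write offset comes from a DCR register that does not wrap cleanly, so it is masked into range, and 16-byte entries are consumed until the read offset catches up with it. A self-contained sketch of that offset arithmetic, with fabricated FIFO contents standing in for what the hardware would write:

    #include <stdint.h>
    #include <stdio.h>

    #define FIFO_SIZE_BYTES (1 << 16)                       /* 64KB, as in the driver */
    #define FIFO_SIZE_MASK  ((FIFO_SIZE_BYTES - 1) & ~0xFu) /* 16-byte aligned wrap */
    #define FIFO_ENTRY_SIZE 0x10                            /* 16 bytes per entry */

    int main(void)
    {
        static uint32_t fifo[FIFO_SIZE_BYTES / 4];
        uint32_t read_offset = 0;
        /* Pretend the hardware queued two MSIs; its write-offset register
         * does not wrap cleanly, hence the mask. */
        uint32_t write_offset = (2 * FIFO_ENTRY_SIZE) & FIFO_SIZE_MASK;

        fifo[0 * FIFO_ENTRY_SIZE / 4] = 17; /* irq # in first word of entry */
        fifo[1 * FIFO_ENTRY_SIZE / 4] = 42;

        while (read_offset != write_offset) {
            uint32_t msi = fifo[read_offset / 4] & 0xFFFF;

            printf("handle irq %u\n", msi);
            read_offset += FIFO_ENTRY_SIZE;
            read_offset &= FIFO_SIZE_MASK; /* stay inside the ring */
        }
        return 0;
    }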
- */ - - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -/* - * MSIC registers, specified as offsets from dcr_base - */ -#define MSIC_CTRL_REG 0x0 - -/* Base Address registers specify FIFO location in BE memory */ -#define MSIC_BASE_ADDR_HI_REG 0x3 -#define MSIC_BASE_ADDR_LO_REG 0x4 - -/* Hold the read/write offsets into the FIFO */ -#define MSIC_READ_OFFSET_REG 0x5 -#define MSIC_WRITE_OFFSET_REG 0x6 - - -/* MSIC control register flags */ -#define MSIC_CTRL_ENABLE 0x0001 -#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002 -#define MSIC_CTRL_IRQ_ENABLE 0x0008 -#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010 - -/* - * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB. - * Currently we're using a 64KB FIFO size. - */ -#define MSIC_FIFO_SIZE_SHIFT 16 -#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT) - -/* - * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits - * 8-9 of the MSIC control reg. - */ -#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300) - -/* - * We need to mask the read/write offsets to make sure they stay within - * the bounds of the FIFO. Also they should always be 16-byte aligned. - */ -#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu) - -/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */ -#define MSIC_FIFO_ENTRY_SIZE 0x10 - - -struct axon_msic { - struct device_node *dn; - struct irq_host *irq_host; - __le32 *fifo; - dcr_host_t dcr_host; - struct list_head list; - u32 read_offset; - u32 dcr_base; -}; - -static LIST_HEAD(axon_msic_list); - -static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) -{ - pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); - - dcr_write(msic->dcr_host, msic->dcr_base + dcr_n, val); -} - -static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n) -{ - return dcr_read(msic->dcr_host, msic->dcr_base + dcr_n); -} - -static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) -{ - struct axon_msic *msic = get_irq_data(irq); - u32 write_offset, msi; - int idx; - - write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG); - pr_debug("axon_msi: original write_offset 0x%x\n", write_offset); - - /* write_offset doesn't wrap properly, so we have to mask it */ - write_offset &= MSIC_FIFO_SIZE_MASK; - - while (msic->read_offset != write_offset) { - idx = msic->read_offset / sizeof(__le32); - msi = le32_to_cpu(msic->fifo[idx]); - msi &= 0xFFFF; - - pr_debug("axon_msi: woff %x roff %x msi %x\n", - write_offset, msic->read_offset, msi); - - msic->read_offset += MSIC_FIFO_ENTRY_SIZE; - msic->read_offset &= MSIC_FIFO_SIZE_MASK; - - if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) - generic_handle_irq(msi); - else - pr_debug("axon_msi: invalid irq 0x%x!\n", msi); - } - - desc->chip->eoi(irq); -} - -static struct axon_msic *find_msi_translator(struct pci_dev *dev) -{ - struct irq_host *irq_host; - struct device_node *dn, *tmp; - const phandle *ph; - struct axon_msic *msic = NULL; - - dn = pci_device_to_OF_node(dev); - if (!dn) { - dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); - return NULL; - } - - for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { - ph = of_get_property(dn, "msi-translator", NULL); - if (ph) - break; - } - - if (!ph) { - dev_dbg(&dev->dev, - "axon_msi: no msi-translator property found\n"); - goto out_error; - } - - tmp = dn; - dn = of_find_node_by_phandle(*ph); - if (!dn) { - dev_dbg(&dev->dev, - "axon_msi: msi-translator doesn't 
point to a node\n"); - goto out_error; - } - - irq_host = irq_find_host(dn); - if (!irq_host) { - dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n", - dn->full_name); - goto out_error; - } - - msic = irq_host->host_data; - -out_error: - of_node_put(dn); - of_node_put(tmp); - - return msic; -} - -static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type) -{ - if (!find_msi_translator(dev)) - return -ENODEV; - - return 0; -} - -static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) -{ - struct device_node *dn, *tmp; - struct msi_desc *entry; - int len; - const u32 *prop; - - dn = pci_device_to_OF_node(dev); - if (!dn) { - dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); - return -ENODEV; - } - - entry = list_first_entry(&dev->msi_list, struct msi_desc, list); - - for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { - if (entry->msi_attrib.is_64) { - prop = of_get_property(dn, "msi-address-64", &len); - if (prop) - break; - } - - prop = of_get_property(dn, "msi-address-32", &len); - if (prop) - break; - } - - if (!prop) { - dev_dbg(&dev->dev, - "axon_msi: no msi-address-(32|64) properties found\n"); - return -ENOENT; - } - - switch (len) { - case 8: - msg->address_hi = prop[0]; - msg->address_lo = prop[1]; - break; - case 4: - msg->address_hi = 0; - msg->address_lo = prop[0]; - break; - default: - dev_dbg(&dev->dev, - "axon_msi: malformed msi-address-(32|64) property\n"); - of_node_put(dn); - return -EINVAL; - } - - of_node_put(dn); - - return 0; -} - -static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - unsigned int virq, rc; - struct msi_desc *entry; - struct msi_msg msg; - struct axon_msic *msic; - - msic = find_msi_translator(dev); - if (!msic) - return -ENODEV; - - rc = setup_msi_msg_address(dev, &msg); - if (rc) - return rc; - - /* We rely on being able to stash a virq in a u16 */ - BUILD_BUG_ON(NR_IRQS > 65536); - - list_for_each_entry(entry, &dev->msi_list, list) { - virq = irq_create_direct_mapping(msic->irq_host); - if (virq == NO_IRQ) { - dev_warn(&dev->dev, - "axon_msi: virq allocation failed!\n"); - return -1; - } - dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq); - - set_irq_msi(virq, entry); - msg.data = virq; - write_msi_msg(virq, &msg); - } - - return 0; -} - -static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) -{ - struct msi_desc *entry; - - dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); - - list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq == NO_IRQ) - continue; - - set_irq_msi(entry->irq, NULL); - irq_dispose_mapping(entry->irq); - } -} - -static struct irq_chip msic_irq_chip = { - .mask = mask_msi_irq, - .unmask = unmask_msi_irq, - .shutdown = unmask_msi_irq, - .typename = "AXON-MSI", -}; - -static int msic_host_map(struct irq_host *h, unsigned int virq, - irq_hw_number_t hw) -{ - set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); - - return 0; -} - -static int msic_host_match(struct irq_host *host, struct device_node *dn) -{ - struct axon_msic *msic = host->host_data; - - return msic->dn == dn; -} - -static struct irq_host_ops msic_host_ops = { - .match = msic_host_match, - .map = msic_host_map, -}; - -static int axon_msi_notify_reboot(struct notifier_block *nb, - unsigned long code, void *data) -{ - struct axon_msic *msic; - u32 tmp; - - list_for_each_entry(msic, &axon_msic_list, list) { - pr_debug("axon_msi: disabling %s\n", msic->dn->full_name); - tmp = msic_dcr_read(msic, MSIC_CTRL_REG); - tmp &= ~MSIC_CTRL_ENABLE 
& ~MSIC_CTRL_IRQ_ENABLE; - msic_dcr_write(msic, MSIC_CTRL_REG, tmp); - } - - return 0; -} - -static struct notifier_block axon_msi_reboot_notifier = { - .notifier_call = axon_msi_notify_reboot -}; - -static int axon_msi_setup_one(struct device_node *dn) -{ - struct page *page; - struct axon_msic *msic; - unsigned int virq; - int dcr_len; - - pr_debug("axon_msi: setting up dn %s\n", dn->full_name); - - msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); - if (!msic) { - printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n", - dn->full_name); - goto out; - } - - msic->dcr_base = dcr_resource_start(dn, 0); - dcr_len = dcr_resource_len(dn, 0); - - if (msic->dcr_base == 0 || dcr_len == 0) { - printk(KERN_ERR - "axon_msi: couldn't parse dcr properties on %s\n", - dn->full_name); - goto out; - } - - msic->dcr_host = dcr_map(dn, msic->dcr_base, dcr_len); - if (!DCR_MAP_OK(msic->dcr_host)) { - printk(KERN_ERR "axon_msi: dcr_map failed for %s\n", - dn->full_name); - goto out_free_msic; - } - - page = alloc_pages_node(of_node_to_nid(dn), GFP_KERNEL, - get_order(MSIC_FIFO_SIZE_BYTES)); - if (!page) { - printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n", - dn->full_name); - goto out_free_msic; - } - - msic->fifo = page_address(page); - - msic->irq_host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, NR_IRQS, - &msic_host_ops, 0); - if (!msic->irq_host) { - printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n", - dn->full_name); - goto out_free_fifo; - } - - msic->irq_host->host_data = msic; - - virq = irq_of_parse_and_map(dn, 0); - if (virq == NO_IRQ) { - printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n", - dn->full_name); - goto out_free_host; - } - - msic->dn = of_node_get(dn); - - set_irq_data(virq, msic); - set_irq_chained_handler(virq, axon_msi_cascade); - pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq); - - /* Enable the MSIC hardware */ - msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, (u64)msic->fifo >> 32); - msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG, - (u64)msic->fifo & 0xFFFFFFFF); - msic_dcr_write(msic, MSIC_CTRL_REG, - MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | - MSIC_CTRL_FIFO_SIZE); - - list_add(&msic->list, &axon_msic_list); - - printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name); - - return 0; - -out_free_host: - kfree(msic->irq_host); -out_free_fifo: - __free_pages(virt_to_page(msic->fifo), get_order(MSIC_FIFO_SIZE_BYTES)); -out_free_msic: - kfree(msic); -out: - - return -1; -} - -static int axon_msi_init(void) -{ - struct device_node *dn; - int found = 0; - - pr_debug("axon_msi: initialising ...\n"); - - for_each_compatible_node(dn, NULL, "ibm,axon-msic") { - if (axon_msi_setup_one(dn) == 0) - found++; - } - - if (found) { - ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; - ppc_md.msi_check_device = axon_msi_check_device; - - register_reboot_notifier(&axon_msi_reboot_notifier); - - pr_debug("axon_msi: registered callbacks!\n"); - } - - return 0; -} -arch_initcall(axon_msi_init); diff --git a/trunk/arch/powerpc/platforms/cell/cbe_cpufreq.c b/trunk/arch/powerpc/platforms/cell/cbe_cpufreq.c index 0b6e8ee85ab1..ab511d5b65a4 100644 --- a/trunk/arch/powerpc/platforms/cell/cbe_cpufreq.c +++ b/trunk/arch/powerpc/platforms/cell/cbe_cpufreq.c @@ -1,7 +1,7 @@ /* * cpufreq driver for the cell processor * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 * * Author: Christian Krafft * @@ -21,11 +21,18 @@ */ #include +#include + 
+#include +#include #include -#include +#include #include +#include +#include +#include + #include "cbe_regs.h" -#include "cbe_cpufreq.h" static DEFINE_MUTEX(cbe_switch_mutex); @@ -43,24 +50,159 @@ static struct cpufreq_frequency_table cbe_freqs[] = { {0, CPUFREQ_TABLE_END}, }; +/* to write to MIC register */ +static u64 MIC_Slow_Fast_Timer_table[] = { + [0 ... 7] = 0x007fc00000000000ull, +}; + +/* more values for the MIC */ +static u64 MIC_Slow_Next_Timer_table[] = { + 0x0000240000000000ull, + 0x0000268000000000ull, + 0x000029C000000000ull, + 0x00002D0000000000ull, + 0x0000300000000000ull, + 0x0000334000000000ull, + 0x000039C000000000ull, + 0x00003FC000000000ull, +}; + +static unsigned int pmi_frequency_limit = 0; /* * hardware specific functions */ -static int set_pmode(unsigned int cpu, unsigned int slow_mode) +static struct of_device *pmi_dev; + +#ifdef CONFIG_PPC_PMI +static int set_pmode_pmi(int cpu, unsigned int pmode) { - int rc; + int ret; + pmi_message_t pmi_msg; +#ifdef DEBUG + u64 time; +#endif + + pmi_msg.type = PMI_TYPE_FREQ_CHANGE; + pmi_msg.data1 = cbe_cpu_to_node(cpu); + pmi_msg.data2 = pmode; + +#ifdef DEBUG + time = (u64) get_cycles(); +#endif + + pmi_send_message(pmi_dev, pmi_msg); + ret = pmi_msg.data2; + + pr_debug("PMI returned slow mode %d\n", ret); + +#ifdef DEBUG + time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */ + time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */ + pr_debug("had to wait %lu ns for a transition\n", time); +#endif + return ret; +} +#endif + +static int get_pmode(int cpu) +{ + int ret; + struct cbe_pmd_regs __iomem *pmd_regs; + + pmd_regs = cbe_get_cpu_pmd_regs(cpu); + ret = in_be64(&pmd_regs->pmsr) & 0x07; + + return ret; +} + +static int set_pmode_reg(int cpu, unsigned int pmode) +{ + struct cbe_pmd_regs __iomem *pmd_regs; + struct cbe_mic_tm_regs __iomem *mic_tm_regs; + u64 flags; + u64 value; + + local_irq_save(flags); + + mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); + pmd_regs = cbe_get_cpu_pmd_regs(cpu); + + pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr); + pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0); + + out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); + out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); + + out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); + out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); + + value = in_be64(&pmd_regs->pmcr); + /* set bits to zero */ + value &= 0xFFFFFFFFFFFFFFF8ull; + /* set bits to next pmode */ + value |= pmode; + + out_be64(&pmd_regs->pmcr, value); + + /* wait until new pmode appears in status register */ + value = in_be64(&pmd_regs->pmsr) & 0x07; + while(value != pmode) { + cpu_relax(); + value = in_be64(&pmd_regs->pmsr) & 0x07; + } - if (cbe_cpufreq_has_pmi) - rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode); + local_irq_restore(flags); + + return 0; +} + +static int set_pmode(int cpu, unsigned int slow_mode) { +#ifdef CONFIG_PPC_PMI + if (pmi_dev) + return set_pmode_pmi(cpu, slow_mode); else - rc = cbe_cpufreq_set_pmode(cpu, slow_mode); +#endif + return set_pmode_reg(cpu, slow_mode); +} + +static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg) +{ + u8 cpu; + u8 cbe_pmode_new; - pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu)); + BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); - return rc; + cpu = cbe_node_to_cpu(pmi_msg.data1); + cbe_pmode_new = pmi_msg.data2; + + 
pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency; + + pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit); } +static int pmi_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct cpufreq_policy *policy = data; + + if (event != CPUFREQ_INCOMPATIBLE) + return 0; + + cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit); + return 0; +} + +static struct notifier_block pmi_notifier_block = { + .notifier_call = pmi_notifier, +}; + +static struct pmi_handler cbe_pmi_handler = { + .type = PMI_TYPE_FREQ_CHANGE, + .handle_pmi_message = cbe_cpufreq_handle_pmi, +}; + + /* * cpufreq functions */ @@ -79,19 +221,8 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) pr_debug("init cpufreq on CPU %d\n", policy->cpu); - /* - * Let's check we can actually get to the CELL regs - */ - if (!cbe_get_cpu_pmd_regs(policy->cpu) || - !cbe_get_cpu_mic_tm_regs(policy->cpu)) { - pr_info("invalid CBE regs pointers for cpufreq\n"); - return -EINVAL; - } - max_freqp = of_get_property(cpu, "clock-frequency", NULL); - of_node_put(cpu); - if (!max_freqp) return -EINVAL; @@ -108,12 +239,10 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) } policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - - /* if DEBUG is enabled set_pmode() measures the latency - * of a transition */ + /* if DEBUG is enabled set_pmode() measures the correct latency of a transition */ policy->cpuinfo.transition_latency = 25000; - cur_pmode = cbe_cpufreq_get_pmode(policy->cpu); + cur_pmode = get_pmode(policy->cpu); pr_debug("current pmode is at %d\n",cur_pmode); policy->cur = cbe_freqs[cur_pmode].frequency; @@ -124,13 +253,21 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu); - /* this ensures that policy->cpuinfo_min - * and policy->cpuinfo_max are set correctly */ + if (pmi_dev) { + /* frequency might get limited later, initialize limit with max_freq */ + pmi_frequency_limit = max_freq; + cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); + } + + /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */ return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs); } static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) { + if (pmi_dev) + cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); + cpufreq_frequency_table_put_attr(policy->cpu); return 0; } @@ -140,13 +277,13 @@ static int cbe_cpufreq_verify(struct cpufreq_policy *policy) return cpufreq_frequency_table_verify(policy, cbe_freqs); } -static int cbe_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + +static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int relation) { int rc; struct cpufreq_freqs freqs; - unsigned int cbe_pmode_new; + int cbe_pmode_new; cpufreq_frequency_table_target(policy, cbe_freqs, @@ -161,14 +298,12 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, mutex_lock(&cbe_switch_mutex); cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - pr_debug("setting frequency for cpu %d to %d kHz, " \ - "1/%d of max frequency\n", + pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", policy->cpu, cbe_freqs[cbe_pmode_new].frequency, cbe_freqs[cbe_pmode_new].index); rc = set_pmode(policy->cpu, cbe_pmode_new); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); mutex_unlock(&cbe_switch_mutex); @@ -191,14 +326,28 @@ static struct 
cpufreq_driver cbe_cpufreq_driver = { static int __init cbe_cpufreq_init(void) { +#ifdef CONFIG_PPC_PMI + struct device_node *np; +#endif if (!machine_is(cell)) return -ENODEV; +#ifdef CONFIG_PPC_PMI + np = of_find_node_by_type(NULL, "ibm,pmi"); + + pmi_dev = of_find_device_by_node(np); + if (pmi_dev) + pmi_register_handler(pmi_dev, &cbe_pmi_handler); +#endif return cpufreq_register_driver(&cbe_cpufreq_driver); } static void __exit cbe_cpufreq_exit(void) { +#ifdef CONFIG_PPC_PMI + if (pmi_dev) + pmi_unregister_handler(pmi_dev, &cbe_pmi_handler); +#endif cpufreq_unregister_driver(&cbe_cpufreq_driver); } diff --git a/trunk/arch/powerpc/platforms/cell/cbe_cpufreq.h b/trunk/arch/powerpc/platforms/cell/cbe_cpufreq.h deleted file mode 100644 index c1d86bfa92ff..000000000000 --- a/trunk/arch/powerpc/platforms/cell/cbe_cpufreq.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * cbe_cpufreq.h - * - * This file contains the definitions used by the cbe_cpufreq driver. - * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 - * - * Author: Christian Krafft - * - */ - -#include -#include - -int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode); -int cbe_cpufreq_get_pmode(int cpu); - -int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode); - -#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE) -extern bool cbe_cpufreq_has_pmi; -#else -#define cbe_cpufreq_has_pmi (0) -#endif diff --git a/trunk/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/trunk/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c deleted file mode 100644 index 163263b3e1cd..000000000000 --- a/trunk/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - * pervasive backend for the cbe_cpufreq driver - * - * This driver makes use of the pervasive unit to - * engage the desired frequency. - * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 - * - * Author: Christian Krafft - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include - -#include "cbe_regs.h" -#include "cbe_cpufreq.h" - -/* to write to MIC register */ -static u64 MIC_Slow_Fast_Timer_table[] = { - [0 ... 
7] = 0x007fc00000000000ull, -}; - -/* more values for the MIC */ -static u64 MIC_Slow_Next_Timer_table[] = { - 0x0000240000000000ull, - 0x0000268000000000ull, - 0x000029C000000000ull, - 0x00002D0000000000ull, - 0x0000300000000000ull, - 0x0000334000000000ull, - 0x000039C000000000ull, - 0x00003FC000000000ull, -}; - - -int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode) -{ - struct cbe_pmd_regs __iomem *pmd_regs; - struct cbe_mic_tm_regs __iomem *mic_tm_regs; - u64 flags; - u64 value; -#ifdef DEBUG - long time; -#endif - - local_irq_save(flags); - - mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); - pmd_regs = cbe_get_cpu_pmd_regs(cpu); - -#ifdef DEBUG - time = jiffies; -#endif - - out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); - out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); - - out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); - out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); - - value = in_be64(&pmd_regs->pmcr); - /* set bits to zero */ - value &= 0xFFFFFFFFFFFFFFF8ull; - /* set bits to next pmode */ - value |= pmode; - - out_be64(&pmd_regs->pmcr, value); - -#ifdef DEBUG - /* wait until new pmode appears in status register */ - value = in_be64(&pmd_regs->pmsr) & 0x07; - while (value != pmode) { - cpu_relax(); - value = in_be64(&pmd_regs->pmsr) & 0x07; - } - - time = jiffies - time; - time = jiffies_to_msecs(time); - pr_debug("had to wait %lu ms for a transition using " \ - "pervasive unit\n", time); -#endif - local_irq_restore(flags); - - return 0; -} - - -int cbe_cpufreq_get_pmode(int cpu) -{ - int ret; - struct cbe_pmd_regs __iomem *pmd_regs; - - pmd_regs = cbe_get_cpu_pmd_regs(cpu); - ret = in_be64(&pmd_regs->pmsr) & 0x07; - - return ret; -} - diff --git a/trunk/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/trunk/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c deleted file mode 100644 index fc6f38982ff4..000000000000 --- a/trunk/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * pmi backend for the cbe_cpufreq driver - * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 - * - * Author: Christian Krafft - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
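Both frequency-scaling paths shown in this region encode the pmode the same way: the low three bits of the pmcr register select the mode, and pmsr reflects it once the transition completes. A minimal sketch of that bit manipulation, using plain variables as stand-ins for the memory-mapped registers:

    #include <stdint.h>
    #include <stdio.h>

    /* Plain variables standing in for the memory-mapped pmcr/pmsr. */
    static uint64_t pmcr, pmsr;

    static void write_pmode(unsigned int pmode)
    {
        uint64_t value = pmcr;

        value &= 0xFFFFFFFFFFFFFFF8ull; /* clear the three pmode bits */
        value |= pmode;                 /* select the new pmode */
        pmcr = value;

        /* the hardware updates pmsr once the transition completes */
        pmsr = pmode;
    }

    int main(void)
    {
        write_pmode(5);
        /* the kernel spins until the low three bits of pmsr match */
        printf("pmode = %llu\n", (unsigned long long)(pmsr & 0x07));
        return 0;
    }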
- */ - -#include -#include -#include -#include -#include -#include -#include - -#ifdef DEBUG -#include -#endif - -#include "cbe_regs.h" -#include "cbe_cpufreq.h" - -static u8 pmi_slow_mode_limit[MAX_CBE]; - -bool cbe_cpufreq_has_pmi = false; -EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi); - -/* - * hardware specific functions - */ - -int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode) -{ - int ret; - pmi_message_t pmi_msg; -#ifdef DEBUG - long time; -#endif - pmi_msg.type = PMI_TYPE_FREQ_CHANGE; - pmi_msg.data1 = cbe_cpu_to_node(cpu); - pmi_msg.data2 = pmode; - -#ifdef DEBUG - time = jiffies; -#endif - pmi_send_message(pmi_msg); - -#ifdef DEBUG - time = jiffies - time; - time = jiffies_to_msecs(time); - pr_debug("had to wait %lu ms for a transition using " \ - "PMI\n", time); -#endif - ret = pmi_msg.data2; - pr_debug("PMI returned slow mode %d\n", ret); - - return ret; -} -EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi); - - -static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg) -{ - u8 node, slow_mode; - - BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); - - node = pmi_msg.data1; - slow_mode = pmi_msg.data2; - - pmi_slow_mode_limit[node] = slow_mode; - - pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode); -} - -static int pmi_notifier(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct cpufreq_policy *policy = data; - struct cpufreq_frequency_table *cbe_freqs; - u8 node; - - cbe_freqs = cpufreq_frequency_get_table(policy->cpu); - node = cbe_cpu_to_node(policy->cpu); - - pr_debug("got notified, event=%lu, node=%u\n", event, node); - - if (pmi_slow_mode_limit[node] != 0) { - pr_debug("limiting node %d to slow mode %d\n", - node, pmi_slow_mode_limit[node]); - - cpufreq_verify_within_limits(policy, 0, - - cbe_freqs[pmi_slow_mode_limit[node]].frequency); - } - - return 0; -} - -static struct notifier_block pmi_notifier_block = { - .notifier_call = pmi_notifier, -}; - -static struct pmi_handler cbe_pmi_handler = { - .type = PMI_TYPE_FREQ_CHANGE, - .handle_pmi_message = cbe_cpufreq_handle_pmi, -}; - - - -static int __init cbe_cpufreq_pmi_init(void) -{ - cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0; - - if (!cbe_cpufreq_has_pmi) - return -ENODEV; - - cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); - - return 0; -} - -static void __exit cbe_cpufreq_pmi_exit(void) -{ - cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); - pmi_unregister_handler(&cbe_pmi_handler); -} - -module_init(cbe_cpufreq_pmi_init); -module_exit(cbe_cpufreq_pmi_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Christian Krafft "); diff --git a/trunk/arch/powerpc/platforms/cell/cbe_regs.c b/trunk/arch/powerpc/platforms/cell/cbe_regs.c index c8f7f0007422..12c9674b4b1f 100644 --- a/trunk/arch/powerpc/platforms/cell/cbe_regs.c +++ b/trunk/arch/powerpc/platforms/cell/cbe_regs.c @@ -174,13 +174,6 @@ static struct device_node *cbe_get_be_node(int cpu_id) cpu_handle = of_get_property(np, "cpus", &len); - /* - * the CAB SLOF tree is non compliant, so we just assume - * there is only one node - */ - if (WARN_ON_ONCE(!cpu_handle)) - return np; - for (i=0; iid); - if (!pmd_regs) { - pr_info("invalid CBE regs pointer for cbe_thermal\n"); - return -EINVAL; - } - out_be64(&pmd_regs->tm_str2, str2); out_be64(&pmd_regs->tm_str1.val, str1.val); out_be64(&pmd_regs->tm_tpr.val, tpr.val); out_be64(&pmd_regs->tm_cr1.val, cr1.val); out_be64(&pmd_regs->tm_cr2, cr2); } - - return 0; } static int __init thermal_init(void) { - int rc = 
init_default_values(); + init_default_values(); - if (rc == 0) { - spu_add_sysdev_attr_group(&spu_attribute_group); - cpu_add_sysdev_attr_group(&ppe_attribute_group); - } + spu_add_sysdev_attr_group(&spu_attribute_group); + cpu_add_sysdev_attr_group(&ppe_attribute_group); - return rc; + return 0; } module_init(thermal_init); diff --git a/trunk/arch/powerpc/platforms/cell/spu_base.c b/trunk/arch/powerpc/platforms/cell/spu_base.c index 90124228b8f4..96a8f609690c 100644 --- a/trunk/arch/powerpc/platforms/cell/spu_base.c +++ b/trunk/arch/powerpc/platforms/cell/spu_base.c @@ -35,37 +35,18 @@ #include #include #include -#include -#include "spu_priv1_mmio.h" const struct spu_management_ops *spu_management_ops; EXPORT_SYMBOL_GPL(spu_management_ops); const struct spu_priv1_ops *spu_priv1_ops; -EXPORT_SYMBOL_GPL(spu_priv1_ops); - -struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; -EXPORT_SYMBOL_GPL(cbe_spu_info); - -/* - * Protects cbe_spu_info and spu->number. - */ -static DEFINE_SPINLOCK(spu_lock); -/* - * List of all spus in the system. - * - * This list is iterated by callers from irq context and callers that - * want to sleep. Thus modifications need to be done with both - * spu_full_list_lock and spu_full_list_mutex held, while iterating - * through it requires either of these locks. - * - * In addition spu_full_list_lock protects all assignmens to - * spu->mm. - */ +static struct list_head spu_list[MAX_NUMNODES]; static LIST_HEAD(spu_full_list); -static DEFINE_SPINLOCK(spu_full_list_lock); -static DEFINE_MUTEX(spu_full_list_mutex); +static DEFINE_MUTEX(spu_mutex); +static DEFINE_SPINLOCK(spu_list_lock); + +EXPORT_SYMBOL_GPL(spu_priv1_ops); void spu_invalidate_slbs(struct spu *spu) { @@ -84,12 +65,12 @@ void spu_flush_all_slbs(struct mm_struct *mm) struct spu *spu; unsigned long flags; - spin_lock_irqsave(&spu_full_list_lock, flags); + spin_lock_irqsave(&spu_list_lock, flags); list_for_each_entry(spu, &spu_full_list, full_list) { if (spu->mm == mm) spu_invalidate_slbs(spu); } - spin_unlock_irqrestore(&spu_full_list_lock, flags); + spin_unlock_irqrestore(&spu_list_lock, flags); } /* The hack below stinks... 
try to do something better one of @@ -107,9 +88,9 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm) { unsigned long flags; - spin_lock_irqsave(&spu_full_list_lock, flags); + spin_lock_irqsave(&spu_list_lock, flags); spu->mm = mm; - spin_unlock_irqrestore(&spu_full_list_lock, flags); + spin_unlock_irqrestore(&spu_list_lock, flags); if (mm) mm_needs_global_tlbie(mm); } @@ -409,7 +390,7 @@ static void spu_free_irqs(struct spu *spu) free_irq(spu->irqs[2], spu); } -void spu_init_channels(struct spu *spu) +static void spu_init_channels(struct spu *spu) { static const struct { unsigned channel; @@ -442,7 +423,46 @@ void spu_init_channels(struct spu *spu) out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); } } -EXPORT_SYMBOL_GPL(spu_init_channels); + +struct spu *spu_alloc_node(int node) +{ + struct spu *spu = NULL; + + mutex_lock(&spu_mutex); + if (!list_empty(&spu_list[node])) { + spu = list_entry(spu_list[node].next, struct spu, list); + list_del_init(&spu->list); + pr_debug("Got SPU %d %d\n", spu->number, spu->node); + } + mutex_unlock(&spu_mutex); + + if (spu) + spu_init_channels(spu); + return spu; +} +EXPORT_SYMBOL_GPL(spu_alloc_node); + +struct spu *spu_alloc(void) +{ + struct spu *spu = NULL; + int node; + + for (node = 0; node < MAX_NUMNODES; node++) { + spu = spu_alloc_node(node); + if (spu) + break; + } + + return spu; +} + +void spu_free(struct spu *spu) +{ + mutex_lock(&spu_mutex); + list_add_tail(&spu->list, &spu_list[spu->node]); + mutex_unlock(&spu_mutex); +} +EXPORT_SYMBOL_GPL(spu_free); static int spu_shutdown(struct sys_device *sysdev) { @@ -461,12 +481,12 @@ struct sysdev_class spu_sysdev_class = { int spu_add_sysdev_attr(struct sysdev_attribute *attr) { struct spu *spu; + mutex_lock(&spu_mutex); - mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysdev_create_file(&spu->sysdev, attr); - mutex_unlock(&spu_full_list_mutex); + mutex_unlock(&spu_mutex); return 0; } EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); @@ -474,12 +494,12 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); int spu_add_sysdev_attr_group(struct attribute_group *attrs) { struct spu *spu; + mutex_lock(&spu_mutex); - mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysfs_create_group(&spu->sysdev.kobj, attrs); - mutex_unlock(&spu_full_list_mutex); + mutex_unlock(&spu_mutex); return 0; } EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); @@ -488,22 +508,24 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); void spu_remove_sysdev_attr(struct sysdev_attribute *attr) { struct spu *spu; + mutex_lock(&spu_mutex); - mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysdev_remove_file(&spu->sysdev, attr); - mutex_unlock(&spu_full_list_mutex); + + mutex_unlock(&spu_mutex); } EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); void spu_remove_sysdev_attr_group(struct attribute_group *attrs) { struct spu *spu; + mutex_lock(&spu_mutex); - mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysfs_remove_group(&spu->sysdev.kobj, attrs); - mutex_unlock(&spu_full_list_mutex); + + mutex_unlock(&spu_mutex); } EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); @@ -531,19 +553,16 @@ static int __init create_spu(void *data) int ret; static int number; unsigned long flags; - struct timespec ts; ret = -ENOMEM; spu = kzalloc(sizeof (*spu), GFP_KERNEL); if (!spu) goto out; - spu->alloc_state = SPU_FREE; - spin_lock_init(&spu->register_lock); - spin_lock(&spu_lock); + mutex_lock(&spu_mutex); spu->number = 
number++; - spin_unlock(&spu_lock); + mutex_unlock(&spu_mutex); ret = spu_create_spu(spu, data); @@ -560,22 +579,15 @@ static int __init create_spu(void *data) if (ret) goto out_free_irqs; - mutex_lock(&cbe_spu_info[spu->node].list_mutex); - list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); - cbe_spu_info[spu->node].n_spus++; - mutex_unlock(&cbe_spu_info[spu->node].list_mutex); - - mutex_lock(&spu_full_list_mutex); - spin_lock_irqsave(&spu_full_list_lock, flags); + mutex_lock(&spu_mutex); + spin_lock_irqsave(&spu_list_lock, flags); + list_add(&spu->list, &spu_list[spu->node]); list_add(&spu->full_list, &spu_full_list); - spin_unlock_irqrestore(&spu_full_list_lock, flags); - mutex_unlock(&spu_full_list_mutex); - - spu->stats.util_state = SPU_UTIL_IDLE_LOADED; - ktime_get_ts(&ts); - spu->stats.tstamp = timespec_to_ns(&ts); + spin_unlock_irqrestore(&spu_list_lock, flags); + mutex_unlock(&spu_mutex); - INIT_LIST_HEAD(&spu->aff_list); + spu->stats.utilization_state = SPU_UTIL_IDLE; + spu->stats.tstamp = jiffies; goto out; @@ -596,20 +608,12 @@ static const char *spu_state_names[] = { static unsigned long long spu_acct_time(struct spu *spu, enum spu_utilization_state state) { - struct timespec ts; unsigned long long time = spu->stats.times[state]; - /* - * If the spu is idle or the context is stopped, utilization - * statistics are not updated. Apply the time delta from the - * last recorded state of the spu. - */ - if (spu->stats.util_state == state) { - ktime_get_ts(&ts); - time += timespec_to_ns(&ts) - spu->stats.tstamp; - } + if (spu->stats.utilization_state == state) + time += jiffies - spu->stats.tstamp; - return time / NSEC_PER_MSEC; + return jiffies_to_msecs(time); } @@ -619,11 +623,11 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) return sprintf(buf, "%s %llu %llu %llu %llu " "%llu %llu %llu %llu %llu %llu %llu %llu\n", - spu_state_names[spu->stats.util_state], + spu_state_names[spu->stats.utilization_state], spu_acct_time(spu, SPU_UTIL_USER), spu_acct_time(spu, SPU_UTIL_SYSTEM), spu_acct_time(spu, SPU_UTIL_IOWAIT), - spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), + spu_acct_time(spu, SPU_UTIL_IDLE), spu->stats.vol_ctx_switch, spu->stats.invol_ctx_switch, spu->stats.slb_flt, @@ -636,146 +640,12 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); -/* Hardcoded affinity idxs for QS20 */ -#define SPES_PER_BE 8 -static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; -static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; - -static struct spu *spu_lookup_reg(int node, u32 reg) -{ - struct spu *spu; - - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { - if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg) - return spu; - } - return NULL; -} - -static void init_aff_QS20_harcoded(void) -{ - int node, i; - struct spu *last_spu, *spu; - u32 reg; - - for (node = 0; node < MAX_NUMNODES; node++) { - last_spu = NULL; - for (i = 0; i < SPES_PER_BE; i++) { - reg = QS20_reg_idxs[i]; - spu = spu_lookup_reg(node, reg); - if (!spu) - continue; - spu->has_mem_affinity = QS20_reg_memory[reg]; - if (last_spu) - list_add_tail(&spu->aff_list, - &last_spu->aff_list); - last_spu = spu; - } - } -} - -static int of_has_vicinity(void) -{ - struct spu* spu; - - spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list); - return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; -} - -static struct spu *aff_devnode_spu(int cbe, struct device_node *dn) 
-{ - struct spu *spu; - - list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) - if (spu_devnode(spu) == dn) - return spu; - return NULL; -} - -static struct spu * -aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid) -{ - struct spu *spu; - const phandle *vic_handles; - int lenp, i; - - list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { - if (spu_devnode(spu) == avoid) - continue; - vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp); - for (i=0; i < (lenp / sizeof(phandle)); i++) { - if (vic_handles[i] == target->linux_phandle) - return spu; - } - } - return NULL; -} - -static void init_aff_fw_vicinity_node(int cbe) -{ - struct spu *spu, *last_spu; - struct device_node *vic_dn, *last_spu_dn; - phandle avoid_ph; - const phandle *vic_handles; - const char *name; - int lenp, i, added, mem_aff; - - last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list); - avoid_ph = 0; - for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { - last_spu_dn = spu_devnode(last_spu); - vic_handles = get_property(last_spu_dn, "vicinity", &lenp); - - for (i = 0; i < (lenp / sizeof(phandle)); i++) { - if (vic_handles[i] == avoid_ph) - continue; - - vic_dn = of_find_node_by_phandle(vic_handles[i]); - if (!vic_dn) - continue; - - name = get_property(vic_dn, "name", NULL); - if (strcmp(name, "spe") == 0) { - spu = aff_devnode_spu(cbe, vic_dn); - avoid_ph = last_spu_dn->linux_phandle; - } - else { - mem_aff = strcmp(name, "mic-tm") == 0; - spu = aff_node_next_to(cbe, vic_dn, last_spu_dn); - if (!spu) - continue; - if (mem_aff) { - last_spu->has_mem_affinity = 1; - spu->has_mem_affinity = 1; - } - avoid_ph = vic_dn->linux_phandle; - } - list_add_tail(&spu->aff_list, &last_spu->aff_list); - last_spu = spu; - break; - } - } -} - -static void init_aff_fw_vicinity(void) -{ - int cbe; - - /* sets has_mem_affinity for each spu, as long as the - * spu->aff_list list, linking each spu to its neighbors - */ - for (cbe = 0; cbe < MAX_NUMNODES; cbe++) - init_aff_fw_vicinity_node(cbe); -} - static int __init init_spu_base(void) { int i, ret = 0; - for (i = 0; i < MAX_NUMNODES; i++) { - mutex_init(&cbe_spu_info[i].list_mutex); - INIT_LIST_HEAD(&cbe_spu_info[i].spus); - } + for (i = 0; i < MAX_NUMNODES; i++) + INIT_LIST_HEAD(&spu_list[i]); if (!spu_management_ops) goto out; @@ -805,25 +675,16 @@ static int __init init_spu_base(void) fb_append_extra_logo(&logo_spe_clut224, ret); } - mutex_lock(&spu_full_list_mutex); xmon_register_spus(&spu_full_list); - crash_register_spus(&spu_full_list); - mutex_unlock(&spu_full_list_mutex); - spu_add_sysdev_attr(&attr_stat); - if (of_has_vicinity()) { - init_aff_fw_vicinity(); - } else { - long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) - init_aff_QS20_harcoded(); - } + spu_add_sysdev_attr(&attr_stat); return 0; out_unregister_sysdev_class: sysdev_class_unregister(&spu_sysdev_class); out: + return ret; } module_init(init_spu_base); diff --git a/trunk/arch/powerpc/platforms/cell/spu_syscalls.c b/trunk/arch/powerpc/platforms/cell/spu_syscalls.c index dd2c6688c8aa..261b507a901a 100644 --- a/trunk/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/trunk/arch/powerpc/platforms/cell/spu_syscalls.c @@ -34,27 +34,14 @@ struct spufs_calls spufs_calls = { * this file is not used and the syscalls directly enter the fs code */ asmlinkage long sys_spu_create(const char __user *name, - unsigned int flags, mode_t mode, int neighbor_fd) + unsigned int flags, mode_t mode) { long ret; struct 
module *owner = spufs_calls.owner; - struct file *neighbor; - int fput_needed; ret = -ENOSYS; if (owner && try_module_get(owner)) { - if (flags & SPU_CREATE_AFFINITY_SPU) { - neighbor = fget_light(neighbor_fd, &fput_needed); - if (neighbor) { - ret = spufs_calls.create_thread(name, flags, - mode, neighbor); - fput_light(neighbor, fput_needed); - } - } - else { - ret = spufs_calls.create_thread(name, flags, - mode, NULL); - } + ret = spufs_calls.create_thread(name, flags, mode); module_put(owner); } return ret; diff --git a/trunk/arch/powerpc/platforms/cell/spufs/context.c b/trunk/arch/powerpc/platforms/cell/spufs/context.c index 6694f86d7000..6d7bd60f5380 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/context.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/context.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -56,12 +55,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) ctx->ops = &spu_backing_ops; ctx->owner = get_task_mm(current); INIT_LIST_HEAD(&ctx->rq); - INIT_LIST_HEAD(&ctx->aff_list); if (gang) spu_gang_add_ctx(gang, ctx); ctx->cpus_allowed = current->cpus_allowed; spu_set_timeslice(ctx); - ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; + ctx->stats.execution_state = SPUCTX_UTIL_USER; + ctx->stats.tstamp = jiffies; atomic_inc(&nr_spu_contexts); goto out; @@ -82,8 +81,6 @@ void destroy_spu_context(struct kref *kref) spu_fini_csa(&ctx->csa); if (ctx->gang) spu_gang_remove_ctx(ctx->gang, ctx); - if (ctx->prof_priv_kref) - kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); BUG_ON(!list_empty(&ctx->rq)); atomic_dec(&nr_spu_contexts); kfree(ctx); @@ -169,39 +166,6 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags) void spu_acquire_saved(struct spu_context *ctx) { spu_acquire(ctx); - if (ctx->state != SPU_STATE_SAVED) { - set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); + if (ctx->state != SPU_STATE_SAVED) spu_deactivate(ctx); - } -} - -/** - * spu_release_saved - unlock spu context and return it to the runqueue - * @ctx: context to unlock - */ -void spu_release_saved(struct spu_context *ctx) -{ - BUG_ON(ctx->state != SPU_STATE_SAVED); - - if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) - spu_activate(ctx, 0); - - spu_release(ctx); } - -void spu_set_profile_private_kref(struct spu_context *ctx, - struct kref *prof_info_kref, - void ( * prof_info_release) (struct kref *kref)) -{ - ctx->prof_priv_kref = prof_info_kref; - ctx->prof_priv_release = prof_info_release; -} -EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); - -void *spu_get_profile_private_kref(struct spu_context *ctx) -{ - return ctx->prof_priv_kref; -} -EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); - - diff --git a/trunk/arch/powerpc/platforms/cell/spufs/coredump.c b/trunk/arch/powerpc/platforms/cell/spufs/coredump.c index 5e31799b1e3f..5d9ad5a0307b 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/coredump.c @@ -226,7 +226,7 @@ static void spufs_arch_write_notes(struct file *file) spu_acquire_saved(ctx_info->ctx); for (j = 0; j < spufs_coredump_num_notes; j++) spufs_arch_write_note(ctx_info, j, file); - spu_release_saved(ctx_info->ctx); + spu_release(ctx_info->ctx); list_del(&ctx_info->list); kfree(ctx_info); } diff --git a/trunk/arch/powerpc/platforms/cell/spufs/fault.c b/trunk/arch/powerpc/platforms/cell/spufs/fault.c index 917eab4be486..f53a07437472 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/fault.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/fault.c @@ 
-179,14 +179,16 @@ int spufs_handle_class1(struct spu_context *ctx) if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) return 0; - spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); + spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT); pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, dsisr, ctx->state); ctx->stats.hash_flt++; - if (ctx->state == SPU_STATE_RUNNABLE) + if (ctx->state == SPU_STATE_RUNNABLE) { ctx->spu->stats.hash_flt++; + spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT); + } /* we must not hold the lock when entering spu_handle_mm_fault */ spu_release(ctx); @@ -224,7 +226,7 @@ int spufs_handle_class1(struct spu_context *ctx) } else spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); - spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); return ret; } EXPORT_SYMBOL_GPL(spufs_handle_class1); diff --git a/trunk/arch/powerpc/platforms/cell/spufs/file.c b/trunk/arch/powerpc/platforms/cell/spufs/file.c index 7de4e919687b..c2814ea96af2 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/file.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/file.c @@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer, spu_acquire_saved(ctx); ret = __spufs_regs_read(ctx, buffer, size, pos); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer, ret = copy_from_user(lscsa->gprs + *pos - size, buffer, size) ? -EFAULT : size; - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer, spu_acquire_saved(ctx); ret = __spufs_fpcr_read(ctx, buffer, size, pos); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer, ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, buffer, size) ? 
-EFAULT : size; - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf, spu_acquire_saved(ctx); ret = __spufs_signal1_read(ctx, buf, len, pos); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -934,13 +934,6 @@ static const struct file_operations spufs_signal1_fops = { .mmap = spufs_signal1_mmap, }; -static const struct file_operations spufs_signal1_nosched_fops = { - .open = spufs_signal1_open, - .release = spufs_signal1_release, - .write = spufs_signal1_write, - .mmap = spufs_signal1_mmap, -}; - static int spufs_signal2_open(struct inode *inode, struct file *file) { struct spufs_inode_info *i = SPUFS_I(inode); @@ -999,7 +992,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf, spu_acquire_saved(ctx); ret = __spufs_signal2_read(ctx, buf, len, pos); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -1069,13 +1062,6 @@ static const struct file_operations spufs_signal2_fops = { .mmap = spufs_signal2_mmap, }; -static const struct file_operations spufs_signal2_nosched_fops = { - .open = spufs_signal2_open, - .release = spufs_signal2_release, - .write = spufs_signal2_write, - .mmap = spufs_signal2_mmap, -}; - static void spufs_signal1_type_set(void *data, u64 val) { struct spu_context *ctx = data; @@ -1626,7 +1612,7 @@ static void spufs_decr_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->decr.slot[0] = (u32) val; - spu_release_saved(ctx); + spu_release(ctx); } static u64 __spufs_decr_get(void *data) @@ -1642,7 +1628,7 @@ static u64 spufs_decr_get(void *data) u64 ret; spu_acquire_saved(ctx); ret = __spufs_decr_get(data); - spu_release_saved(ctx); + spu_release(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, @@ -1651,21 +1637,17 @@ DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, static void spufs_decr_status_set(void *data, u64 val) { struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); - if (val) - ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; - else - ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; - spu_release_saved(ctx); + lscsa->decr_status.slot[0] = (u32) val; + spu_release(ctx); } static u64 __spufs_decr_status_get(void *data) { struct spu_context *ctx = data; - if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) - return SPU_DECR_STATUS_RUNNING; - else - return 0; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->decr_status.slot[0]; } static u64 spufs_decr_status_get(void *data) @@ -1674,7 +1656,7 @@ static u64 spufs_decr_status_get(void *data) u64 ret; spu_acquire_saved(ctx); ret = __spufs_decr_status_get(data); - spu_release_saved(ctx); + spu_release(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, @@ -1686,7 +1668,7 @@ static void spufs_event_mask_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->event_mask.slot[0] = (u32) val; - spu_release_saved(ctx); + spu_release(ctx); } static u64 __spufs_event_mask_get(void *data) @@ -1702,7 +1684,7 @@ static u64 spufs_event_mask_get(void *data) u64 ret; spu_acquire_saved(ctx); ret = __spufs_event_mask_get(data); - spu_release_saved(ctx); + spu_release(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, @@ -1726,7 +1708,7 @@ static u64 
spufs_event_status_get(void *data) spu_acquire_saved(ctx); ret = __spufs_event_status_get(data); - spu_release_saved(ctx); + spu_release(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, @@ -1738,7 +1720,7 @@ static void spufs_srr0_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->srr0.slot[0] = (u32) val; - spu_release_saved(ctx); + spu_release(ctx); } static u64 spufs_srr0_get(void *data) @@ -1748,7 +1730,7 @@ static u64 spufs_srr0_get(void *data) u64 ret; spu_acquire_saved(ctx); ret = lscsa->srr0.slot[0]; - spu_release_saved(ctx); + spu_release(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, @@ -1804,7 +1786,7 @@ static u64 spufs_lslr_get(void *data) spu_acquire_saved(ctx); ret = __spufs_lslr_get(data); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -1868,7 +1850,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_mbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -1906,7 +1888,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_ibox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -1947,7 +1929,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_wbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -1997,7 +1979,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_dma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -2048,7 +2030,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_proxydma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release_saved(ctx); + spu_release(ctx); return ret; } @@ -2083,26 +2065,14 @@ static const char *ctx_state_names[] = { }; static unsigned long long spufs_acct_time(struct spu_context *ctx, - enum spu_utilization_state state) + enum spuctx_execution_state state) { - struct timespec ts; - unsigned long long time = ctx->stats.times[state]; + unsigned long time = ctx->stats.times[state]; - /* - * In general, utilization statistics are updated by the controlling - * thread as the spu context moves through various well defined - * state transitions, but if the context is lazily loaded its - * utilization statistics are not updated as the controlling thread - * is not tightly coupled with the execution of the spu context. We - * calculate and apply the time delta from the last recorded state - * of the spu context. 
- */ - if (ctx->spu && ctx->stats.util_state == state) { - ktime_get_ts(&ts); - time += timespec_to_ns(&ts) - ctx->stats.tstamp; - } + if (ctx->stats.execution_state == state) + time += jiffies - ctx->stats.tstamp; - return time / NSEC_PER_MSEC; + return jiffies_to_msecs(time); } static unsigned long long spufs_slb_flts(struct spu_context *ctx) @@ -2137,11 +2107,11 @@ static int spufs_show_stat(struct seq_file *s, void *private) spu_acquire(ctx); seq_printf(s, "%s %llu %llu %llu %llu " "%llu %llu %llu %llu %llu %llu %llu %llu\n", - ctx_state_names[ctx->stats.util_state], - spufs_acct_time(ctx, SPU_UTIL_USER), - spufs_acct_time(ctx, SPU_UTIL_SYSTEM), - spufs_acct_time(ctx, SPU_UTIL_IOWAIT), - spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), + ctx_state_names[ctx->stats.execution_state], + spufs_acct_time(ctx, SPUCTX_UTIL_USER), + spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM), + spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT), + spufs_acct_time(ctx, SPUCTX_UTIL_LOADED), ctx->stats.vol_ctx_switch, ctx->stats.invol_ctx_switch, spufs_slb_flts(ctx), @@ -2214,8 +2184,8 @@ struct tree_descr spufs_dir_nosched_contents[] = { { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, - { "signal1", &spufs_signal1_nosched_fops, 0222, }, - { "signal2", &spufs_signal2_nosched_fops, 0222, }, + { "signal1", &spufs_signal1_fops, 0666, }, + { "signal2", &spufs_signal2_fops, 0666, }, { "signal1_type", &spufs_signal1_type, 0666, }, { "signal2_type", &spufs_signal2_type, 0666, }, { "mss", &spufs_mss_fops, 0666, }, diff --git a/trunk/arch/powerpc/platforms/cell/spufs/gang.c b/trunk/arch/powerpc/platforms/cell/spufs/gang.c index 71a443253021..212ea78f9051 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/gang.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/gang.c @@ -35,9 +35,7 @@ struct spu_gang *alloc_spu_gang(void) kref_init(&gang->kref); mutex_init(&gang->mutex); - mutex_init(&gang->aff_mutex); INIT_LIST_HEAD(&gang->list); - INIT_LIST_HEAD(&gang->aff_list_head); out: return gang; @@ -75,10 +73,6 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx) { mutex_lock(&gang->mutex); WARN_ON(ctx->gang != gang); - if (!list_empty(&ctx->aff_list)) { - list_del_init(&ctx->aff_list); - gang->aff_flags &= ~AFF_OFFSETS_SET; - } list_del_init(&ctx->gang_list); gang->contexts--; mutex_unlock(&gang->mutex); diff --git a/trunk/arch/powerpc/platforms/cell/spufs/inode.c b/trunk/arch/powerpc/platforms/cell/spufs/inode.c index b3d0dd118dd0..7eb4d6cbcb74 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/inode.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/inode.c @@ -316,107 +316,11 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) return ret; } -static struct spu_context * -spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, - struct file *filp) -{ - struct spu_context *tmp, *neighbor; - int count, node; - int aff_supp; - - aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, - struct spu, cbe_list))->aff_list); - - if (!aff_supp) - return ERR_PTR(-EINVAL); - - if (flags & SPU_CREATE_GANG) - return ERR_PTR(-EINVAL); - - if (flags & SPU_CREATE_AFFINITY_MEM && - gang->aff_ref_ctx && - gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) - return ERR_PTR(-EEXIST); - - if (gang->aff_flags & AFF_MERGED) - return ERR_PTR(-EBUSY); - - neighbor = NULL; - if (flags & SPU_CREATE_AFFINITY_SPU) { - if (!filp || filp->f_op != &spufs_context_fops) - return ERR_PTR(-EINVAL); - - neighbor = 
get_spu_context( - SPUFS_I(filp->f_dentry->d_inode)->i_ctx); - - if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && - !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && - !list_entry(neighbor->aff_list.next, struct spu_context, - aff_list)->aff_head) - return ERR_PTR(-EEXIST); - - if (gang != neighbor->gang) - return ERR_PTR(-EINVAL); - - count = 1; - list_for_each_entry(tmp, &gang->aff_list_head, aff_list) - count++; - if (list_empty(&neighbor->aff_list)) - count++; - - for (node = 0; node < MAX_NUMNODES; node++) { - if ((cbe_spu_info[node].n_spus - atomic_read( - &cbe_spu_info[node].reserved_spus)) >= count) - break; - } - - if (node == MAX_NUMNODES) - return ERR_PTR(-EEXIST); - } - - return neighbor; -} - -static void -spufs_set_affinity(unsigned int flags, struct spu_context *ctx, - struct spu_context *neighbor) -{ - if (flags & SPU_CREATE_AFFINITY_MEM) - ctx->gang->aff_ref_ctx = ctx; - - if (flags & SPU_CREATE_AFFINITY_SPU) { - if (list_empty(&neighbor->aff_list)) { - list_add_tail(&neighbor->aff_list, - &ctx->gang->aff_list_head); - neighbor->aff_head = 1; - } - - if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) - || list_entry(neighbor->aff_list.next, struct spu_context, - aff_list)->aff_head) { - list_add(&ctx->aff_list, &neighbor->aff_list); - } else { - list_add_tail(&ctx->aff_list, &neighbor->aff_list); - if (neighbor->aff_head) { - neighbor->aff_head = 0; - ctx->aff_head = 1; - } - } - - if (!ctx->gang->aff_ref_ctx) - ctx->gang->aff_ref_ctx = ctx; - } -} - -static int -spufs_create_context(struct inode *inode, struct dentry *dentry, - struct vfsmount *mnt, int flags, int mode, - struct file *aff_filp) +static int spufs_create_context(struct inode *inode, + struct dentry *dentry, + struct vfsmount *mnt, int flags, int mode) { int ret; - int affinity; - struct spu_gang *gang; - struct spu_context *neighbor; ret = -EPERM; if ((flags & SPU_CREATE_NOSCHED) && @@ -432,29 +336,9 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) goto out_unlock; - gang = NULL; - neighbor = NULL; - affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); - if (affinity) { - gang = SPUFS_I(inode)->i_gang; - ret = -EINVAL; - if (!gang) - goto out_unlock; - mutex_lock(&gang->aff_mutex); - neighbor = spufs_assert_affinity(flags, gang, aff_filp); - if (IS_ERR(neighbor)) { - ret = PTR_ERR(neighbor); - goto out_aff_unlock; - } - } - ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); if (ret) - goto out_aff_unlock; - - if (affinity) - spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx, - neighbor); + goto out_unlock; /* * get references for dget and mntget, will be released @@ -468,9 +352,6 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, goto out; } -out_aff_unlock: - if (affinity) - mutex_unlock(&gang->aff_mutex); out_unlock: mutex_unlock(&inode->i_mutex); out: @@ -569,8 +450,7 @@ static int spufs_create_gang(struct inode *inode, static struct file_system_type spufs_type; -long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, - struct file *filp) +long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) { struct dentry *dentry; int ret; @@ -607,7 +487,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, dentry, nd->mnt, mode); else return spufs_create_context(nd->dentry->d_inode, - dentry, nd->mnt, flags, mode, filp); + dentry, nd->mnt, flags, mode); out_dput: dput(dentry); diff --git 
a/trunk/arch/powerpc/platforms/cell/spufs/run.c b/trunk/arch/powerpc/platforms/cell/spufs/run.c index 0b50fa5cb39d..58ae13b7de84 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/run.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/run.c @@ -18,17 +18,15 @@ void spufs_stop_callback(struct spu *spu) wake_up_all(&ctx->stop_wq); } -static inline int spu_stopped(struct spu_context *ctx, u32 *stat) +static inline int spu_stopped(struct spu_context *ctx, u32 * stat) { struct spu *spu; u64 pte_fault; *stat = ctx->ops->status_read(ctx); - - spu = ctx->spu; - if (ctx->state != SPU_STATE_RUNNABLE || - test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + if (ctx->state != SPU_STATE_RUNNABLE) return 1; + spu = ctx->spu; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? @@ -126,10 +124,8 @@ static int spu_setup_isolated(struct spu_context *ctx) return ret; } -static int spu_run_init(struct spu_context *ctx, u32 *npc) +static int spu_run_init(struct spu_context *ctx, u32 * npc) { - spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); - if (ctx->flags & SPU_CREATE_ISOLATE) { unsigned long runcntl; @@ -155,20 +151,16 @@ static int spu_run_init(struct spu_context *ctx, u32 *npc) ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); } - spuctx_switch_state(ctx, SPU_UTIL_USER); - return 0; } -static int spu_run_fini(struct spu_context *ctx, u32 *npc, - u32 *status) +static int spu_run_fini(struct spu_context *ctx, u32 * npc, + u32 * status) { int ret = 0; *status = ctx->ops->status_read(ctx); *npc = ctx->ops->npc_read(ctx); - - spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); spu_release(ctx); if (signal_pending(current)) @@ -297,10 +289,10 @@ static inline int spu_process_events(struct spu_context *ctx) return ret; } -long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) +long spufs_run_spu(struct file *file, struct spu_context *ctx, + u32 *npc, u32 *event) { int ret; - struct spu *spu; u32 status; if (mutex_lock_interruptible(&ctx->run_mutex)) @@ -336,17 +328,6 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); if (unlikely(ret)) break; - spu = ctx->spu; - if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, - &ctx->sched_flags))) { - if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { - spu_switch_notify(spu, ctx); - continue; - } - } - - spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); - if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx); @@ -375,7 +356,6 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) (ctx->state == SPU_STATE_RUNNABLE)) ctx->stats.libassist++; - ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); diff --git a/trunk/arch/powerpc/platforms/cell/spufs/sched.c b/trunk/arch/powerpc/platforms/cell/spufs/sched.c index 227968b4779d..e5b4dd1db286 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/sched.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/sched.c @@ -51,6 +51,9 @@ struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); struct list_head runq[MAX_PRIO]; spinlock_t runq_lock; + struct list_head active_list[MAX_NUMNODES]; + struct mutex active_mutex[MAX_NUMNODES]; + int nr_active[MAX_NUMNODES]; int nr_waiting; }; @@ -124,7 +127,7 @@ void __spu_update_sched_info(struct spu_context *ctx) ctx->policy = current->policy; /* - * A lot of places that don't hold list_mutex poke into + * A lot 
of places that don't hold active_mutex poke into * cpus_allowed, including grab_runnable_context which * already holds the runq_lock. So abuse runq_lock * to protect this field aswell. @@ -138,9 +141,9 @@ void spu_update_sched_info(struct spu_context *ctx) { int node = ctx->spu->node; - mutex_lock(&cbe_spu_info[node].list_mutex); + mutex_lock(&spu_prio->active_mutex[node]); __spu_update_sched_info(ctx); - mutex_unlock(&cbe_spu_info[node].list_mutex); + mutex_unlock(&spu_prio->active_mutex[node]); } static int __node_allowed(struct spu_context *ctx, int node) @@ -166,56 +169,56 @@ static int node_allowed(struct spu_context *ctx, int node) return rval; } -static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); +/** + * spu_add_to_active_list - add spu to active list + * @spu: spu to add to the active list + */ +static void spu_add_to_active_list(struct spu *spu) +{ + int node = spu->node; -void spu_switch_notify(struct spu *spu, struct spu_context *ctx) + mutex_lock(&spu_prio->active_mutex[node]); + spu_prio->nr_active[node]++; + list_add_tail(&spu->list, &spu_prio->active_list[node]); + mutex_unlock(&spu_prio->active_mutex[node]); +} + +static void __spu_remove_from_active_list(struct spu *spu) { - blocking_notifier_call_chain(&spu_switch_notifier, - ctx ? ctx->object_id : 0, spu); + list_del_init(&spu->list); + spu_prio->nr_active[spu->node]--; } -static void notify_spus_active(void) +/** + * spu_remove_from_active_list - remove spu from active list + * @spu: spu to remove from the active list + */ +static void spu_remove_from_active_list(struct spu *spu) { - int node; + int node = spu->node; - /* - * Wake up the active spu_contexts. - * - * When the awakened processes see their "notify_active" flag is set, - * they will call spu_switch_notify(); - */ - for_each_online_node(node) { - struct spu *spu; + mutex_lock(&spu_prio->active_mutex[node]); + __spu_remove_from_active_list(spu); + mutex_unlock(&spu_prio->active_mutex[node]); +} - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { - if (spu->alloc_state != SPU_FREE) { - struct spu_context *ctx = spu->ctx; - set_bit(SPU_SCHED_NOTIFY_ACTIVE, - &ctx->sched_flags); - mb(); - wake_up_all(&ctx->stop_wq); - } - } - mutex_unlock(&cbe_spu_info[node].list_mutex); - } +static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); + +static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) +{ + blocking_notifier_call_chain(&spu_switch_notifier, + ctx ? 
ctx->object_id : 0, spu); } int spu_switch_event_register(struct notifier_block * n) { - int ret; - ret = blocking_notifier_chain_register(&spu_switch_notifier, n); - if (!ret) - notify_spus_active(); - return ret; + return blocking_notifier_chain_register(&spu_switch_notifier, n); } -EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(&spu_switch_notifier, n); } -EXPORT_SYMBOL_GPL(spu_switch_event_unregister); /** * spu_bind_context - bind spu context to physical spu @@ -226,12 +229,6 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, spu->number, spu->node); - spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); - - if (ctx->flags & SPU_CREATE_NOSCHED) - atomic_inc(&cbe_spu_info[spu->node].reserved_spus); - if (!list_empty(&ctx->aff_list)) - atomic_inc(&ctx->gang->aff_sched_count); ctx->stats.slb_flt_base = spu->stats.slb_flt; ctx->stats.class2_intr_base = spu->stats.class2_intr; @@ -241,7 +238,6 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) ctx->spu = spu; ctx->ops = &spu_hw_ops; spu->pid = current->pid; - spu->tgid = current->tgid; spu_associate_mm(spu, ctx->owner); spu->ibox_callback = spufs_ibox_callback; spu->wbox_callback = spufs_wbox_callback; @@ -255,153 +251,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); ctx->state = SPU_STATE_RUNNABLE; - - spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); -} - -/* - * Must be used with the list_mutex held. - */ -static inline int sched_spu(struct spu *spu) -{ - BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); - - return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); -} - -static void aff_merge_remaining_ctxs(struct spu_gang *gang) -{ - struct spu_context *ctx; - - list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { - if (list_empty(&ctx->aff_list)) - list_add(&ctx->aff_list, &gang->aff_list_head); - } - gang->aff_flags |= AFF_MERGED; -} - -static void aff_set_offsets(struct spu_gang *gang) -{ - struct spu_context *ctx; - int offset; - - offset = -1; - list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, - aff_list) { - if (&ctx->aff_list == &gang->aff_list_head) - break; - ctx->aff_offset = offset--; - } - - offset = 0; - list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { - if (&ctx->aff_list == &gang->aff_list_head) - break; - ctx->aff_offset = offset++; - } - - gang->aff_flags |= AFF_OFFSETS_SET; -} - -static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, - int group_size, int lowest_offset) -{ - struct spu *spu; - int node, n; - - /* - * TODO: A better algorithm could be used to find a good spu to be - * used as reference location for the ctxs chain. - */ - node = cpu_to_node(raw_smp_processor_id()); - for (n = 0; n < MAX_NUMNODES; n++, node++) { - node = (node < MAX_NUMNODES) ? 
node : 0; - if (!node_allowed(ctx, node)) - continue; - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { - if ((!mem_aff || spu->has_mem_affinity) && - sched_spu(spu)) { - mutex_unlock(&cbe_spu_info[node].list_mutex); - return spu; - } - } - mutex_unlock(&cbe_spu_info[node].list_mutex); - } - return NULL; -} - -static void aff_set_ref_point_location(struct spu_gang *gang) -{ - int mem_aff, gs, lowest_offset; - struct spu_context *ctx; - struct spu *tmp; - - mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; - lowest_offset = 0; - gs = 0; - - list_for_each_entry(tmp, &gang->aff_list_head, aff_list) - gs++; - - list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, - aff_list) { - if (&ctx->aff_list == &gang->aff_list_head) - break; - lowest_offset = ctx->aff_offset; - } - - gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset); -} - -static struct spu *ctx_location(struct spu *ref, int offset, int node) -{ - struct spu *spu; - - spu = NULL; - if (offset >= 0) { - list_for_each_entry(spu, ref->aff_list.prev, aff_list) { - BUG_ON(spu->node != node); - if (offset == 0) - break; - if (sched_spu(spu)) - offset--; - } - } else { - list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { - BUG_ON(spu->node != node); - if (offset == 0) - break; - if (sched_spu(spu)) - offset++; - } - } - - return spu; -} - -/* - * affinity_check is called each time a context is going to be scheduled. - * It returns the spu ptr on which the context must run. - */ -static int has_affinity(struct spu_context *ctx) -{ - struct spu_gang *gang = ctx->gang; - - if (list_empty(&ctx->aff_list)) - return 0; - - mutex_lock(&gang->aff_mutex); - if (!gang->aff_ref_spu) { - if (!(gang->aff_flags & AFF_MERGED)) - aff_merge_remaining_ctxs(gang); - if (!(gang->aff_flags & AFF_OFFSETS_SET)) - aff_set_offsets(gang); - aff_set_ref_point_location(gang); - } - mutex_unlock(&gang->aff_mutex); - - return gang->aff_ref_spu != NULL; + spu_switch_state(spu, SPU_UTIL_SYSTEM); } /** @@ -413,13 +263,9 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, spu->pid, spu->number, spu->node); - spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); - if (spu->ctx->flags & SPU_CREATE_NOSCHED) - atomic_dec(&cbe_spu_info[spu->node].reserved_spus); - if (!list_empty(&ctx->aff_list)) - if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) - ctx->gang->aff_ref_spu = NULL; + spu_switch_state(spu, SPU_UTIL_IDLE); + spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); @@ -432,8 +278,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) spu->dma_callback = NULL; spu_associate_mm(spu, NULL); spu->pid = 0; - spu->tgid = 0; ctx->ops = &spu_backing_ops; + ctx->spu = NULL; spu->flags = 0; spu->ctx = NULL; @@ -441,10 +287,6 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) (spu->stats.slb_flt - ctx->stats.slb_flt_base); ctx->stats.class2_intr += (spu->stats.class2_intr - ctx->stats.class2_intr_base); - - /* This maps the underlying spu state to idle */ - spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); - ctx->spu = NULL; } /** @@ -510,41 +352,18 @@ static void spu_prio_wait(struct spu_context *ctx) static struct spu *spu_get_idle(struct spu_context *ctx) { - struct spu *spu; - int node, n; - - if (has_affinity(ctx)) { - node = ctx->gang->aff_ref_spu->node; + struct spu *spu = NULL; + int node = 
cpu_to_node(raw_smp_processor_id()); + int n; - mutex_lock(&cbe_spu_info[node].list_mutex); - spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node); - if (spu && spu->alloc_state == SPU_FREE) - goto found; - mutex_unlock(&cbe_spu_info[node].list_mutex); - return NULL; - } - - node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; - - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { - if (spu->alloc_state == SPU_FREE) - goto found; - } - mutex_unlock(&cbe_spu_info[node].list_mutex); + spu = spu_alloc_node(node); + if (spu) + break; } - - return NULL; - - found: - spu->alloc_state = SPU_USED; - mutex_unlock(&cbe_spu_info[node].list_mutex); - pr_debug("Got SPU %d %d\n", spu->number, spu->node); - spu_init_channels(spu); return spu; } @@ -574,15 +393,15 @@ static struct spu *find_victim(struct spu_context *ctx) if (!node_allowed(ctx, node)) continue; - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + mutex_lock(&spu_prio->active_mutex[node]); + list_for_each_entry(spu, &spu_prio->active_list[node], list) { struct spu_context *tmp = spu->ctx; if (tmp->prio > ctx->prio && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } - mutex_unlock(&cbe_spu_info[node].list_mutex); + mutex_unlock(&spu_prio->active_mutex[node]); if (victim) { /* @@ -607,11 +426,7 @@ static struct spu *find_victim(struct spu_context *ctx) victim = NULL; goto restart; } - - mutex_lock(&cbe_spu_info[node].list_mutex); - cbe_spu_info[node].nr_active--; - mutex_unlock(&cbe_spu_info[node].list_mutex); - + spu_remove_from_active_list(spu); spu_unbind_context(spu, victim); victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; @@ -640,6 +455,8 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { + spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); + do { struct spu *spu; @@ -660,12 +477,8 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) if (!spu && rt_prio(ctx->prio)) spu = find_victim(ctx); if (spu) { - int node = spu->node; - - mutex_lock(&cbe_spu_info[node].list_mutex); spu_bind_context(spu, ctx); - cbe_spu_info[node].nr_active++; - mutex_unlock(&cbe_spu_info[node].list_mutex); + spu_add_to_active_list(spu); return 0; } @@ -687,7 +500,7 @@ static struct spu_context *grab_runnable_context(int prio, int node) int best; spin_lock(&spu_prio->runq_lock); - best = find_first_bit(spu_prio->bitmap, prio); + best = sched_find_first_bit(spu_prio->bitmap); while (best < prio) { struct list_head *rq = &spu_prio->runq[best]; @@ -714,17 +527,11 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - int node = spu->node; - - mutex_lock(&cbe_spu_info[node].list_mutex); + spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); - spu->alloc_state = SPU_FREE; - cbe_spu_info[node].nr_active--; - mutex_unlock(&cbe_spu_info[node].list_mutex); - ctx->stats.vol_ctx_switch++; spu->stats.vol_ctx_switch++; - + spu_free(spu); if (new) wake_up(&new->stop_wq); } @@ -743,11 +550,21 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) */ void spu_deactivate(struct spu_context *ctx) { + /* + * We must never reach this for a nosched context, + * but handle the case gracefully
instead of panicking. + */ + if (ctx->flags & SPU_CREATE_NOSCHED) { + WARN_ON(1); + return; + } + __spu_deactivate(ctx, 1, MAX_PRIO); + spuctx_switch_state(ctx, SPUCTX_UTIL_USER); } /** - * spu_yield - yield a physical spu if others are waiting + * spu_yield - yield a physical spu if others are waiting * @ctx: spu context to yield * * Check if there is a higher priority context waiting and if yes @@ -758,12 +575,17 @@ void spu_yield(struct spu_context *ctx) { if (!(ctx->flags & SPU_CREATE_NOSCHED)) { mutex_lock(&ctx->state_mutex); - __spu_deactivate(ctx, 0, MAX_PRIO); + if (__spu_deactivate(ctx, 0, MAX_PRIO)) + spuctx_switch_state(ctx, SPUCTX_UTIL_USER); + else { + spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); + spu_switch_state(ctx->spu, SPU_UTIL_USER); + } mutex_unlock(&ctx->state_mutex); } } -static noinline void spusched_tick(struct spu_context *ctx) +static void spusched_tick(struct spu_context *ctx) { if (ctx->flags & SPU_CREATE_NOSCHED) return; @@ -774,7 +596,7 @@ static noinline void spusched_tick(struct spu_context *ctx) return; /* - * Unfortunately list_mutex ranks outside of state_mutex, so + * Unfortunately active_mutex ranks outside of state_mutex, so * we have to trylock here. If we fail give the context another * tick and try again. */ @@ -784,11 +606,12 @@ static noinline void spusched_tick(struct spu_context *ctx) new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { + + __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); ctx->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; - spu->alloc_state = SPU_FREE; - cbe_spu_info[spu->node].nr_active--; + spu_free(spu); wake_up(&new->stop_wq); /* * We need to break out of the wait loop in @@ -809,7 +632,7 @@ static noinline void spusched_tick(struct spu_context *ctx) * * Return the number of tasks currently running or waiting to run. * - * Note that we don't take runq_lock / list_mutex here. Reading + * Note that we don't take runq_lock / active_mutex here. Reading * a single 32bit value is atomic on powerpc, and we don't care * about memory ordering issues here. 
*/ @@ -818,7 +641,7 @@ static unsigned long count_active_contexts(void) int nr_active = 0, node; for (node = 0; node < MAX_NUMNODES; node++) - nr_active += cbe_spu_info[node].nr_active; + nr_active += spu_prio->nr_active[node]; nr_active += spu_prio->nr_waiting; return nr_active; @@ -858,18 +681,19 @@ static void spusched_wake(unsigned long data) static int spusched_thread(void *unused) { - struct spu *spu; + struct spu *spu, *next; int node; while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); schedule(); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) - if (spu->ctx) - spusched_tick(spu->ctx); - mutex_unlock(&cbe_spu_info[node].list_mutex); + mutex_lock(&spu_prio->active_mutex[node]); + list_for_each_entry_safe(spu, next, + &spu_prio->active_list[node], + list) + spusched_tick(spu->ctx); + mutex_unlock(&spu_prio->active_mutex[node]); } } @@ -927,9 +751,10 @@ int __init spu_sched_init(void) INIT_LIST_HEAD(&spu_prio->runq[i]); __clear_bit(i, spu_prio->bitmap); } + __set_bit(MAX_PRIO, spu_prio->bitmap); for (i = 0; i < MAX_NUMNODES; i++) { - mutex_init(&cbe_spu_info[i].list_mutex); - INIT_LIST_HEAD(&cbe_spu_info[i].spus); + mutex_init(&spu_prio->active_mutex[i]); + INIT_LIST_HEAD(&spu_prio->active_list[i]); } spin_lock_init(&spu_prio->runq_lock); @@ -958,9 +783,9 @@ int __init spu_sched_init(void) return err; } -void spu_sched_exit(void) +void __exit spu_sched_exit(void) { - struct spu *spu; + struct spu *spu, *tmp; int node; remove_proc_entry("spu_loadavg", NULL); @@ -969,11 +794,13 @@ void spu_sched_exit(void) kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) - if (spu->alloc_state != SPU_FREE) - spu->alloc_state = SPU_FREE; - mutex_unlock(&cbe_spu_info[node].list_mutex); + mutex_lock(&spu_prio->active_mutex[node]); + list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], + list) { + list_del_init(&spu->list); + spu_free(spu); + } + mutex_unlock(&spu_prio->active_mutex[node]); } kfree(spu_prio); } diff --git a/trunk/arch/powerpc/platforms/cell/spufs/spu_restore.c b/trunk/arch/powerpc/platforms/cell/spufs/spu_restore.c index 21a9c952d88b..4e19ed7a0756 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/spu_restore.c +++ b/trunk/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -84,13 +84,13 @@ static inline void restore_decr(void) unsigned int decr_running; unsigned int decr; - /* Restore, Step 6(moved): + /* Restore, Step 6: * If the LSCSA "decrementer running" flag is set * then write the SPU_WrDec channel with the * decrementer value from LSCSA. */ offset = LSCSA_QW_OFFSET(decr_status); - decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; + decr_running = regs_spill[offset].slot[0]; if (decr_running) { offset = LSCSA_QW_OFFSET(decr); decr = regs_spill[offset].slot[0]; @@ -318,10 +318,10 @@ int main() build_dma_list(lscsa_ea); /* Step 3. */ restore_upper_240kb(lscsa_ea); /* Step 4. */ /* Step 5: done by 'exit'. */ + restore_decr(); /* Step 6. */ enqueue_putllc(lscsa_ea); /* Step 7. */ set_tag_update(); /* Step 8. */ read_tag_status(); /* Step 9. */ - restore_decr(); /* moved Step 6. */ read_llar_status(); /* Step 10. */ write_ppu_mb(); /* Step 11. */ write_ppuint_mb(); /* Step 12. 
*/ diff --git a/trunk/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/trunk/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped index f383b027e8bf..15183d209b58 100644 --- a/trunk/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped +++ b/trunk/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped @@ -10,7 +10,7 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x24fd8081, 0x1cd80081, 0x33001180, -0x42034003, +0x42030003, 0x33800284, 0x1c010204, 0x40200000, @@ -24,22 +24,22 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x23fffd84, 0x1c100183, 0x217ffa85, -0x3080b000, -0x3080b201, -0x3080b402, -0x3080b603, -0x3080b804, -0x3080ba05, -0x3080bc06, -0x3080be07, -0x3080c008, -0x3080c209, -0x3080c40a, -0x3080c60b, -0x3080c80c, -0x3080ca0d, -0x3080cc0e, -0x3080ce0f, +0x3080a000, +0x3080a201, +0x3080a402, +0x3080a603, +0x3080a804, +0x3080aa05, +0x3080ac06, +0x3080ae07, +0x3080b008, +0x3080b209, +0x3080b40a, +0x3080b60b, +0x3080b80c, +0x3080ba0d, +0x3080bc0e, +0x3080be0f, 0x00003ffc, 0x00000000, 0x00000000, @@ -48,18 +48,19 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x3ec00083, 0xb0a14103, 0x01a00204, -0x3ec10083, -0x4202c002, -0xb0a14203, -0x21a00802, -0x3fbf028a, -0x3f20050a, -0x3fbe0502, +0x3ec10082, +0x4202800e, +0x04000703, +0xb0a14202, +0x21a00803, +0x3fbf028d, +0x3f20068d, +0x3fbe0682, 0x3fe30102, 0x21a00882, -0x3f82028b, -0x3fe3058b, -0x3fbf0584, +0x3f82028f, +0x3fe3078f, +0x3fbf0784, 0x3f200204, 0x3fbe0204, 0x3fe30204, @@ -74,285 +75,252 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x21a00083, 0x40800082, 0x21a00b02, -0x10002612, -0x42a00003, -0x42074006, -0x1800c204, -0x40a00008, -0x40800789, -0x1c010305, -0x34000302, +0x10002818, +0x42a00002, +0x32800007, +0x4207000c, +0x18008208, +0x40a0000b, +0x4080020a, +0x40800709, +0x00200000, +0x42070002, +0x3ac30384, 0x1cffc489, -0x3ec00303, -0x3ec00287, -0xb0408403, -0x24000302, -0x34000282, -0x1c020306, -0xb0408207, -0x18020204, -0x24000282, -0x217ffa09, -0x04000402, -0x21a00802, -0x3fbe0504, -0x3fe30204, -0x21a00884, -0x42074002, -0x21a00902, -0x40803c03, -0x21a00983, -0x04000485, -0x21a00a05, +0x00200000, +0x18008383, +0x38830382, +0x4cffc486, +0x3ac28185, +0xb0408584, +0x28830382, +0x1c020387, +0x38828182, +0xb0408405, +0x1802c408, +0x28828182, +0x217ff886, +0x04000583, +0x21a00803, +0x3fbe0682, +0x3fe30102, +0x04000106, +0x21a00886, +0x04000603, +0x21a00903, +0x40803c02, +0x21a00982, +0x40800003, +0x04000184, +0x21a00a04, 0x40802202, 0x21a00a82, -0x21a00805, -0x21a00884, -0x3fbf0582, +0x42028005, +0x34208702, +0x21002282, +0x21a00804, +0x21a00886, +0x3fbf0782, 0x3f200102, 0x3fbe0102, 0x3fe30102, 0x21a00902, 0x40804003, 0x21a00983, -0x21a00a05, +0x21a00a04, 0x40805a02, 0x21a00a82, 0x40800083, 0x21a00b83, 0x01a00c02, -0x30809c03, -0x34000182, -0x14004102, -0x21002082, -0x01a00d82, -0x3080a003, -0x34000182, +0x01a00d83, +0x3420c282, 0x21a00e02, -0x3080a203, -0x34000182, -0x21a00f02, -0x3080a403, -0x34000182, -0x77400100, -0x3080a603, -0x34000182, +0x34210283, +0x21a00f03, +0x34200284, +0x77400200, +0x3421c282, 0x21a00702, -0x3080a803, -0x34000182, -0x21a00082, -0x3080aa03, -0x34000182, +0x34218283, +0x21a00083, +0x34214282, 0x21a00b02, -0x4020007f, -0x3080ae02, -0x42004805, -0x3080ac04, -0x34000103, -0x34000202, -0x1cffc183, -0x3b810106, -0x0f608184, -0x42013802, -0x5c020183, -0x38810102, -0x3b810102, -0x21000e83, +0x4200480c, +0x00200000, +0x1c010286, +0x34220284, 
+0x34220302, +0x0f608203, +0x5c024204, +0x3b81810b, +0x42013c02, +0x00200000, +0x18008185, +0x38808183, +0x3b814182, +0x21004e84, 0x4020007f, 0x35000100, -0x00000470, -0x000002f8, -0x00000430, +0x000004e0, +0x000002a0, +0x000002e8, +0x00000428, 0x00000360, -0x000002f8, +0x000002e8, +0x000004a0, +0x00000468, 0x000003c8, -0x000004a8, -0x00000298, 0x00000360, -0x00200000, 0x409ffe02, 0x30801203, -0x40800208, -0x3ec40084, -0x40800407, -0x3ac20289, -0xb060c104, -0x3ac1c284, +0x40800204, +0x3ec40085, +0x10009c09, +0x3ac10606, +0xb060c105, +0x4020007f, +0x4020007f, 0x20801203, -0x38820282, -0x41004003, -0xb0408189, -0x28820282, -0x3881c282, -0xb0408304, -0x2881c282, -0x00400000, -0x40800003, -0x35000000, -0x30809e03, -0x34000182, +0x38810602, +0xb0408586, +0x28810602, +0x32004180, +0x34204702, 0x21a00382, 0x4020007f, -0x327fde00, +0x327fdc80, 0x409ffe02, 0x30801203, -0x40800206, -0x3ec40084, -0x40800407, -0x40800608, -0x3ac1828a, -0x3ac20289, -0xb060c104, -0x3ac1c284, +0x40800204, +0x3ec40087, +0x40800405, +0x00200000, +0x40800606, +0x3ac10608, +0x3ac14609, +0x3ac1860a, +0xb060c107, 0x20801203, -0x38818282, 0x41004003, -0xb040818a, -0x10005b0b, -0x41201003, -0x28818282, -0x3881c282, -0xb0408184, -0x41193f83, -0x60ffc003, -0x2881c282, -0x38820282, -0xb0408189, -0x28820282, -0x327fef80, -0x409ffe02, -0x30801203, -0x40800207, -0x3ec40086, -0x4120100b, -0x10005b14, -0x40800404, -0x3ac1c289, -0x40800608, -0xb060c106, -0x3ac10286, -0x3ac2028a, -0x20801203, -0x3881c282, +0x38810602, +0x4020007f, +0xb0408188, +0x4020007f, +0x28810602, +0x41201002, +0x38814603, +0x10009c09, +0xb060c109, +0x4020007f, +0x28814603, 0x41193f83, +0x38818602, 0x60ffc003, -0xb0408589, -0x2881c282, -0x38810282, -0xb0408586, -0x28810282, -0x38820282, 0xb040818a, -0x28820282, -0x4020007f, -0x327fe280, +0x28818602, +0x32003080, 0x409ffe02, 0x30801203, -0x40800207, -0x3ec40084, -0x40800408, -0x10005b14, -0x40800609, -0x3ac1c28a, -0x3ac2028b, -0xb060c104, -0x3ac24284, +0x40800204, +0x3ec40087, +0x41201008, +0x10009c14, +0x40800405, +0x3ac10609, +0x40800606, +0x3ac1460a, +0xb060c107, +0x3ac1860b, 0x20801203, -0x41201003, -0x3881c282, -0xb040830a, -0x2881c282, -0x38820282, -0xb040818b, +0x38810602, +0xb0408409, +0x28810602, +0x38814603, +0xb060c40a, +0x4020007f, +0x28814603, 0x41193f83, +0x38818602, 0x60ffc003, -0x28820282, -0x38824282, -0xb0408184, -0x28824282, +0xb040818b, +0x28818602, +0x32002380, +0x409ffe02, +0x30801204, +0x40800205, +0x3ec40083, +0x40800406, +0x3ac14607, +0x3ac18608, +0xb0810103, +0x41004002, +0x20801204, +0x4020007f, +0x38814603, +0x10009c0b, +0xb060c107, 0x4020007f, -0x327fd580, +0x4020007f, +0x28814603, +0x38818602, +0x4020007f, +0x4020007f, +0xb0408588, +0x28818602, +0x4020007f, +0x32001780, 0x409ffe02, -0x1000658e, -0x40800206, +0x1000640e, +0x40800204, 0x30801203, -0x40800407, -0x3ec40084, -0x40800608, -0x3ac1828a, -0x3ac20289, -0xb060c104, -0x3ac1c284, +0x40800405, +0x3ec40087, +0x40800606, +0x3ac10608, +0x3ac14609, +0x3ac1860a, +0xb060c107, 0x20801203, 0x413d8003, -0x38818282, -0x4020007f, -0x327fd800, -0x409ffe03, -0x30801202, -0x40800207, -0x3ec40084, -0x10005b09, -0x3ac1c288, -0xb0408184, +0x38810602, 0x4020007f, +0x327fd780, +0x409ffe02, +0x10007f0c, +0x40800205, +0x30801204, +0x40800406, +0x3ec40083, +0x3ac14607, +0x3ac18608, +0xb0810103, +0x413d8002, +0x20801204, +0x38814603, 0x4020007f, -0x20801202, -0x3881c282, -0xb0408308, -0x2881c282, -0x327fc680, +0x327feb80, 0x409ffe02, -0x1000588b, -0x40800208, 0x30801203, -0x40800407, -0x3ec40084, -0x3ac20289, -0xb060c104, -0x3ac1c284, +0x40800204, 
+0x3ec40087,
+0x40800405,
+0x1000650a,
+0x40800606,
+0x3ac10608,
+0x3ac14609,
+0x3ac1860a,
+0xb060c107,
 0x20801203,
-0x413d8003,
-0x38820282,
-0x327fbd80,
-0x00200000,
-0x00000da0,
-0x00000000,
-0x00000000,
-0x00000000,
-0x00000d90,
-0x00000000,
-0x00000000,
-0x00000000,
-0x00000db0,
-0x00000000,
-0x00000000,
-0x00000000,
-0x00000dc0,
-0x00000000,
-0x00000000,
-0x00000000,
-0x00000d80,
-0x00000000,
-0x00000000,
-0x00000000,
-0x00000df0,
-0x00000000,
-0x00000000,
-0x00000000,
-0x00000de0,
-0x00000000,
-0x00000000,
-0x00000000,
-0x00000dd0,
-0x00000000,
-0x00000000,
-0x00000000,
-0x00000e04,
-0x00000000,
-0x00000000,
+0x38810602,
+0xb0408588,
+0x4020007f,
+0x327fc980,
+0x00400000,
+0x40800003,
+0x4020007f,
+0x35000000,
 0x00000000,
-0x00000e00,
 0x00000000,
 0x00000000,
 0x00000000,
diff --git a/trunk/arch/powerpc/platforms/cell/spufs/spufs.h b/trunk/arch/powerpc/platforms/cell/spufs/spufs.h
index 8b20c0c1556f..08b3530288ac 100644
--- a/trunk/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/trunk/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -40,13 +40,17 @@ enum {
 struct spu_context_ops;
 struct spu_gang;
-enum {
-	SPU_SCHED_WAS_ACTIVE,	/* was active upon spu_acquire_saved() */
-};
-
-/* ctx->sched_flags */
-enum {
-	SPU_SCHED_NOTIFY_ACTIVE,
+/*
+ * This is the state for spu utilization reporting to userspace.
+ * Because this state is visible to userspace it must never change and needs
+ * to be kept strictly separate from any internal state kept by the kernel.
+ */
+enum spuctx_execution_state {
+	SPUCTX_UTIL_USER = 0,
+	SPUCTX_UTIL_SYSTEM,
+	SPUCTX_UTIL_IOWAIT,
+	SPUCTX_UTIL_LOADED,
+	SPUCTX_UTIL_MAX
 };
 struct spu_context {
@@ -85,8 +89,6 @@ struct spu_context {
 	struct list_head gang_list;
 	struct spu_gang *gang;
-	struct kref *prof_priv_kref;
-	void ( * prof_priv_release) (struct kref *kref);
 	/* owner thread */
 	pid_t tid;
@@ -102,9 +104,9 @@ struct spu_context {
 	/* statistics */
 	struct {
 		/* updates protected by ctx->state_mutex */
-		enum spu_utilization_state util_state;
-		unsigned long long tstamp;	/* time of last state switch */
-		unsigned long long times[SPU_UTIL_MAX];
+		enum spuctx_execution_state execution_state;
+		unsigned long tstamp;		/* time of last ctx switch */
+		unsigned long times[SPUCTX_UTIL_MAX];
 		unsigned long long vol_ctx_switch;
 		unsigned long long invol_ctx_switch;
 		unsigned long long min_flt;
@@ -116,10 +118,6 @@ struct spu_context {
 		unsigned long long class2_intr_base; /* # at last ctx switch */
 		unsigned long long libassist;
 	} stats;
-
-	struct list_head aff_list;
-	int aff_head;
-	int aff_offset;
 };
 struct spu_gang {
@@ -127,19 +125,8 @@ struct spu_gang {
 	struct mutex mutex;
 	struct kref kref;
 	int contexts;
-
-	struct spu_context *aff_ref_ctx;
-	struct list_head aff_list_head;
-	struct mutex aff_mutex;
-	int aff_flags;
-	struct spu *aff_ref_spu;
-	atomic_t aff_sched_count;
 };
-/* Flag bits for spu_gang aff_flags */
-#define AFF_OFFSETS_SET		1
-#define AFF_MERGED		2
-
 struct mfc_dma_command {
 	int32_t pad;	/* reserved */
 	uint32_t lsa;	/* local storage address */
@@ -203,9 +190,10 @@ extern struct tree_descr spufs_dir_contents[];
 extern struct tree_descr spufs_dir_nosched_contents[];
 /* system call implementation */
-long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
-long spufs_create(struct nameidata *nd, unsigned int flags,
-			mode_t mode, struct file *filp);
+long spufs_run_spu(struct file *file,
+		   struct spu_context *ctx, u32 *npc, u32 *status);
+long spufs_create(struct nameidata *nd,
+			unsigned int flags, mode_t mode);
 extern const struct file_operations spufs_context_fops;
 /* gang management */
@@ -218,9 +206,6 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 /* fault handling */
 int spufs_handle_class1(struct spu_context *ctx);
-/* affinity */
-struct spu *affinity_check(struct spu_context *ctx);
-
 /* context management */
 extern atomic_t nr_spu_contexts;
 static inline void spu_acquire(struct spu_context *ctx)
@@ -242,17 +227,15 @@ void spu_unmap_mappings(struct spu_context *ctx);
 void spu_forget(struct spu_context *ctx);
 int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
 void spu_acquire_saved(struct spu_context *ctx);
-void spu_release_saved(struct spu_context *ctx);
 int spu_activate(struct spu_context *ctx, unsigned long flags);
 void spu_deactivate(struct spu_context *ctx);
 void spu_yield(struct spu_context *ctx);
-void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
 void spu_set_timeslice(struct spu_context *ctx);
 void spu_update_sched_info(struct spu_context *ctx);
 void __spu_update_sched_info(struct spu_context *ctx);
 int __init spu_sched_init(void);
-void spu_sched_exit(void);
+void __exit spu_sched_exit(void);
 extern char *isolated_loader;
@@ -310,34 +293,30 @@ extern int spufs_coredump_num_notes;
 * line.
 */
 static inline void spuctx_switch_state(struct spu_context *ctx,
-		enum spu_utilization_state new_state)
+		enum spuctx_execution_state new_state)
 {
-	unsigned long long curtime;
-	signed long long delta;
-	struct timespec ts;
-	struct spu *spu;
-	enum spu_utilization_state old_state;
+	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
-	ktime_get_ts(&ts);
-	curtime = timespec_to_ns(&ts);
-	delta = curtime - ctx->stats.tstamp;
+	if (ctx->stats.execution_state != new_state) {
+		unsigned long curtime = jiffies;
-	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
-	WARN_ON(delta < 0);
-
-	spu = ctx->spu;
-	old_state = ctx->stats.util_state;
-	ctx->stats.util_state = new_state;
-	ctx->stats.tstamp = curtime;
-
-	/*
-	 * Update the physical SPU utilization statistics.
-	 */
-	if (spu) {
-		ctx->stats.times[old_state] += delta;
-		spu->stats.times[old_state] += delta;
-		spu->stats.util_state = new_state;
+		ctx->stats.times[ctx->stats.execution_state] +=
+				curtime - ctx->stats.tstamp;
+		ctx->stats.tstamp = curtime;
+		ctx->stats.execution_state = new_state;
+	}
+}
+
+static inline void spu_switch_state(struct spu *spu,
+		enum spuctx_execution_state new_state)
+{
+	if (spu->stats.utilization_state != new_state) {
+		unsigned long curtime = jiffies;
+
+		spu->stats.times[spu->stats.utilization_state] +=
+				curtime - spu->stats.tstamp;
 		spu->stats.tstamp = curtime;
+		spu->stats.utilization_state = new_state;
 	}
 }
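/*
 * [Editor's note -- not part of the patch] The spuctx_switch_state() and
 * spu_switch_state() helpers restored above are a plain time-in-state
 * accumulator: on every transition, the jiffies elapsed since the last
 * switch are charged to the state being left. A minimal user-space sketch
 * of the same bookkeeping, with hypothetical names, for illustration only:
 */
#include <time.h>

enum util_state { UTIL_USER, UTIL_SYSTEM, UTIL_IOWAIT, UTIL_LOADED, UTIL_MAX };

struct util_stats {
	enum util_state state;
	time_t tstamp;			/* time of last state switch */
	unsigned long times[UTIL_MAX];	/* seconds accumulated per state */
};

static void switch_state(struct util_stats *s, enum util_state new_state)
{
	if (s->state != new_state) {
		time_t now = time(NULL);

		/* charge the elapsed interval to the state we are leaving */
		s->times[s->state] += (unsigned long)(now - s->tstamp);
		s->tstamp = now;
		s->state = new_state;
	}
}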
diff --git a/trunk/arch/powerpc/platforms/cell/spufs/switch.c b/trunk/arch/powerpc/platforms/cell/spufs/switch.c
index 27ffdae98e5a..9c506ba08cdc 100644
--- a/trunk/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/trunk/arch/powerpc/platforms/cell/spufs/switch.c
@@ -180,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
 	case MFC_CNTL_SUSPEND_COMPLETE:
 		if (csa) {
 			csa->priv2.mfc_control_RW =
-				MFC_CNTL_SUSPEND_MASK |
+				in_be64(&priv2->mfc_control_RW) |
 				MFC_CNTL_SUSPEND_DMA_QUEUE;
 		}
 		break;
@@ -190,7 +190,9 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
 			 MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
 			MFC_CNTL_SUSPEND_COMPLETE);
 		if (csa) {
-			csa->priv2.mfc_control_RW = 0;
+			csa->priv2.mfc_control_RW =
+				in_be64(&priv2->mfc_control_RW) &
+				~MFC_CNTL_SUSPEND_DMA_QUEUE;
 		}
 		break;
 	}
@@ -249,8 +251,16 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu)
 	 * Read MFC_CNTL[Ds]. Update saved copy of
 	 * CSA.MFC_CNTL[Ds].
 	 */
-	csa->priv2.mfc_control_RW |=
-		in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING;
+	if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) {
+		csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
+		csa->suspend_time = get_cycles();
+		out_be64(&priv2->spu_chnlcntptr_RW, 7ULL);
+		eieio();
+		csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW);
+		eieio();
+	} else {
+		csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
+	}
 }
 static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
@@ -261,8 +271,7 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
 	 * Write MFC_CNTL[Dh] set to a '1' to halt
 	 * the decrementer.
 	 */
-	out_be64(&priv2->mfc_control_RW,
-		 MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED);
 	eieio();
 }
@@ -606,7 +615,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
 static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
 {
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
 	int i;
 	/* Save, Step 42:
@@ -617,7 +626,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
 	csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
 	/* Save the following CH: [0,3,4,24,25,27] */
-	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+	for (i = 0; i < 7; i++) {
 		idx = ch_indices[i];
 		out_be64(&priv2->spu_chnlcntptr_RW, idx);
 		eieio();
@@ -974,13 +983,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
 	 */
 }
-static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
-		struct spu *spu)
+static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
 {
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
 	/* Restore, Step 7:
-	 * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
+	 * Restore, Step 47.
+	 * Write MFC_Cntl[Dh,Sc]='1','1' to suspend
 	 * the queue and halt the decrementer.
 	 */
 	out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
@@ -1081,7 +1090,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
 static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
 {
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
 	u64 idx;
 	int i;
@@ -1093,7 +1102,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
 	out_be64(&priv2->spu_chnldata_RW, 0UL);
 	/* Reset the following CH: [0,3,4,24,25,27] */
-	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+	for (i = 0; i < 7; i++) {
 		idx = ch_indices[i];
 		out_be64(&priv2->spu_chnlcntptr_RW, idx);
 		eieio();
@@ -1280,15 +1289,7 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu)
 		cycles_t resume_time = get_cycles();
 		cycles_t delta_time = resume_time - csa->suspend_time;
-		csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
-		if (csa->lscsa->decr.slot[0] < delta_time) {
-			csa->lscsa->decr_status.slot[0] |=
-				SPU_DECR_STATUS_WRAPPED;
-		}
-		csa->lscsa->decr.slot[0] -= delta_time;
-	} else {
-		csa->lscsa->decr_status.slot[0] = 0;
 	}
 }
@@ -1397,18 +1398,6 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
 	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
 }
-static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-
-	/* Restore, Step 47.
-	 * Write MFC_Cntl[Sc,Sm]='1','0' to suspend
-	 * the queue.
-	 */
-	out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
-	eieio();
-}
-
 static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
 {
 	/* Restore, Step 49:
@@ -1559,10 +1548,10 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
 	 * "wrapped" flag is set, OR in a '1' to
 	 * CSA.SPU_Event_Status[Tm].
 	 */
-	if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) {
+	if (csa->lscsa->decr_status.slot[0] == 1) {
 		csa->spu_chnldata_RW[0] |= 0x20;
 	}
-	if ((csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) &&
+	if ((csa->lscsa->decr_status.slot[0] == 1) &&
 	    (csa->spu_chnlcnt_RW[0] == 0 &&
 	     ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) &&
 	     ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) {
@@ -1573,13 +1562,18 @@ static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
 {
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
 	int i;
 	/* Restore, Step 59:
-	 * Restore the following CH: [0,3,4,24,25,27]
 	 */
-	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+
+	/* Restore CH 1 without count */
+	out_be64(&priv2->spu_chnlcntptr_RW, 1);
+	out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]);
+
+	/* Restore the following CH: [0,3,4,24,25,27] */
+	for (i = 0; i < 7; i++) {
 		idx = ch_indices[i];
 		out_be64(&priv2->spu_chnlcntptr_RW, idx);
 		eieio();
@@ -1938,7 +1932,7 @@ static void harvest(struct spu_state *prev, struct spu *spu)
 	set_switch_pending(prev, spu);		/* Step 5. */
 	stop_spu_isolate(spu);			/* NEW. */
 	remove_other_spu_access(prev, spu);	/* Step 6. */
-	suspend_mfc_and_halt_decr(prev, spu);	/* Step 7. */
+	suspend_mfc(prev, spu);			/* Step 7. */
 	wait_suspend_mfc_complete(prev, spu);	/* Step 8. */
 	if (!suspend_spe(prev, spu))		/* Step 9. */
 		clear_spu_status(prev, spu);	/* Step 10. */
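/*
 * [Editor's note -- not part of the patch] The setup_decr() and
 * restore_decr_wrapped() hunks above turn on one calculation: while a
 * context is switched out, wall time keeps running, so the saved SPU
 * decrementer must be aged by the elapsed timebase cycles, and a count
 * through zero must raise the timer event. A self-contained sketch of
 * that arithmetic, with hypothetical names:
 */
#include <stdint.h>
#include <stdbool.h>

struct saved_decr {
	uint32_t decr;		/* decrementer value captured at save time */
	uint64_t suspend;	/* timebase (cycles) at save time */
};

/* Age the decrementer by the time spent switched out; report a wrap. */
static bool age_decrementer(struct saved_decr *d, uint64_t resume)
{
	uint64_t delta = resume - d->suspend;
	bool wrapped = d->decr < delta;	/* would have counted through zero */

	d->decr -= (uint32_t)delta;	/* unsigned wrap mirrors the hardware */
	return wrapped;			/* caller ORs in the timer-event bit */
}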
diff --git a/trunk/arch/powerpc/platforms/cell/spufs/syscalls.c b/trunk/arch/powerpc/platforms/cell/spufs/syscalls.c
index 43f0fb88abbc..8e37bdf4dfda 100644
--- a/trunk/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/trunk/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp,
 		goto out;
 	i = SPUFS_I(filp->f_path.dentry->d_inode);
-	ret = spufs_run_spu(i->i_ctx, &npc, &status);
+	ret = spufs_run_spu(filp, i->i_ctx, &npc, &status);
 	if (put_user(npc, unpc))
 		ret = -EFAULT;
@@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
 }
 #endif
-asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
-	mode_t mode, struct file *neighbor)
+asmlinkage long sys_spu_create(const char __user *pathname,
+	unsigned int flags, mode_t mode)
 {
 	char *tmp;
 	int ret;
@@ -90,7 +90,7 @@ asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
 		ret = path_lookup(tmp, LOOKUP_PARENT|
 				LOOKUP_OPEN|LOOKUP_CREATE, &nd);
 		if (!ret) {
-			ret = spufs_create(&nd, flags, mode, neighbor);
+			ret = spufs_create(&nd, flags, mode);
 			path_release(&nd);
 		}
 		putname(tmp);
@@ -99,32 +99,8 @@ asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
 	return ret;
 }
-#ifndef MODULE
-asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags,
-	mode_t mode, int neighbor_fd)
-{
-	int fput_needed;
-	struct file *neighbor;
-	long ret;
-
-	if (flags & SPU_CREATE_AFFINITY_SPU) {
-		ret = -EBADF;
-		neighbor = fget_light(neighbor_fd, &fput_needed);
-		if (neighbor) {
-			ret = do_spu_create(pathname, flags, mode, neighbor);
-			fput_light(neighbor, fput_needed);
-		}
-	}
-	else {
-		ret = do_spu_create(pathname, flags, mode, NULL);
-	}
-
-	return ret;
-}
-#endif
-
 struct spufs_calls spufs_calls = {
-	.create_thread = do_spu_create,
+	.create_thread = sys_spu_create,
 	.spu_run = do_spu_run,
 	.owner = THIS_MODULE,
 };
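/*
 * [Editor's note -- not part of the patch] sys_spu_create() is exported to
 * the core kernel through the spufs_calls function-pointer table so spufs
 * can be built as a module. A minimal sketch of that dispatch pattern with
 * hypothetical names -- not the kernel's actual plumbing:
 */
struct calls {
	long (*create_thread)(const char *name, unsigned int flags, int mode);
	long (*spu_run)(int fd);
};

static struct calls *registered;	/* set when the module loads */

static long stub_create(const char *name, unsigned int flags, int mode)
{
	if (!registered)		/* module not loaded */
		return -19;		/* -ENODEV */
	return registered->create_thread(name, flags, mode);
}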
diff --git a/trunk/arch/powerpc/sysdev/Makefile b/trunk/arch/powerpc/sysdev/Makefile
index 484eb4e0e9db..f65078c3d3b3 100644
--- a/trunk/arch/powerpc/sysdev/Makefile
+++ b/trunk/arch/powerpc/sysdev/Makefile
@@ -17,7 +17,6 @@ obj-$(CONFIG_QUICC_ENGINE) += qe_lib/
 mv64x60-$(CONFIG_PCI) += mv64x60_pci.o
 obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o
 obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o
-obj-$(CONFIG_AXON_RAM) += axonram.o
 # contains only the suspend handler for time
 ifeq ($(CONFIG_RTC_CLASS),)
diff --git a/trunk/arch/powerpc/sysdev/axonram.c b/trunk/arch/powerpc/sysdev/axonram.c
deleted file mode 100644
index 2326d5dc5752..000000000000
--- a/trunk/arch/powerpc/sysdev/axonram.c
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2006
- *
- * Author: Maxim Shchetynin
- *
- * Axon DDR2 device driver.
- * It registers one block device per Axon's DDR2 memory bank found on a system.
- * Block devices are called axonram?, their major and minor numbers are
- * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#define AXON_RAM_MODULE_NAME		"axonram"
-#define AXON_RAM_DEVICE_NAME		"axonram"
-#define AXON_RAM_MINORS_PER_DISK	16
-#define AXON_RAM_BLOCK_SHIFT		PAGE_SHIFT
-#define AXON_RAM_BLOCK_SIZE		1 << AXON_RAM_BLOCK_SHIFT
-#define AXON_RAM_SECTOR_SHIFT		9
-#define AXON_RAM_SECTOR_SIZE		1 << AXON_RAM_SECTOR_SHIFT
-#define AXON_RAM_IRQ_FLAGS		IRQF_SHARED | IRQF_TRIGGER_RISING
-
-struct axon_ram_bank {
-	struct of_device *device;
-	struct gendisk *disk;
-	unsigned int irq_correctable;
-	unsigned int irq_uncorrectable;
-	unsigned long ph_addr;
-	unsigned long io_addr;
-	unsigned long size;
-	unsigned long ecc_counter;
-};
-
-static ssize_t
-axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct of_device *device = to_of_device(dev);
-	struct axon_ram_bank *bank = device->dev.platform_data;
-
-	BUG_ON(!bank);
-
-	return sprintf(buf, "%ld\n", bank->ecc_counter);
-}
-
-static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL);
-
-/**
- * axon_ram_irq_handler - interrupt handler for Axon RAM ECC
- * @irq: interrupt ID
- * @dev: pointer to of_device
- */
-static irqreturn_t
-axon_ram_irq_handler(int irq, void *dev)
-{
-	struct of_device *device = dev;
-	struct axon_ram_bank *bank = device->dev.platform_data;
-
-	BUG_ON(!bank);
-
-	if (irq == bank->irq_correctable) {
-		dev_err(&device->dev, "Correctable memory error occured\n");
-		bank->ecc_counter++;
-		return IRQ_HANDLED;
-	} else if (irq == bank->irq_uncorrectable) {
-		dev_err(&device->dev, "Uncorrectable memory error occured\n");
-		panic("Critical ECC error on %s", device->node->full_name);
-	}
-
-	return IRQ_NONE;
-}
-
-/**
- * axon_ram_make_request - make_request() method for block device
- * @queue, @bio: see blk_queue_make_request()
- */
-static int
-axon_ram_make_request(struct request_queue *queue, struct bio *bio)
-{
-	struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
-	unsigned long phys_mem, phys_end;
-	void *user_mem;
-	struct bio_vec *vec;
-	unsigned int transfered;
-	unsigned short idx;
-	int rc = 0;
-
-	phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT);
-	phys_end = bank->io_addr + bank->size;
-	transfered = 0;
-	bio_for_each_segment(vec, bio, idx) {
-		if (unlikely(phys_mem + vec->bv_len > phys_end)) {
-			bio_io_error(bio, bio->bi_size);
-			rc = -ERANGE;
-			break;
-		}
-
-		user_mem = page_address(vec->bv_page) + vec->bv_offset;
-		if (bio_data_dir(bio) == READ)
-			memcpy(user_mem, (void *) phys_mem, vec->bv_len);
-		else
-			memcpy((void *) phys_mem, user_mem, vec->bv_len);
-
-		phys_mem += vec->bv_len;
-		transfered += vec->bv_len;
-	}
-	bio_endio(bio, transfered, 0);
-
-	return rc;
-}
-
-/**
- * axon_ram_direct_access - direct_access() method for block device
- * @device, @sector, @data: see block_device_operations method
- */
-static int
-axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       unsigned long *data)
-{
-	struct axon_ram_bank *bank = device->bd_disk->private_data;
-	loff_t offset;
-
-	offset = sector << AXON_RAM_SECTOR_SHIFT;
-	if (offset >= bank->size) {
-		dev_err(&bank->device->dev, "Access outside of address space\n");
-		return -ERANGE;
-	}
-
-	*data = bank->ph_addr + offset;
-
-	return 0;
-}
-
-static struct block_device_operations axon_ram_devops = {
-	.owner = THIS_MODULE,
-	.direct_access = axon_ram_direct_access
-};
-
-/**
- * axon_ram_probe - probe() method for platform driver
- * @device, @device_id: see of_platform_driver method
- */
-static int
-axon_ram_probe(struct of_device *device, const struct of_device_id *device_id)
-{
-	static int axon_ram_bank_id = -1;
-	struct axon_ram_bank *bank;
-	struct resource resource;
-	int rc = 0;
-
-	axon_ram_bank_id++;
-
-	dev_info(&device->dev, "Found memory controller on %s\n",
-			device->node->full_name);
-
-	bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL);
-	if (bank == NULL) {
-		dev_err(&device->dev, "Out of memory\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	device->dev.platform_data = bank;
-
-	bank->device = device;
-
-	if (of_address_to_resource(device->node, 0, &resource) != 0) {
-		dev_err(&device->dev, "Cannot access device tree\n");
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	bank->size = resource.end - resource.start + 1;
-
-	if (bank->size == 0) {
-		dev_err(&device->dev, "No DDR2 memory found for %s%d\n",
-				AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
-		rc = -ENODEV;
-		goto failed;
-	}
-
-	dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
-			AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
-
-	bank->ph_addr = resource.start;
-	bank->io_addr = (unsigned long) ioremap_flags(
-			bank->ph_addr, bank->size, _PAGE_NO_CACHE);
-	if (bank->io_addr == 0) {
-		dev_err(&device->dev, "ioremap() failed\n");
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK);
-	if (bank->disk == NULL) {
-		dev_err(&device->dev, "Cannot register disk\n");
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	bank->disk->first_minor = 0;
-	bank->disk->fops = &axon_ram_devops;
-	bank->disk->private_data = bank;
-	bank->disk->driverfs_dev = &device->dev;
-
-	sprintf(bank->disk->disk_name, "%s%d",
-			AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
-	bank->disk->major = register_blkdev(0, bank->disk->disk_name);
-	if (bank->disk->major < 0) {
-		dev_err(&device->dev, "Cannot register block device\n");
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
-	if (bank->disk->queue == NULL) {
-		dev_err(&device->dev, "Cannot register disk queue\n");
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
-	blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
-	blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
-	add_disk(bank->disk);
-
-	bank->irq_correctable = irq_of_parse_and_map(device->node, 0);
-	bank->irq_uncorrectable = irq_of_parse_and_map(device->node, 1);
-	if ((bank->irq_correctable <= 0) || (bank->irq_uncorrectable <= 0)) {
-		dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	rc = request_irq(bank->irq_correctable, axon_ram_irq_handler,
-			AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
-	if (rc != 0) {
-		dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
-		bank->irq_correctable = bank->irq_uncorrectable = 0;
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	rc = request_irq(bank->irq_uncorrectable, axon_ram_irq_handler,
-			AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
-	if (rc != 0) {
-		dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
-		bank->irq_uncorrectable = 0;
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	rc = device_create_file(&device->dev, &dev_attr_ecc);
-	if (rc != 0) {
-		dev_err(&device->dev, "Cannot create sysfs file\n");
-		rc = -EFAULT;
-		goto failed;
-	}
-
-	return 0;
-
-failed:
-	if (bank != NULL) {
-		if (bank->irq_uncorrectable > 0)
-			free_irq(bank->irq_uncorrectable, device);
-		if (bank->irq_correctable > 0)
-			free_irq(bank->irq_correctable, device);
-		if (bank->disk != NULL) {
-			if (bank->disk->queue != NULL)
-				blk_cleanup_queue(bank->disk->queue);
-			if (bank->disk->major > 0)
-				unregister_blkdev(bank->disk->major,
-						bank->disk->disk_name);
-			del_gendisk(bank->disk);
-		}
-		device->dev.platform_data = NULL;
-		if (bank->io_addr != 0)
-			iounmap((void __iomem *) bank->io_addr);
-		kfree(bank);
-	}
-
-	return rc;
-}
-
-/**
- * axon_ram_remove - remove() method for platform driver
- * @device: see of_platform_driver method
- */
-static int
-axon_ram_remove(struct of_device *device)
-{
-	struct axon_ram_bank *bank = device->dev.platform_data;
-
-	BUG_ON(!bank || !bank->disk);
-
-	device_remove_file(&device->dev, &dev_attr_ecc);
-	free_irq(bank->irq_uncorrectable, device);
-	free_irq(bank->irq_correctable, device);
-	blk_cleanup_queue(bank->disk->queue);
-	unregister_blkdev(bank->disk->major, bank->disk->disk_name);
-	del_gendisk(bank->disk);
-	iounmap((void __iomem *) bank->io_addr);
-	kfree(bank);
-
-	return 0;
-}
-
-static struct of_device_id axon_ram_device_id[] = {
-	{
-		.type = "dma-memory"
-	},
-	{}
-};
-
-static struct of_platform_driver axon_ram_driver = {
-	.owner = THIS_MODULE,
-	.name = AXON_RAM_MODULE_NAME,
-	.match_table = axon_ram_device_id,
-	.probe = axon_ram_probe,
-	.remove = axon_ram_remove
-};
-
-/**
- * axon_ram_init
- */
-static int __init
-axon_ram_init(void)
-{
-	return of_register_platform_driver(&axon_ram_driver);
-}
-
-/**
- * axon_ram_exit
- */
-static void __exit
-axon_ram_exit(void)
-{
-	of_unregister_platform_driver(&axon_ram_driver);
-}
-
-module_init(axon_ram_init);
-module_exit(axon_ram_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Maxim Shchetynin ");
-MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
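/*
 * [Editor's note -- not part of the patch] The deleted
 * axon_ram_direct_access() above reduces to one check: translate a
 * 512-byte sector into a byte offset and refuse anything past the end of
 * the memory bank. A self-contained sketch of that arithmetic, with
 * hypothetical names:
 */
#include <stdint.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors */

/* Return the byte offset of 'sector' inside a bank of 'size' bytes, or -1. */
static int64_t sector_to_offset(uint64_t sector, uint64_t size)
{
	uint64_t offset = sector << SECTOR_SHIFT;

	if (offset >= size)	/* access outside the address space */
		return -1;
	return (int64_t)offset;
}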
diff --git a/trunk/arch/powerpc/sysdev/pmi.c b/trunk/arch/powerpc/sysdev/pmi.c
index 2f91b55b7754..85a7c99c1003 100644
--- a/trunk/arch/powerpc/sysdev/pmi.c
+++ b/trunk/arch/powerpc/sysdev/pmi.c
@@ -48,13 +48,15 @@ struct pmi_data {
 	struct work_struct work;
 };
-static struct pmi_data *data;
 static int pmi_irq_handler(int irq, void *dev_id)
 {
+	struct pmi_data *data;
 	u8 type;
 	int rc;
+
+	data = dev_id;
+
 	spin_lock(&data->pmi_spinlock);
 	type = ioread8(data->pmi_reg + PMI_READ_TYPE);
@@ -109,13 +111,16 @@ MODULE_DEVICE_TABLE(of, pmi_match);
 static void pmi_notify_handlers(struct work_struct *work)
 {
+	struct pmi_data *data;
 	struct pmi_handler *handler;
+
+	data = container_of(work, struct pmi_data, work);
+
 	spin_lock(&data->handler_spinlock);
 	list_for_each_entry(handler, &data->handler, node) {
 		pr_debug(KERN_INFO "pmi: notifying handler %p\n", handler);
 		if (handler->type == data->msg.type)
-			handler->handle_pmi_message(data->msg);
+			handler->handle_pmi_message(data->dev, data->msg);
 	}
 	spin_unlock(&data->handler_spinlock);
 }
@@ -124,14 +129,9 @@ static int pmi_of_probe(struct of_device *dev,
 			const struct of_device_id *match)
 {
 	struct device_node *np = dev->node;
+	struct pmi_data *data;
 	int rc;
-	if (data) {
-		printk(KERN_ERR "pmi: driver has already been initialized.\n");
-		rc = -EBUSY;
-		goto out;
-	}
-
 	data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL);
 	if (!data) {
 		printk(KERN_ERR "pmi: could not allocate memory.\n");
@@ -154,6 +154,7 @@ static int pmi_of_probe(struct of_device *dev,
 	INIT_WORK(&data->work, pmi_notify_handlers);
+	dev->dev.driver_data = data;
 	data->dev = dev;
 	data->irq = irq_of_parse_and_map(np, 0);
@@ -163,7 +164,7 @@ static int pmi_of_probe(struct of_device *dev,
 		goto error_cleanup_iomap;
 	}
-	rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL);
+	rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", data);
 	if (rc) {
 		printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n",
 				data->irq, rc);
@@ -186,9 +187,12 @@ static int pmi_of_probe(struct of_device *dev,
 static int pmi_of_remove(struct of_device *dev)
 {
+	struct pmi_data *data;
 	struct pmi_handler *handler, *tmp;
-	free_irq(data->irq, NULL);
+	data = dev->dev.driver_data;
+
+	free_irq(data->irq, data);
 	iounmap(data->pmi_reg);
 	spin_lock(&data->handler_spinlock);
@@ -198,8 +202,7 @@ static int pmi_of_remove(struct of_device *dev)
 	spin_unlock(&data->handler_spinlock);
-	kfree(data);
-	data = NULL;
+	kfree(dev->dev.driver_data);
 	return 0;
 }
@@ -223,13 +226,13 @@ static void __exit pmi_module_exit(void)
 }
 module_exit(pmi_module_exit);
-int pmi_send_message(pmi_message_t msg)
+void pmi_send_message(struct of_device *device, pmi_message_t msg)
 {
+	struct pmi_data *data;
 	unsigned long flags;
 	DECLARE_COMPLETION_ONSTACK(completion);
-	if (!data)
-		return -ENODEV;
+	data = device->dev.driver_data;
 	mutex_lock(&data->msg_mutex);
@@ -253,26 +256,30 @@ int pmi_send_message(pmi_message_t msg)
 	data->completion = NULL;
 	mutex_unlock(&data->msg_mutex);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(pmi_send_message);
-int pmi_register_handler(struct pmi_handler *handler)
+void pmi_register_handler(struct of_device *device,
+			  struct pmi_handler *handler)
 {
+	struct pmi_data *data;
+	data = device->dev.driver_data;
+
 	if (!data)
-		return -ENODEV;
+		return;
 	spin_lock(&data->handler_spinlock);
 	list_add_tail(&handler->node, &data->handler);
 	spin_unlock(&data->handler_spinlock);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(pmi_register_handler);
-void pmi_unregister_handler(struct pmi_handler *handler)
+void pmi_unregister_handler(struct of_device *device,
+			    struct pmi_handler *handler)
 {
+	struct pmi_data *data;
+	data = device->dev.driver_data;
+
 	if (!data)
 		return;
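/*
 * [Editor's note -- not part of the patch] The pmi.c hunks replace one
 * file-scope 'data' pointer with per-device state: probe() stashes it in
 * driver_data, and the IRQ handler recovers it from the dev_id cookie
 * passed at request_irq() time. The same pattern, sketched outside the
 * kernel with hypothetical names:
 */
struct pmi_state {
	int irq;		/* per-device fields live here now */
};

struct device_sk {
	void *driver_data;	/* stands in for of_device.dev.driver_data */
};

/* probe(): attach the state to the device instead of assigning a global */
static void probe_sketch(struct device_sk *d, struct pmi_state *st)
{
	d->driver_data = st;
}

/* IRQ handler: recover the state from the cookie registered with the IRQ */
static int irq_sketch(int irq, void *dev_id)
{
	struct pmi_state *st = dev_id;

	return st->irq == irq;	/* handled iff the interrupt is ours */
}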
diff --git a/trunk/drivers/media/dvb/ttpci/av7110.c b/trunk/drivers/media/dvb/ttpci/av7110.c
index 2cee9e3bd29f..8178832d14a8 100644
--- a/trunk/drivers/media/dvb/ttpci/av7110.c
+++ b/trunk/drivers/media/dvb/ttpci/av7110.c
@@ -2267,7 +2267,7 @@ static int frontend_init(struct av7110 *av7110)
 			FE_FUNC_OVERRIDE(av7110->fe->ops.diseqc_send_master_cmd, av7110->fe_diseqc_send_master_cmd, av7110_fe_diseqc_send_master_cmd);
 			FE_FUNC_OVERRIDE(av7110->fe->ops.diseqc_send_burst, av7110->fe_diseqc_send_burst, av7110_fe_diseqc_send_burst);
 			FE_FUNC_OVERRIDE(av7110->fe->ops.set_tone, av7110->fe_set_tone, av7110_fe_set_tone);
-			FE_FUNC_OVERRIDE(av7110->fe->ops.set_voltage, av7110->fe_set_voltage, av7110_fe_set_voltage;)
+			FE_FUNC_OVERRIDE(av7110->fe->ops.set_voltage, av7110->fe_set_voltage, av7110_fe_set_voltage);
 			FE_FUNC_OVERRIDE(av7110->fe->ops.dishnetwork_send_legacy_command, av7110->fe_dishnetwork_send_legacy_command, av7110_fe_dishnetwork_send_legacy_command);
 			FE_FUNC_OVERRIDE(av7110->fe->ops.set_frontend, av7110->fe_set_frontend, av7110_fe_set_frontend);
diff --git a/trunk/drivers/oprofile/buffer_sync.c b/trunk/drivers/oprofile/buffer_sync.c
index 8134c7e198a5..edd6de995726 100644
--- a/trunk/drivers/oprofile/buffer_sync.c
+++ b/trunk/drivers/oprofile/buffer_sync.c
@@ -26,9 +26,8 @@
 #include
 #include
 #include
-#include
 #include
-
+
 #include "oprofile_stats.h"
 #include "event_buffer.h"
 #include "cpu_buffer.h"
diff --git a/trunk/drivers/oprofile/event_buffer.h b/trunk/drivers/oprofile/event_buffer.h
index 5076ed1ebd8f..9b6a4ebd03e3 100644
--- a/trunk/drivers/oprofile/event_buffer.h
+++ b/trunk/drivers/oprofile/event_buffer.h
@@ -19,10 +19,28 @@ void free_event_buffer(void);
 /* wake up the process sleeping on the event file */
 void wake_up_buffer_waiter(void);
-
+
+/* Each escaped entry is prefixed by ESCAPE_CODE
+ * then one of the following codes, then the
+ * relevant data.
+ */
+#define ESCAPE_CODE			~0UL
+#define CTX_SWITCH_CODE			1
+#define CPU_SWITCH_CODE			2
+#define COOKIE_SWITCH_CODE		3
+#define KERNEL_ENTER_SWITCH_CODE	4
+#define KERNEL_EXIT_SWITCH_CODE		5
+#define MODULE_LOADED_CODE		6
+#define CTX_TGID_CODE			7
+#define TRACE_BEGIN_CODE		8
+#define TRACE_END_CODE			9
+
 #define INVALID_COOKIE ~0UL
 #define NO_COOKIE 0UL
+/* add data to the event buffer */
+void add_event_entry(unsigned long data);
+
 extern const struct file_operations event_buffer_fops;
 /* mutex between sync_cpu_buffers() and the
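/*
 * [Editor's note -- not part of the patch] The codes moved back into
 * event_buffer.h define a simple in-band escape protocol: any record that
 * is not a plain sample is introduced by ESCAPE_CODE, then a record code,
 * then its payload. A hedged sketch of how a context-switch record would
 * be encoded into a word buffer under that scheme (names hypothetical,
 * not the driver's code):
 */
#define SK_ESCAPE_CODE (~0UL)
#define SK_CTX_SWITCH  1UL

/* Append an escaped context-switch record; returns the new write pointer. */
static unsigned long *emit_ctx_switch(unsigned long *p, unsigned long pid,
				      unsigned long cookie)
{
	*p++ = SK_ESCAPE_CODE;	/* marker: the next word is a record code */
	*p++ = SK_CTX_SWITCH;	/* record type */
	*p++ = pid;		/* payload words follow */
	*p++ = cookie;
	return p;
}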
diff --git a/trunk/drivers/oprofile/oprof.c b/trunk/drivers/oprofile/oprof.c
index 2c645170f06e..e5162a64018b 100644
--- a/trunk/drivers/oprofile/oprof.c
+++ b/trunk/drivers/oprofile/oprof.c
@@ -53,24 +53,9 @@ int oprofile_setup(void)
 	 * us missing task deaths and eventually oopsing
 	 * when trying to process the event buffer.
 	 */
-	if (oprofile_ops.sync_start) {
-		int sync_ret = oprofile_ops.sync_start();
-		switch (sync_ret) {
-		case 0:
-			goto post_sync;
-		case 1:
-			goto do_generic;
-		case -1:
-			goto out3;
-		default:
-			goto out3;
-		}
-	}
-do_generic:
 	if ((err = sync_start()))
 		goto out3;
-post_sync:
 	is_setup = 1;
 	mutex_unlock(&start_mutex);
 	return 0;
@@ -133,20 +118,7 @@ void oprofile_stop(void)
 void oprofile_shutdown(void)
 {
 	mutex_lock(&start_mutex);
-	if (oprofile_ops.sync_stop) {
-		int sync_ret = oprofile_ops.sync_stop();
-		switch (sync_ret) {
-		case 0:
-			goto post_sync;
-		case 1:
-			goto do_generic;
-		default:
-			goto post_sync;
-		}
-	}
-do_generic:
 	sync_stop();
-post_sync:
 	if (oprofile_ops.shutdown)
 		oprofile_ops.shutdown();
 	is_setup = 0;
diff --git a/trunk/include/asm-powerpc/oprofile_impl.h b/trunk/include/asm-powerpc/oprofile_impl.h
index 938fefb4c4bc..8d6b47f7b300 100644
--- a/trunk/include/asm-powerpc/oprofile_impl.h
+++ b/trunk/include/asm-powerpc/oprofile_impl.h
@@ -39,16 +39,14 @@ struct op_system_config {
 /* Per-arch configuration */
 struct op_powerpc_model {
-	int (*reg_setup) (struct op_counter_config *,
+	void (*reg_setup) (struct op_counter_config *,
			   struct op_system_config *,
			   int num_counters);
-	int (*cpu_setup) (struct op_counter_config *);
-	int (*start) (struct op_counter_config *);
-	int (*global_start) (struct op_counter_config *);
+	void (*cpu_setup) (struct op_counter_config *);
+	void (*start) (struct op_counter_config *);
+	void (*global_start) (struct op_counter_config *);
 	void (*stop) (void);
 	void (*global_stop) (void);
-	int (*sync_start)(void);
-	int (*sync_stop)(void);
 	void (*handle_interrupt) (struct pt_regs *,
				  struct op_counter_config *);
 	int num_counters;
diff --git a/trunk/include/asm-powerpc/pmi.h b/trunk/include/asm-powerpc/pmi.h
index 2259d4ce3846..cb0f8aa43088 100644
--- a/trunk/include/asm-powerpc/pmi.h
+++ b/trunk/include/asm-powerpc/pmi.h
@@ -55,13 +55,13 @@ typedef struct {
 struct pmi_handler {
 	struct list_head node;
 	u8 type;
-	void (*handle_pmi_message) (pmi_message_t);
+	void (*handle_pmi_message) (struct of_device *, pmi_message_t);
 };
-int pmi_register_handler(struct pmi_handler *);
-void pmi_unregister_handler(struct pmi_handler *);
+void pmi_register_handler(struct of_device *, struct pmi_handler *);
+void pmi_unregister_handler(struct of_device *, struct pmi_handler *);
-int pmi_send_message(pmi_message_t);
+void pmi_send_message(struct of_device *, pmi_message_t);
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_PMI_H */
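/*
 * [Editor's note -- not part of the patch] With the pmi.h signature change
 * above, every PMI entry point names the device it talks to, and handlers
 * are called with that device as their first argument. A self-contained
 * sketch of the callback-table pattern with the extra device parameter
 * (all names hypothetical, not the kernel's types):
 */
typedef struct { unsigned char type; unsigned short data; } msg_t;
struct pmi_dev;	/* opaque device handle, stands in for struct of_device */

struct handler_sk {
	unsigned char type;
	void (*handle)(struct pmi_dev *, msg_t);	/* device now explicit */
};

/* Deliver m to h if the type matches, naming the source device. */
static void dispatch(struct pmi_dev *d, struct handler_sk *h, msg_t m)
{
	if (h->type == m.type)
		h->handle(d, m);
}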
diff --git a/trunk/include/asm-powerpc/spu.h b/trunk/include/asm-powerpc/spu.h
index 8836c0f1f2f7..eedc828cef2d 100644
--- a/trunk/include/asm-powerpc/spu.h
+++ b/trunk/include/asm-powerpc/spu.h
@@ -107,10 +107,10 @@ struct spu_runqueue;
 struct device_node;
 enum spu_utilization_state {
-	SPU_UTIL_USER,
 	SPU_UTIL_SYSTEM,
+	SPU_UTIL_USER,
 	SPU_UTIL_IOWAIT,
-	SPU_UTIL_IDLE_LOADED,
+	SPU_UTIL_IDLE,
 	SPU_UTIL_MAX
 };
@@ -121,9 +121,9 @@ struct spu {
 	unsigned long problem_phys;
 	struct spu_problem __iomem *problem;
 	struct spu_priv2 __iomem *priv2;
-	struct list_head cbe_list;
+	struct list_head list;
+	struct list_head sched_list;
 	struct list_head full_list;
-	enum { SPU_FREE, SPU_USED } alloc_state;
 	int number;
 	unsigned int irqs[3];
 	u32 node;
@@ -137,7 +137,6 @@ struct spu {
 	struct spu_runqueue *rq;
 	unsigned long long timestamp;
 	pid_t pid;
-	pid_t tgid;
 	int class_0_pending;
 	spinlock_t register_lock;
@@ -166,14 +165,11 @@ struct spu {
 	struct sys_device sysdev;
-	int has_mem_affinity;
-	struct list_head aff_list;
-
 	struct {
 		/* protected by interrupt reentrancy */
-		enum spu_utilization_state util_state;
-		unsigned long long tstamp;
-		unsigned long long times[SPU_UTIL_MAX];
+		enum spu_utilization_state utilization_state;
+		unsigned long tstamp;	/* time of last ctx switch */
+		unsigned long times[SPU_UTIL_MAX];
 		unsigned long long vol_ctx_switch;
 		unsigned long long invol_ctx_switch;
 		unsigned long long min_flt;
@@ -185,29 +181,13 @@ struct spu {
 	} stats;
 };
-struct cbe_spu_info {
-	struct mutex list_mutex;
-	struct list_head spus;
-	int n_spus;
-	int nr_active;
-	atomic_t reserved_spus;
-};
-
-extern struct cbe_spu_info cbe_spu_info[];
-
-void spu_init_channels(struct spu *spu);
+struct spu *spu_alloc(void);
+struct spu *spu_alloc_node(int node);
+void spu_free(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);
-#ifdef CONFIG_KEXEC
-void crash_register_spus(struct list_head *list);
-#else
-static inline void crash_register_spus(struct list_head *list)
-{
-}
-#endif
-
 extern void spu_invalidate_slbs(struct spu *spu);
 extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
@@ -215,20 +195,6 @@ extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
 struct mm_struct;
 extern void spu_flush_all_slbs(struct mm_struct *mm);
-/* This interface allows a profiler (e.g., OProfile) to store a ref
- * to spu context information that it creates.  This caching technique
- * avoids the need to recreate this information after a save/restore operation.
- *
- * Assumes the caller has already incremented the ref count to
- * profile_info; then spu_context_destroy must call kref_put
- * on prof_info_kref.
- */
-void spu_set_profile_private_kref(struct spu_context *ctx,
-				  struct kref *prof_info_kref,
-				  void ( * prof_info_release) (struct kref *kref));
-
-void *spu_get_profile_private_kref(struct spu_context *ctx);
-
 /* system callbacks from the SPU */
 struct spu_syscall_block {
 	u64 nr_ret;
@@ -240,8 +206,7 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
 struct file;
 extern struct spufs_calls {
 	asmlinkage long (*create_thread)(const char __user *name,
-					unsigned int flags, mode_t mode,
-					struct file *neighbor);
+					unsigned int flags, mode_t mode);
 	asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc,
						__u32 __user *ustatus);
 	struct module *owner;
@@ -268,10 +233,8 @@ struct spu_coredump_calls {
 #define SPU_CREATE_GANG			0x0002
 #define SPU_CREATE_NOSCHED		0x0004
 #define SPU_CREATE_ISOLATE		0x0008
-#define SPU_CREATE_AFFINITY_SPU		0x0010
-#define SPU_CREATE_AFFINITY_MEM		0x0020
-#define SPU_CREATE_FLAG_ALL		0x003f /* mask of all valid flags */
+#define SPU_CREATE_FLAG_ALL		0x000f /* mask of all valid flags */
 #ifdef CONFIG_SPU_FS_MODULE
@@ -440,7 +403,6 @@ struct spu_priv2 {
 #define MFC_CNTL_RESUME_DMA_QUEUE		(0ull << 0)
 #define MFC_CNTL_SUSPEND_DMA_QUEUE		(1ull << 0)
 #define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK		(1ull << 0)
-#define MFC_CNTL_SUSPEND_MASK			(1ull << 4)
 #define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION	(0ull << 8)
 #define MFC_CNTL_SUSPEND_IN_PROGRESS		(1ull << 8)
 #define MFC_CNTL_SUSPEND_COMPLETE		(3ull << 8)
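/*
 * [Editor's note -- not part of the patch] The spu.h hunk swaps the
 * affinity-aware allocator back for the plain spu_alloc()/spu_alloc_node()/
 * spu_free() trio: check a physical SPU out of a free list, return it when
 * the context is switched out. A hedged, single-threaded sketch of that
 * discipline (hypothetical stand-ins; the real code serializes with a lock):
 */
struct spu_sk { struct spu_sk *next; int number; };

static struct spu_sk *free_list;

static struct spu_sk *sk_spu_alloc(void)
{
	struct spu_sk *spu = free_list;

	if (spu)
		free_list = spu->next;	/* pop one physical SPU */
	return spu;			/* NULL: none free, caller must wait */
}

static void sk_spu_free(struct spu_sk *spu)
{
	spu->next = free_list;		/* push it back for the next context */
	free_list = spu;
}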
diff --git a/trunk/include/asm-powerpc/spu_csa.h b/trunk/include/asm-powerpc/spu_csa.h
index e87794d5d4ea..c48ae185c874 100644
--- a/trunk/include/asm-powerpc/spu_csa.h
+++ b/trunk/include/asm-powerpc/spu_csa.h
@@ -50,12 +50,6 @@
 #define SPU_STOPPED_STATUS_P_I		8
 #define SPU_STOPPED_STATUS_R		9
-/*
- * Definitions for software decrementer status flag.
- */
-#define SPU_DECR_STATUS_RUNNING		0x1
-#define SPU_DECR_STATUS_WRAPPED		0x2
-
 #ifndef __ASSEMBLY__
 /**
 * spu_reg128 - generic 128-bit register definition.
@@ -69,7 +63,7 @@ struct spu_reg128 {
 * @gprs: Array of saved registers.
 * @fpcr: Saved floating point status control register.
 * @decr: Saved decrementer value.
-* @decr_status: Indicates software decrementer status flags.
+* @decr_status: Indicates decrementer run status.
 * @ppu_mb: Saved PPU mailbox data.
 * @ppuint_mb: Saved PPU interrupting mailbox data.
 * @tag_mask: Saved tag group mask.
diff --git a/trunk/include/linux/dcookies.h b/trunk/include/linux/dcookies.h
index 98c69ab80c84..0fe7cdf326f7 100644
--- a/trunk/include/linux/dcookies.h
+++ b/trunk/include/linux/dcookies.h
@@ -12,7 +12,6 @@
 #ifdef CONFIG_PROFILING
-#include
 #include
 struct dcookie_user;
diff --git a/trunk/include/linux/elf-em.h b/trunk/include/linux/elf-em.h
index 5834e843a946..0311bad838b1 100644
--- a/trunk/include/linux/elf-em.h
+++ b/trunk/include/linux/elf-em.h
@@ -20,8 +20,7 @@
 #define EM_PARISC	15	/* HPPA */
 #define EM_SPARC32PLUS	18	/* Sun's "v8plus" */
 #define EM_PPC		20	/* PowerPC */
-#define EM_PPC64	21	/* PowerPC64 */
-#define EM_SPU		23	/* Cell BE SPU */
+#define EM_PPC64	21	/* PowerPC64 */
 #define EM_SH		42	/* SuperH */
 #define EM_SPARCV9	43	/* SPARC v9 64-bit */
 #define EM_IA_64	50	/* HP/Intel IA-64 */
diff --git a/trunk/include/linux/oprofile.h b/trunk/include/linux/oprofile.h
index 041bb31100f4..0d514b252454 100644
--- a/trunk/include/linux/oprofile.h
+++ b/trunk/include/linux/oprofile.h
@@ -17,26 +17,6 @@
 #include
 #include
-/* Each escaped entry is prefixed by ESCAPE_CODE
- * then one of the following codes, then the
- * relevant data.
- * These #defines live in this file so that arch-specific
- * buffer sync'ing code can access them.
- */
-#define ESCAPE_CODE			~0UL
-#define CTX_SWITCH_CODE			1
-#define CPU_SWITCH_CODE			2
-#define COOKIE_SWITCH_CODE		3
-#define KERNEL_ENTER_SWITCH_CODE	4
-#define KERNEL_EXIT_SWITCH_CODE		5
-#define MODULE_LOADED_CODE		6
-#define CTX_TGID_CODE			7
-#define TRACE_BEGIN_CODE		8
-#define TRACE_END_CODE			9
-#define XEN_ENTER_SWITCH_CODE		10
-#define SPU_PROFILING_CODE		11
-#define SPU_CTX_SWITCH_CODE		12
-
 struct super_block;
 struct dentry;
 struct file_operations;
@@ -55,14 +35,6 @@ struct oprofile_operations {
 	int (*start)(void);
 	/* Stop delivering interrupts. */
 	void (*stop)(void);
-	/* Arch-specific buffer sync functions.
-	 * Return value = 0:  Success
-	 * Return value = -1: Failure
-	 * Return value = 1:  Run generic sync function
-	 */
-	int (*sync_start)(void);
-	int (*sync_stop)(void);
-
 	/* Initiate a stack backtrace. Optional. */
 	void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
 	/* CPU identification string. */
@@ -83,13 +55,6 @@ int oprofile_arch_init(struct oprofile_operations * ops);
 */
 void oprofile_arch_exit(void);
-/**
- * Add data to the event buffer.
- * The data passed is free-form, but typically consists of
- * file offsets, dcookies, context information, and ESCAPE codes.
- */
-void add_event_entry(unsigned long data);
-
 /**
 * Add a sample. This may be called from any context. Pass
 * smp_processor_id() as cpu.
diff --git a/trunk/include/linux/syscalls.h b/trunk/include/linux/syscalls.h
index 61def7c8fbb3..7a8b1e3322e0 100644
--- a/trunk/include/linux/syscalls.h
+++ b/trunk/include/linux/syscalls.h
@@ -549,7 +549,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus);
 asmlinkage long sys_spu_create(const char __user *name,
-		unsigned int flags, mode_t mode, int fd);
+		unsigned int flags, mode_t mode);
 asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
			unsigned dev);
diff --git a/trunk/kernel/time.c b/trunk/kernel/time.c
index 5b81da08bbdb..e325597f5bf5 100644
--- a/trunk/kernel/time.c
+++ b/trunk/kernel/time.c
@@ -57,14 +57,17 @@ EXPORT_SYMBOL(sys_tz);
 */
 asmlinkage long sys_time(time_t __user * tloc)
 {
-	time_t i;
-	struct timespec tv;
+	/*
+	 * We read xtime.tv_sec atomically - it's updated
+	 * atomically by update_wall_time(), so no need to
+	 * even read-lock the xtime seqlock:
+	 */
+	time_t i = xtime.tv_sec;
-	getnstimeofday(&tv);
-	i = tv.tv_sec;
+	smp_rmb(); /* sys_time() results are coherent */
 	if (tloc) {
-		if (put_user(i,tloc))
+		if (put_user(i, tloc))
			i = -EFAULT;
	}
	return i;