-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
yaml --- r: 62262 b: refs/heads/master c: 1474855 h: refs/heads/master v: v3
- Loading branch information
Bob Nelson
authored and
Arnd Bergmann
committed
Jul 20, 2007
1 parent
cd707c7
commit bdfcf5d
Showing
27 changed files
with
1,829 additions
and
134 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
--- | ||
refs/heads/master: 36aaccc1e96481e8310b1d13600096da0f24ff43 | ||
refs/heads/master: 1474855d0878cced6f39f51f3c2bd7428b44cb1e |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* Cell Broadband Engine OProfile Support | ||
* | ||
* (C) Copyright IBM Corporation 2006 | ||
* | ||
* Author: Maynard Johnson <maynardj@us.ibm.com> | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU General Public License | ||
* as published by the Free Software Foundation; either version | ||
* 2 of the License, or (at your option) any later version. | ||
*/ | ||
|
||
#ifndef PR_UTIL_H | ||
#define PR_UTIL_H | ||
|
||
#include <linux/cpumask.h> | ||
#include <linux/oprofile.h> | ||
#include <asm/cell-pmu.h> | ||
#include <asm/spu.h> | ||
|
||
#include "../../platforms/cell/cbe_regs.h" | ||
|
||
/* Defines used for sync_start */ | ||
#define SKIP_GENERIC_SYNC 0 | ||
#define SYNC_START_ERROR -1 | ||
#define DO_GENERIC_SYNC 1 | ||
|
||
struct spu_overlay_info { /* map of sections within an SPU overlay */ | ||
unsigned int vma; /* SPU virtual memory address from elf */ | ||
unsigned int size; /* size of section from elf */ | ||
unsigned int offset; /* offset of section into elf file */ | ||
unsigned int buf; /* NOTE(review): presumably the "ovly.buf" described in the guard notes of struct vma_to_fileoffset_map, i.e. a 1-based index into _ovly_buf_table -- confirm */ | ||
}; | ||
|
||
struct vma_to_fileoffset_map { /* map of sections within an SPU program */ | ||
struct vma_to_fileoffset_map *next; /* list pointer */ | ||
unsigned int vma; /* SPU virtual memory address from elf */ | ||
unsigned int size; /* size of section from elf */ | ||
unsigned int offset; /* offset of section into elf file */ | ||
unsigned int guard_ptr; /* refers to this section's _ovly_buf_table entry (see below) */ | ||
unsigned int guard_val; /* value held in that entry: 1-based _ovly_table index (see below) */ | ||
/* | ||
* The guard pointer is an entry in the _ovly_buf_table, | ||
* computed using ovly.buf as the index into the table. Since | ||
* ovly.buf values begin at '1' to reference the first (or 0th) | ||
* entry in the _ovly_buf_table, the computation subtracts 1 | ||
* from ovly.buf. | ||
* The guard value is stored in the _ovly_buf_table entry and | ||
* is an index (starting at 1) back to the _ovly_table entry | ||
* that is pointing at this _ovly_buf_table entry. So, for | ||
* example, for an overlay scenario with one overlay segment | ||
* and two overlay sections: | ||
* - Section 1 points to the first entry of the | ||
* _ovly_buf_table, which contains a guard value | ||
* of '1', referencing the first (index=0) entry of | ||
* _ovly_table. | ||
* - Section 2 points to the second entry of the | ||
* _ovly_buf_table, which contains a guard value | ||
* of '2', referencing the second (index=1) entry of | ||
* _ovly_table. | ||
*/ | ||
|
||
}; | ||
|
||
/* The three functions below are for maintaining and accessing | ||
* the vma-to-fileoffset map. | ||
*/ | ||
struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu, | ||
u64 objectid); | ||
unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map, | ||
unsigned int vma, const struct spu *aSpu, | ||
int *grd_val); | ||
void vma_map_free(struct vma_to_fileoffset_map *map); | ||
|
||
/* | ||
* Entry point for SPU profiling. | ||
* cycles_reset is the SPU_CYCLES count value specified by the user. | ||
*/ | ||
int start_spu_profiling(unsigned int cycles_reset); | ||
|
||
void stop_spu_profiling(void); | ||
|
||
|
||
/* add the necessary profiling hooks */ | ||
int spu_sync_start(void); | ||
|
||
/* remove the hooks */ | ||
int spu_sync_stop(void); | ||
|
||
/* Record SPU program counter samples to the oprofile event buffer. */ | ||
void spu_sync_buffer(int spu_num, unsigned int *samples, | ||
int num_samples); | ||
|
||
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset); | ||
|
||
#endif /* PR_UTIL_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
/* | ||
* Cell Broadband Engine OProfile Support | ||
* | ||
* (C) Copyright IBM Corporation 2006 | ||
* | ||
* Authors: Maynard Johnson <maynardj@us.ibm.com> | ||
* Carl Love <carll@us.ibm.com> | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU General Public License | ||
* as published by the Free Software Foundation; either version | ||
* 2 of the License, or (at your option) any later version. | ||
*/ | ||
|
||
#include <linux/hrtimer.h> | ||
#include <linux/smp.h> | ||
#include <linux/slab.h> | ||
#include <asm/cell-pmu.h> | ||
#include "pr_util.h" | ||
|
||
#define TRACE_ARRAY_SIZE 1024 | ||
#define SCALE_SHIFT 14 | ||
|
||
static u32 *samples; | ||
|
||
static int spu_prof_running; | ||
static unsigned int profiling_interval; | ||
|
||
#define NUM_SPU_BITS_TRBUF 16 | ||
#define SPUS_PER_TB_ENTRY 4 | ||
#define SPUS_PER_NODE 8 | ||
|
||
#define SPU_PC_MASK 0xFFFF | ||
|
||
static DEFINE_SPINLOCK(sample_array_lock); | ||
unsigned long sample_array_lock_flags; | ||
|
||
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) | ||
{ | ||
unsigned long ns_per_cyc; | ||
|
||
if (!freq_khz) | ||
freq_khz = ppc_proc_freq/1000; | ||
|
||
/* To calculate a timeout in nanoseconds, the basic | ||
* formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency). | ||
* To avoid floating point math, we use the scale math | ||
* technique as described in linux/jiffies.h. We use | ||
* a scale factor of SCALE_SHIFT, which provides 4 decimal places | ||
* of precision. This is close enough for the purpose at hand. | ||
* | ||
* The value of the timeout should be small enough that the hw | ||
* trace buffer will not get more then about 1/3 full for the | ||
* maximum user specified (the LFSR value) hw sampling frequency. | ||
* This is to ensure the trace buffer will never fill even if the | ||
* kernel thread scheduling varies under a heavy system load. | ||
*/ | ||
|
||
ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz; | ||
profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT; | ||
|
||
} | ||
|
||
/* | ||
* Extract SPU PC from trace buffer entry | ||
*/ | ||
static void spu_pc_extract(int cpu, int entry) | ||
{ | ||
/* the trace buffer is 128 bits */ | ||
u64 trace_buffer[2]; | ||
u64 spu_mask; | ||
int spu; | ||
|
||
spu_mask = SPU_PC_MASK; | ||
|
||
/* Each SPU PC is 16 bits; hence, four spus in each of | ||
* the two 64-bit buffer entries that make up the | ||
* 128-bit trace_buffer entry. Process two 64-bit values | ||
* simultaneously. | ||
* trace[0] SPU PC contents are: 0 1 2 3 | ||
* trace[1] SPU PC contents are: 4 5 6 7 | ||
*/ | ||
|
||
cbe_read_trace_buffer(cpu, trace_buffer); | ||
|
||
for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) { | ||
/* spu PC trace entry is upper 16 bits of the | ||
* 18 bit SPU program counter | ||
*/ | ||
samples[spu * TRACE_ARRAY_SIZE + entry] | ||
= (spu_mask & trace_buffer[0]) << 2; | ||
samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry] | ||
= (spu_mask & trace_buffer[1]) << 2; | ||
|
||
trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF; | ||
trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF; | ||
} | ||
} | ||
|
||
static int cell_spu_pc_collection(int cpu) | ||
{ | ||
u32 trace_addr; | ||
int entry; | ||
|
||
/* process the collected SPU PC for the node */ | ||
|
||
entry = 0; | ||
|
||
trace_addr = cbe_read_pm(cpu, trace_address); | ||
while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { | ||
/* there is data in the trace buffer to process */ | ||
spu_pc_extract(cpu, entry); | ||
|
||
entry++; | ||
|
||
if (entry >= TRACE_ARRAY_SIZE) | ||
/* spu_samples is full */ | ||
break; | ||
|
||
trace_addr = cbe_read_pm(cpu, trace_address); | ||
} | ||
|
||
return entry; | ||
} | ||
|
||
|
||
static enum hrtimer_restart profile_spus(struct hrtimer *timer) | ||
{ | ||
ktime_t kt; | ||
int cpu, node, k, num_samples, spu_num; | ||
|
||
if (!spu_prof_running) | ||
goto stop; | ||
|
||
for_each_online_cpu(cpu) { | ||
if (cbe_get_hw_thread_id(cpu)) | ||
continue; | ||
|
||
node = cbe_cpu_to_node(cpu); | ||
|
||
/* There should only be one kernel thread at a time processing | ||
* the samples. In the very unlikely case that the processing | ||
* is taking a very long time and multiple kernel threads are | ||
* started to process the samples. Make sure only one kernel | ||
* thread is working on the samples array at a time. The | ||
* sample array must be loaded and then processed for a given | ||
* cpu. The sample array is not per cpu. | ||
*/ | ||
spin_lock_irqsave(&sample_array_lock, | ||
sample_array_lock_flags); | ||
num_samples = cell_spu_pc_collection(cpu); | ||
|
||
if (num_samples == 0) { | ||
spin_unlock_irqrestore(&sample_array_lock, | ||
sample_array_lock_flags); | ||
continue; | ||
} | ||
|
||
for (k = 0; k < SPUS_PER_NODE; k++) { | ||
spu_num = k + (node * SPUS_PER_NODE); | ||
spu_sync_buffer(spu_num, | ||
samples + (k * TRACE_ARRAY_SIZE), | ||
num_samples); | ||
} | ||
|
||
spin_unlock_irqrestore(&sample_array_lock, | ||
sample_array_lock_flags); | ||
|
||
} | ||
smp_wmb(); /* insure spu event buffer updates are written */ | ||
/* don't want events intermingled... */ | ||
|
||
kt = ktime_set(0, profiling_interval); | ||
if (!spu_prof_running) | ||
goto stop; | ||
hrtimer_forward(timer, timer->base->get_time(), kt); | ||
return HRTIMER_RESTART; | ||
|
||
stop: | ||
printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n"); | ||
return HRTIMER_NORESTART; | ||
} | ||
|
||
static struct hrtimer timer; | ||
/* | ||
* Entry point for SPU profiling. | ||
* NOTE: SPU profiling is done system-wide, not per-CPU. | ||
* | ||
* cycles_reset is the count value specified by the user when | ||
* setting up OProfile to count SPU_CYCLES. | ||
*/ | ||
int start_spu_profiling(unsigned int cycles_reset) | ||
{ | ||
ktime_t kt; | ||
|
||
pr_debug("timer resolution: %lu\n", TICK_NSEC); | ||
kt = ktime_set(0, profiling_interval); | ||
hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
timer.expires = kt; | ||
timer.function = profile_spus; | ||
|
||
/* Allocate arrays for collecting SPU PC samples */ | ||
samples = kzalloc(SPUS_PER_NODE * | ||
TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL); | ||
|
||
if (!samples) | ||
return -ENOMEM; | ||
|
||
spu_prof_running = 1; | ||
hrtimer_start(&timer, kt, HRTIMER_MODE_REL); | ||
|
||
return 0; | ||
} | ||
|
||
void stop_spu_profiling(void) | ||
{ | ||
spu_prof_running = 0; | ||
hrtimer_cancel(&timer); | ||
kfree(samples); | ||
pr_debug("SPU_PROF: stop_spu_profiling issued\n"); | ||
} |
Oops, something went wrong.