diff --git a/[refs] b/[refs]
index 099374489261..b25935628eeb 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: cfadf9d4ac4be940595ab73d3def24c23c8b875f
+refs/heads/master: 0ab061cd523a7f2dbf1b59aab0542cb0ab2e4633
diff --git a/trunk/tools/perf/Documentation/perf-kvm.txt b/trunk/tools/perf/Documentation/perf-kvm.txt
index d004e19fe6d6..93400a0f17f0 100644
--- a/trunk/tools/perf/Documentation/perf-kvm.txt
+++ b/trunk/tools/perf/Documentation/perf-kvm.txt
@@ -1,5 +1,5 @@
 perf-kvm(1)
-===========
+==============
 
 NAME
 ----
@@ -41,9 +41,9 @@ There are a couple of variants of perf kvm:
 OPTIONS
 -------
 --host=::
-        Collect host side performance profile.
+        Collect host side perforamnce profile.
 --guest=::
-        Collect guest side performance profile.
+        Collect guest side perforamnce profile.
 --guestmount=<path>::
 	Guest os root file system mount directory. Users mounts guest os
         root directories under <path> by a specific filesystem access method,
@@ -64,5 +64,4 @@ OPTIONS
 
 SEE ALSO
 --------
-linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
-linkperf:perf-diff[1], linkperf:perf-buildid-list[1]
+linkperf:perf-top[1] perf-record[1] perf-report[1] perf-diff[1] perf-buildid-list[1]
diff --git a/trunk/tools/perf/builtin-kmem.c b/trunk/tools/perf/builtin-kmem.c
index ab906cbd5c79..db474bbf3322 100644
--- a/trunk/tools/perf/builtin-kmem.c
+++ b/trunk/tools/perf/builtin-kmem.c
@@ -335,9 +335,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 }
 
 static struct perf_event_ops event_ops = {
-	.sample			= process_sample_event,
-	.comm			= event__process_comm,
-	.ordered_samples	= true,
+	.sample	= process_sample_event,
+	.comm	= event__process_comm,
 };
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@ -731,6 +730,7 @@ static const char *record_args[] = {
 	"record",
 	"-a",
 	"-R",
+	"-M",
 	"-f",
 	"-c", "1",
 	"-e", "kmem:kmalloc",
diff --git a/trunk/tools/perf/builtin-lock.c b/trunk/tools/perf/builtin-lock.c
index ce276750b140..6c38e4febf9f 100644
--- a/trunk/tools/perf/builtin-lock.c
+++ b/trunk/tools/perf/builtin-lock.c
@@ -23,8 +23,6 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 
-static struct perf_session *session;
-
 /* based on kernel/lockdep.c */
 #define LOCKHASH_BITS		12
 #define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)
@@ -34,6 +32,9 @@ static struct list_head lockhash_table[LOCKHASH_SIZE];
 #define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
 #define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
 
+#define LOCK_STATE_UNLOCKED	0	       /* initial state */
+#define LOCK_STATE_LOCKED	1
+
 struct lock_stat {
 	struct list_head	hash_entry;
 	struct rb_node		rb;		/* used for sorting */
@@ -46,151 +47,20 @@ struct lock_stat {
 	void			*addr;		/* address of lockdep_map, used as ID */
 	char			*name;		/* for strcpy(), we cannot use const */
 
-	unsigned int		nr_acquire;
+	int			state;
+	u64			prev_event_time; /* timestamp of previous event */
+
 	unsigned int		nr_acquired;
+	unsigned int		nr_acquire;
 	unsigned int		nr_contended;
 	unsigned int		nr_release;
 
-	unsigned int		nr_readlock;
-	unsigned int		nr_trylock;
 	/* these times are in nano sec. */
 	u64			wait_time_total;
 	u64			wait_time_min;
 	u64			wait_time_max;
-
-	int			discard; /* flag of blacklist */
-};
-
-/*
- * States of lock_seq_stat
- *
- * UNINITIALIZED is required for detecting first event of acquire.
- * As the nature of lock events, there is no guarantee
- * that the first event for the locks are acquire,
- * it can be acquired, contended or release.
- */
-#define SEQ_STATE_UNINITIALIZED      0	       /* initial state */
-#define SEQ_STATE_RELEASED	1
-#define SEQ_STATE_ACQUIRING	2
-#define SEQ_STATE_ACQUIRED	3
-#define SEQ_STATE_READ_ACQUIRED	4
-#define SEQ_STATE_CONTENDED	5
-
-/*
- * MAX_LOCK_DEPTH
- * Imported from include/linux/sched.h.
- * Should this be synchronized?
- */
-#define MAX_LOCK_DEPTH 48
-
-/*
- * struct lock_seq_stat:
- * Place to put on state of one lock sequence
- * 1) acquire -> acquired -> release
- * 2) acquire -> contended -> acquired -> release
- * 3) acquire (with read or try) -> release
- * 4) Are there other patterns?
- */
-struct lock_seq_stat {
-	struct list_head        list;
-	int			state;
-	u64			prev_event_time;
-	void                    *addr;
-
-	int                     read_count;
 };
 
-struct thread_stat {
-	struct rb_node		rb;
-
-	u32                     tid;
-	struct list_head        seq_list;
-};
-
-static struct rb_root		thread_stats;
-
-static struct thread_stat *thread_stat_find(u32 tid)
-{
-	struct rb_node *node;
-	struct thread_stat *st;
-
-	node = thread_stats.rb_node;
-	while (node) {
-		st = container_of(node, struct thread_stat, rb);
-		if (st->tid == tid)
-			return st;
-		else if (tid < st->tid)
-			node = node->rb_left;
-		else
-			node = node->rb_right;
-	}
-
-	return NULL;
-}
-
-static void thread_stat_insert(struct thread_stat *new)
-{
-	struct rb_node **rb = &thread_stats.rb_node;
-	struct rb_node *parent = NULL;
-	struct thread_stat *p;
-
-	while (*rb) {
-		p = container_of(*rb, struct thread_stat, rb);
-		parent = *rb;
-
-		if (new->tid < p->tid)
-			rb = &(*rb)->rb_left;
-		else if (new->tid > p->tid)
-			rb = &(*rb)->rb_right;
-		else
-			BUG_ON("inserting invalid thread_stat\n");
-	}
-
-	rb_link_node(&new->rb, parent, rb);
-	rb_insert_color(&new->rb, &thread_stats);
-}
-
-static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
-{
-	struct thread_stat *st;
-
-	st = thread_stat_find(tid);
-	if (st)
-		return st;
-
-	st = zalloc(sizeof(struct thread_stat));
-	if (!st)
-		die("memory allocation failed\n");
-
-	st->tid = tid;
-	INIT_LIST_HEAD(&st->seq_list);
-
-	thread_stat_insert(st);
-
-	return st;
-}
-
-static struct thread_stat *thread_stat_findnew_first(u32 tid);
-static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
-	thread_stat_findnew_first;
-
-static struct thread_stat *thread_stat_findnew_first(u32 tid)
-{
-	struct thread_stat *st;
-
-	st = zalloc(sizeof(struct thread_stat));
-	if (!st)
-		die("memory allocation failed\n");
-	st->tid = tid;
-	INIT_LIST_HEAD(&st->seq_list);
-
-	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
-	rb_insert_color(&st->rb, &thread_stats);
-
-	thread_stat_findnew = thread_stat_findnew_after_first;
-	return st;
-}
-
 /* build simple key function one is bigger than two */
 #define SINGLE_KEY(member)						\
 	static int lock_stat_key_ ## member(struct lock_stat *one,	\
@@ -305,6 +175,8 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
 		goto alloc_failed;
 	strcpy(new->name, name);
 
+	/* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */
+	new->state = LOCK_STATE_UNLOCKED;
 	new->wait_time_min = ULLONG_MAX;
 
 	list_add(&new->hash_entry, entry);
@@ -316,6 +188,8 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
 
 static char			const *input_name = "perf.data";
 
+static int			profile_cpu = -1;
+
 struct raw_event_sample {
 	u32			size;
 	char			data[0];
@@ -324,7 +198,6 @@ struct raw_event_sample {
 struct trace_acquire_event {
 	void			*addr;
 	const char		*name;
-	int			flag;
 };
 
 struct trace_acquired_event {
@@ -368,246 +241,120 @@ struct trace_lock_handler {
 			      struct thread *thread);
 };
 
-static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr)
-{
-	struct lock_seq_stat *seq;
-
-	list_for_each_entry(seq, &ts->seq_list, list) {
-		if (seq->addr == addr)
-			return seq;
-	}
-
-	seq = zalloc(sizeof(struct lock_seq_stat));
-	if (!seq)
-		die("Not enough memory\n");
-	seq->state = SEQ_STATE_UNINITIALIZED;
-	seq->addr = addr;
-
-	list_add(&seq->list, &ts->seq_list);
-	return seq;
-}
-
-static int bad_hist[4];
-
 static void
 report_lock_acquire_event(struct trace_acquire_event *acquire_event,
 			struct event *__event __used,
 			int cpu __used,
-			u64 timestamp __used,
+			u64 timestamp,
 			struct thread *thread __used)
 {
-	struct lock_stat *ls;
-	struct thread_stat *ts;
-	struct lock_seq_stat *seq;
-
-	ls = lock_stat_findnew(acquire_event->addr, acquire_event->name);
-	if (ls->discard)
-		return;
+	struct lock_stat *st;
 
-	ts = thread_stat_findnew(thread->pid);
-	seq = get_seq(ts, acquire_event->addr);
+	st = lock_stat_findnew(acquire_event->addr, acquire_event->name);
 
-	switch (seq->state) {
-	case SEQ_STATE_UNINITIALIZED:
-	case SEQ_STATE_RELEASED:
-		if (!acquire_event->flag) {
-			seq->state = SEQ_STATE_ACQUIRING;
-		} else {
-			if (acquire_event->flag & 1)
-				ls->nr_trylock++;
-			if (acquire_event->flag & 2)
-				ls->nr_readlock++;
-			seq->state = SEQ_STATE_READ_ACQUIRED;
-			seq->read_count = 1;
-			ls->nr_acquired++;
-		}
-		break;
-	case SEQ_STATE_READ_ACQUIRED:
-		if (acquire_event->flag & 2) {
-			seq->read_count++;
-			ls->nr_acquired++;
-			goto end;
-		} else {
-			goto broken;
-		}
+	switch (st->state) {
+	case LOCK_STATE_UNLOCKED:
 		break;
-	case SEQ_STATE_ACQUIRED:
-	case SEQ_STATE_ACQUIRING:
-	case SEQ_STATE_CONTENDED:
-broken:
-		/* broken lock sequence, discard it */
-		ls->discard = 1;
-		bad_hist[0]++;
-		list_del(&seq->list);
-		free(seq);
-		goto end;
+	case LOCK_STATE_LOCKED:
 		break;
 	default:
-		BUG_ON("Unknown state of lock sequence found!\n");
+		BUG_ON(1);
 		break;
 	}
 
-	ls->nr_acquire++;
-	seq->prev_event_time = timestamp;
-end:
-	return;
+	st->prev_event_time = timestamp;
 }
 
 static void
 report_lock_acquired_event(struct trace_acquired_event *acquired_event,
 			 struct event *__event __used,
 			 int cpu __used,
-			 u64 timestamp __used,
+			 u64 timestamp,
 			 struct thread *thread __used)
 {
-	struct lock_stat *ls;
-	struct thread_stat *ts;
-	struct lock_seq_stat *seq;
-	u64 contended_term;
-
-	ls = lock_stat_findnew(acquired_event->addr, acquired_event->name);
-	if (ls->discard)
-		return;
+	struct lock_stat *st;
 
-	ts = thread_stat_findnew(thread->pid);
-	seq = get_seq(ts, acquired_event->addr);
+	st = lock_stat_findnew(acquired_event->addr, acquired_event->name);
 
-	switch (seq->state) {
-	case SEQ_STATE_UNINITIALIZED:
-		/* orphan event, do nothing */
-		return;
-	case SEQ_STATE_ACQUIRING:
-		break;
-	case SEQ_STATE_CONTENDED:
-		contended_term = timestamp - seq->prev_event_time;
-		ls->wait_time_total += contended_term;
-
-		if (contended_term < ls->wait_time_min)
-			ls->wait_time_min = contended_term;
-		else if (ls->wait_time_max < contended_term)
-			ls->wait_time_max = contended_term;
+	switch (st->state) {
+	case LOCK_STATE_UNLOCKED:
+		st->state = LOCK_STATE_LOCKED;
+		st->nr_acquired++;
 		break;
-	case SEQ_STATE_RELEASED:
-	case SEQ_STATE_ACQUIRED:
-	case SEQ_STATE_READ_ACQUIRED:
-		/* broken lock sequence, discard it */
-		ls->discard = 1;
-		bad_hist[1]++;
-		list_del(&seq->list);
-		free(seq);
-		goto end;
+	case LOCK_STATE_LOCKED:
 		break;
-
 	default:
-		BUG_ON("Unknown state of lock sequence found!\n");
+		BUG_ON(1);
 		break;
 	}
 
-	seq->state = SEQ_STATE_ACQUIRED;
-	ls->nr_acquired++;
-	seq->prev_event_time = timestamp;
-end:
-	return;
+	st->prev_event_time = timestamp;
 }
 
 static void
 report_lock_contended_event(struct trace_contended_event *contended_event,
 			  struct event *__event __used,
 			  int cpu __used,
-			  u64 timestamp __used,
+			  u64 timestamp,
 			  struct thread *thread __used)
 {
-	struct lock_stat *ls;
-	struct thread_stat *ts;
-	struct lock_seq_stat *seq;
-
-	ls = lock_stat_findnew(contended_event->addr, contended_event->name);
-	if (ls->discard)
-		return;
+	struct lock_stat *st;
 
-	ts = thread_stat_findnew(thread->pid);
-	seq = get_seq(ts, contended_event->addr);
+	st = lock_stat_findnew(contended_event->addr, contended_event->name);
 
-	switch (seq->state) {
-	case SEQ_STATE_UNINITIALIZED:
-		/* orphan event, do nothing */
-		return;
-	case SEQ_STATE_ACQUIRING:
+	switch (st->state) {
+	case LOCK_STATE_UNLOCKED:
 		break;
-	case SEQ_STATE_RELEASED:
-	case SEQ_STATE_ACQUIRED:
-	case SEQ_STATE_READ_ACQUIRED:
-	case SEQ_STATE_CONTENDED:
-		/* broken lock sequence, discard it */
-		ls->discard = 1;
-		bad_hist[2]++;
-		list_del(&seq->list);
-		free(seq);
-		goto end;
+	case LOCK_STATE_LOCKED:
+		st->nr_contended++;
 		break;
 	default:
-		BUG_ON("Unknown state of lock sequence found!\n");
+		BUG_ON(1);
 		break;
 	}
 
-	seq->state = SEQ_STATE_CONTENDED;
-	ls->nr_contended++;
-	seq->prev_event_time = timestamp;
-end:
-	return;
+	st->prev_event_time = timestamp;
 }
 
 static void
 report_lock_release_event(struct trace_release_event *release_event,
 			struct event *__event __used,
 			int cpu __used,
-			u64 timestamp __used,
+			u64 timestamp,
 			struct thread *thread __used)
 {
-	struct lock_stat *ls;
-	struct thread_stat *ts;
-	struct lock_seq_stat *seq;
-
-	ls = lock_stat_findnew(release_event->addr, release_event->name);
-	if (ls->discard)
-		return;
+	struct lock_stat *st;
+	u64 hold_time;
 
-	ts = thread_stat_findnew(thread->pid);
-	seq = get_seq(ts, release_event->addr);
+	st = lock_stat_findnew(release_event->addr, release_event->name);
 
-	switch (seq->state) {
-	case SEQ_STATE_UNINITIALIZED:
-		goto end;
-		break;
-	case SEQ_STATE_ACQUIRED:
+	switch (st->state) {
+	case LOCK_STATE_UNLOCKED:
 		break;
-	case SEQ_STATE_READ_ACQUIRED:
-		seq->read_count--;
-		BUG_ON(seq->read_count < 0);
-		if (!seq->read_count) {
-			ls->nr_release++;
+	case LOCK_STATE_LOCKED:
+		st->state = LOCK_STATE_UNLOCKED;
+		hold_time = timestamp - st->prev_event_time;
+
+		if (timestamp < st->prev_event_time) {
+			/* terribly, this can happen... */
 			goto end;
 		}
-		break;
-	case SEQ_STATE_ACQUIRING:
-	case SEQ_STATE_CONTENDED:
-	case SEQ_STATE_RELEASED:
-		/* broken lock sequence, discard it */
-		ls->discard = 1;
-		bad_hist[3]++;
-		goto free_seq;
+
+		if (st->wait_time_min > hold_time)
+			st->wait_time_min = hold_time;
+		if (st->wait_time_max < hold_time)
+			st->wait_time_max = hold_time;
+		st->wait_time_total += hold_time;
+
+		st->nr_release++;
 		break;
 	default:
-		BUG_ON("Unknown state of lock sequence found!\n");
+		BUG_ON(1);
 		break;
 	}
 
-	ls->nr_release++;
-free_seq:
-	list_del(&seq->list);
-	free(seq);
 end:
-	return;
+	st->prev_event_time = timestamp;
 }
 
 /* lock oriented handlers */
@@ -634,7 +381,6 @@ process_lock_acquire_event(void *data,
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&acquire_event.addr, &tmp, sizeof(void *));
 	acquire_event.name = (char *)raw_field_ptr(event, "name", data);
-	acquire_event.flag = (int)raw_field_value(event, "flag", data);
 
 	if (trace_handler->acquire_event)
 		trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread);
@@ -695,7 +441,8 @@ process_lock_release_event(void *data,
 }
 
 static void
-process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)
+process_raw_event(void *data, int cpu,
+		  u64 timestamp, struct thread *thread)
 {
 	struct event *event;
 	int type;
@@ -713,13 +460,182 @@ process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)
 		process_lock_release_event(data, event, cpu, timestamp, thread);
 }
 
+struct raw_event_queue {
+	u64			timestamp;
+	int			cpu;
+	void			*data;
+	struct thread		*thread;
+	struct list_head	list;
+};
+
+static LIST_HEAD(raw_event_head);
+
+#define FLUSH_PERIOD	(5 * NSEC_PER_SEC)
+
+static u64 flush_limit = ULLONG_MAX;
+static u64 last_flush = 0;
+struct raw_event_queue *last_inserted;
+
+static void flush_raw_event_queue(u64 limit)
+{
+	struct raw_event_queue *tmp, *iter;
+
+	list_for_each_entry_safe(iter, tmp, &raw_event_head, list) {
+		if (iter->timestamp > limit)
+			return;
+
+		if (iter == last_inserted)
+			last_inserted = NULL;
+
+		process_raw_event(iter->data, iter->cpu, iter->timestamp,
+				  iter->thread);
+
+		last_flush = iter->timestamp;
+		list_del(&iter->list);
+		free(iter->data);
+		free(iter);
+	}
+}
+
+static void __queue_raw_event_end(struct raw_event_queue *new)
+{
+	struct raw_event_queue *iter;
+
+	list_for_each_entry_reverse(iter, &raw_event_head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, &raw_event_head);
+}
+
+static void __queue_raw_event_before(struct raw_event_queue *new,
+				     struct raw_event_queue *iter)
+{
+	list_for_each_entry_continue_reverse(iter, &raw_event_head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, &raw_event_head);
+}
+
+static void __queue_raw_event_after(struct raw_event_queue *new,
+				     struct raw_event_queue *iter)
+{
+	list_for_each_entry_continue(iter, &raw_event_head, list) {
+		if (iter->timestamp > new->timestamp) {
+			list_add_tail(&new->list, &iter->list);
+			return;
+		}
+	}
+	list_add_tail(&new->list, &raw_event_head);
+}
+
+/* The queue is ordered by time */
+static void __queue_raw_event(struct raw_event_queue *new)
+{
+	if (!last_inserted) {
+		__queue_raw_event_end(new);
+		return;
+	}
+
+	/*
+	 * Most of the time the current event has a timestamp
+	 * very close to the last event inserted, unless we just switched
+	 * to another event buffer. Having a sorting based on a list and
+	 * on the last inserted event that is close to the current one is
+	 * probably more efficient than an rbtree based sorting.
+	 */
+	if (last_inserted->timestamp >= new->timestamp)
+		__queue_raw_event_before(new, last_inserted);
+	else
+		__queue_raw_event_after(new, last_inserted);
+}
+
+static void queue_raw_event(void *data, int raw_size, int cpu,
+			    u64 timestamp, struct thread *thread)
+{
+	struct raw_event_queue *new;
+
+	if (flush_limit == ULLONG_MAX)
+		flush_limit = timestamp + FLUSH_PERIOD;
+
+	if (timestamp < last_flush) {
+		printf("Warning: Timestamp below last timeslice flush\n");
+		return;
+	}
+
+	new = malloc(sizeof(*new));
+	if (!new)
+		die("Not enough memory\n");
+
+	new->timestamp = timestamp;
+	new->cpu = cpu;
+	new->thread = thread;
+
+	new->data = malloc(raw_size);
+	if (!new->data)
+		die("Not enough memory\n");
+
+	memcpy(new->data, data, raw_size);
+
+	__queue_raw_event(new);
+	last_inserted = new;
+
+	/*
+	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
+	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
+	 * in the first half of the timeslice have all been buffered and there
+	 * are none remaining (we need that because of the weakly ordered
+	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
+	 * timeslice, we flush the first half to be gentle with the memory
+	 * (the second half can still get new events in the middle, so wait
+	 * another period to flush it)
+	 */
+	if (new->timestamp > flush_limit &&
+		new->timestamp - flush_limit > FLUSH_PERIOD) {
+		flush_limit += FLUSH_PERIOD;
+		flush_raw_event_queue(flush_limit);
+	}
+}
+
+static int process_sample_event(event_t *event, struct perf_session *session)
+{
+	struct thread *thread;
+	struct sample_data data;
+
+	bzero(&data, sizeof(struct sample_data));
+	event__parse_sample(event, session->sample_type, &data);
+	thread = perf_session__findnew(session, data.pid);
+
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			 event->header.type);
+		return -1;
+	}
+
+	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
+		return 0;
+
+	queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread);
+
+	return 0;
+}
+
 /* TODO: various way to print, coloring, nano or milli sec */
 static void print_result(void)
 {
 	struct lock_stat *st;
 	char cut_name[20];
-	int bad, total;
 
+	printf("%18s ", "ID");
 	printf("%20s ", "Name");
 	printf("%10s ", "acquired");
 	printf("%10s ", "contended");
@@ -730,15 +646,11 @@ static void print_result(void)
 
 	printf("\n\n");
 
-	bad = total = 0;
 	while ((st = pop_from_result())) {
-		total++;
-		if (st->discard) {
-			bad++;
-			continue;
-		}
 		bzero(cut_name, 20);
 
+		printf("%p ", st->addr);
+
 		if (strlen(st->name) < 16) {
 			/* output raw name */
 			printf("%20s ", st->name);
@@ -761,21 +673,6 @@ static void print_result(void)
 		       0 : st->wait_time_min);
 		printf("\n");
 	}
-
-	{
-		/* Output for debug, this have to be removed */
-		int i;
-		const char *name[4] =
-			{ "acquire", "acquired", "contended", "release" };
-
-		printf("\n=== output for debug===\n\n");
-		printf("bad:%d, total:%d\n", bad, total);
-		printf("bad rate:%f\n", (double)(bad / total));
-
-		printf("histogram of events caused bad sequence\n");
-		for (i = 0; i < 4; i++)
-			printf(" %10s: %d\n", name[i], bad_hist[i]);
-	}
 }
 
 static void dump_map(void)
@@ -790,32 +687,13 @@ static void dump_map(void)
 	}
 }
 
-static int process_sample_event(event_t *self, struct perf_session *s)
-{
-	struct sample_data data;
-	struct thread *thread;
-
-	bzero(&data, sizeof(data));
-	event__parse_sample(self, s->sample_type, &data);
-
-	thread = perf_session__findnew(s, data.tid);
-	if (thread == NULL) {
-		pr_debug("problem processing %d event, skipping it.\n",
-			self->header.type);
-		return -1;
-	}
-
-	process_raw_event(data.raw_data, data.cpu, data.time, thread);
-
-	return 0;
-}
-
 static struct perf_event_ops eops = {
 	.sample			= process_sample_event,
 	.comm			= event__process_comm,
-	.ordered_samples	= true,
 };
 
+static struct perf_session *session;
+
 static int read_events(void)
 {
 	session = perf_session__new(input_name, O_RDONLY, 0);
@@ -842,6 +720,7 @@ static void __cmd_report(void)
 	setup_pager();
 	select_key();
 	read_events();
+	flush_raw_event_queue(ULLONG_MAX);
 	sort_result();
 	print_result();
 }
diff --git a/trunk/tools/perf/builtin-sched.c b/trunk/tools/perf/builtin-sched.c
index 94453f1e4e01..09ddc8e6d8e1 100644
--- a/trunk/tools/perf/builtin-sched.c
+++ b/trunk/tools/perf/builtin-sched.c
@@ -1651,10 +1651,9 @@ static int process_lost_event(event_t *event __used,
 }
 
 static struct perf_event_ops event_ops = {
-	.sample			= process_sample_event,
-	.comm			= event__process_comm,
-	.lost			= process_lost_event,
-	.ordered_samples	= true,
+	.sample	= process_sample_event,
+	.comm	= event__process_comm,
+	.lost	= process_lost_event,
 };
 
 static int read_events(void)
@@ -1851,6 +1850,7 @@ static const char *record_args[] = {
 	"record",
 	"-a",
 	"-R",
+	"-M",
 	"-f",
 	"-m", "1024",
 	"-c", "1",
diff --git a/trunk/tools/perf/builtin-timechart.c b/trunk/tools/perf/builtin-timechart.c
index c35aa44f82ba..96f4a092df37 100644
--- a/trunk/tools/perf/builtin-timechart.c
+++ b/trunk/tools/perf/builtin-timechart.c
@@ -143,6 +143,9 @@ struct wake_event {
 static struct power_event    *power_events;
 static struct wake_event     *wake_events;
 
+struct sample_wrapper *all_samples;
+
+
 struct process_filter;
 struct process_filter {
 	char			*name;
@@ -563,6 +566,88 @@ static void end_sample_processing(void)
 	}
 }
 
+static u64 sample_time(event_t *event, const struct perf_session *session)
+{
+	int cursor;
+
+	cursor = 0;
+	if (session->sample_type & PERF_SAMPLE_IP)
+		cursor++;
+	if (session->sample_type & PERF_SAMPLE_TID)
+		cursor++;
+	if (session->sample_type & PERF_SAMPLE_TIME)
+		return event->sample.array[cursor];
+	return 0;
+}
+
+
+/*
+ * We first queue all events, sorted backwards by insertion.
+ * The order will get flipped later.
+ */
+static int queue_sample_event(event_t *event, struct perf_session *session)
+{
+	struct sample_wrapper *copy, *prev;
+	int size;
+
+	size = event->sample.header.size + sizeof(struct sample_wrapper) + 8;
+
+	copy = malloc(size);
+	if (!copy)
+		return 1;
+
+	memset(copy, 0, size);
+
+	copy->next = NULL;
+	copy->timestamp = sample_time(event, session);
+
+	memcpy(&copy->data, event, event->sample.header.size);
+
+	/* insert in the right place in the list */
+
+	if (!all_samples) {
+		/* first sample ever */
+		all_samples = copy;
+		return 0;
+	}
+
+	if (all_samples->timestamp < copy->timestamp) {
+		/* insert at the head of the list */
+		copy->next = all_samples;
+		all_samples = copy;
+		return 0;
+	}
+
+	prev = all_samples;
+	while (prev->next) {
+		if (prev->next->timestamp < copy->timestamp) {
+			copy->next = prev->next;
+			prev->next = copy;
+			return 0;
+		}
+		prev = prev->next;
+	}
+	/* insert at the end of the list */
+	prev->next = copy;
+
+	return 0;
+}
+
+static void sort_queued_samples(void)
+{
+	struct sample_wrapper *cursor, *next;
+
+	cursor = all_samples;
+	all_samples = NULL;
+
+	while (cursor) {
+		next = cursor->next;
+		cursor->next = all_samples;
+		all_samples = cursor;
+		cursor = next;
+	}
+}
+
 /*
  * Sort the pid datastructure
  */
@@ -926,12 +1011,26 @@ static void write_svg_file(const char *filename)
 	svg_close();
 }
 
+static void process_samples(struct perf_session *session)
+{
+	struct sample_wrapper *cursor;
+	event_t *event;
+
+	sort_queued_samples();
+
+	cursor = all_samples;
+	while (cursor) {
+		event = (void *)&cursor->data;
+		cursor = cursor->next;
+		process_sample_event(event, session);
+	}
+}
+
 static struct perf_event_ops event_ops = {
-	.comm			= process_comm_event,
-	.fork			= process_fork_event,
-	.exit			= process_exit_event,
-	.sample			= process_sample_event,
-	.ordered_samples	= true,
+	.comm	= process_comm_event,
+	.fork	= process_fork_event,
+	.exit	= process_exit_event,
+	.sample	= queue_sample_event,
 };
 
 static int __cmd_timechart(void)
@@ -949,6 +1048,8 @@ static int __cmd_timechart(void)
 	if (ret)
 		goto out_delete;
 
+	process_samples(session);
+
 	end_sample_processing();
 
 	sort_pids();
@@ -971,6 +1072,7 @@ static const char *record_args[] = {
 	"record",
 	"-a",
 	"-R",
+	"-M",
 	"-f",
 	"-c", "1",
 	"-e", "power:power_start",
diff --git a/trunk/tools/perf/builtin-trace.c b/trunk/tools/perf/builtin-trace.c
index 77f556f7604c..2eefb33c9679 100644
--- a/trunk/tools/perf/builtin-trace.c
+++ b/trunk/tools/perf/builtin-trace.c
@@ -11,8 +11,6 @@
 
 static char const		*script_name;
 static char const		*generate_script_lang;
-static bool			debug_ordering;
-static u64			last_timestamp;
 
 static int default_start_script(const char *script __unused,
 				int argc __unused,
@@ -89,14 +87,6 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	}
 
 	if (session->sample_type & PERF_SAMPLE_RAW) {
-		if (debug_ordering) {
-			if (data.time < last_timestamp) {
-				pr_err("Samples misordered, previous: %llu "
-					"this: %llu\n", last_timestamp,
-					data.time);
-			}
-			last_timestamp = data.time;
-		}
 		/*
 		 * FIXME: better resolve from pid from the struct trace_entry
 		 * field, although it should be the same than this perf
@@ -118,7 +108,6 @@ static struct perf_event_ops event_ops = {
 	.event_type = event__process_event_type,
 	.tracing_data = event__process_tracing_data,
 	.build_id = event__process_build_id,
-	.ordered_samples = true,
 };
 
 extern volatile int session_done;
@@ -542,8 +531,6 @@ static const struct option options[] = {
 		   "generate perf-trace.xx script in specified language"),
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
-	OPT_BOOLEAN('d', "debug-ordering", &debug_ordering,
-		   "check that samples time ordering is monotonic"),
 
 	OPT_END()
 };
diff --git a/trunk/tools/perf/util/parse-events.c b/trunk/tools/perf/util/parse-events.c
index 600d32714257..3b4ec6797565 100644
--- a/trunk/tools/perf/util/parse-events.c
+++ b/trunk/tools/perf/util/parse-events.c
@@ -418,6 +418,12 @@ parse_single_tracepoint_event(char *sys_name,
 	u64 id;
 	int fd;
 
+	attr->sample_type |= PERF_SAMPLE_RAW;
+	attr->sample_type |= PERF_SAMPLE_TIME;
+	attr->sample_type |= PERF_SAMPLE_CPU;
+
+	attr->sample_period = 1;
+
 	snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
 		 sys_name, evt_name);
 
@@ -436,13 +442,6 @@ parse_single_tracepoint_event(char *sys_name,
 	attr->type = PERF_TYPE_TRACEPOINT;
 	*strp = evt_name + evt_length;
 
-	attr->sample_type |= PERF_SAMPLE_RAW;
-	attr->sample_type |= PERF_SAMPLE_TIME;
-	attr->sample_type |= PERF_SAMPLE_CPU;
-
-	attr->sample_period = 1;
-
-
 	return EVT_HANDLED;
 }
 
diff --git a/trunk/tools/perf/util/session.c b/trunk/tools/perf/util/session.c
index b7aade2184b2..7d88ae5c270f 100644
--- a/trunk/tools/perf/util/session.c
+++ b/trunk/tools/perf/util/session.c
@@ -98,8 +98,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 	self->cwdlen = 0;
 	self->unknown_events = 0;
 	self->kerninfo_root = RB_ROOT;
-	self->ordered_samples.flush_limit = ULLONG_MAX;
-	INIT_LIST_HEAD(&self->ordered_samples.samples_head);
 
 	if (mode == O_RDONLY) {
 		if (perf_session__open(self, force) < 0)
@@ -353,178 +351,6 @@ static event__swap_op event__swap_ops[] = {
 	[PERF_RECORD_HEADER_MAX]    = NULL,
 };
 
-struct sample_queue {
-	u64			timestamp;
-	struct sample_event	*event;
-	struct list_head	list;
-};
-
-#define FLUSH_PERIOD	(2 * NSEC_PER_SEC)
-
-static void flush_sample_queue(struct perf_session *s,
-			       struct perf_event_ops *ops)
-{
-	struct list_head *head = &s->ordered_samples.samples_head;
-	u64 limit = s->ordered_samples.flush_limit;
-	struct sample_queue *tmp, *iter;
-
-	if (!ops->ordered_samples)
-		return;
-
-	list_for_each_entry_safe(iter, tmp, head, list) {
-		if (iter->timestamp > limit)
-			return;
-
-		if (iter == s->ordered_samples.last_inserted)
-			s->ordered_samples.last_inserted = NULL;
-
-		ops->sample((event_t *)iter->event, s);
-
-		s->ordered_samples.last_flush = iter->timestamp;
-		list_del(&iter->list);
-		free(iter->event);
-		free(iter);
-	}
-}
-
-static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
-{
-	struct sample_queue *iter;
-
-	list_for_each_entry_reverse(iter, head, list) {
-		if (iter->timestamp < new->timestamp) {
-			list_add(&new->list, &iter->list);
-			return;
-		}
-	}
-
-	list_add(&new->list, head);
-}
-
-static void __queue_sample_before(struct sample_queue *new,
-				  struct sample_queue *iter,
-				  struct list_head *head)
-{
-	list_for_each_entry_continue_reverse(iter, head, list) {
-		if (iter->timestamp < new->timestamp) {
-			list_add(&new->list, &iter->list);
-			return;
-		}
-	}
-
-	list_add(&new->list, head);
-}
-
-static void __queue_sample_after(struct sample_queue *new,
-				 struct sample_queue *iter,
-				 struct list_head *head)
-{
-	list_for_each_entry_continue(iter, head, list) {
-		if (iter->timestamp > new->timestamp) {
-			list_add_tail(&new->list, &iter->list);
-			return;
-		}
-	}
-	list_add_tail(&new->list, head);
-}
-
-/* The queue is ordered by time */
-static void __queue_sample_event(struct sample_queue *new,
-				 struct perf_session *s)
-{
-	struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
-	struct list_head *head = &s->ordered_samples.samples_head;
-
-
-	if (!last_inserted) {
-		__queue_sample_end(new, head);
-		return;
-	}
-
-	/*
-	 * Most of the time the current event has a timestamp
-	 * very close to the last event inserted, unless we just switched
-	 * to another event buffer. Having a sorting based on a list and
-	 * on the last inserted event that is close to the current one is
-	 * probably more efficient than an rbtree based sorting.
-	 */
-	if (last_inserted->timestamp >= new->timestamp)
-		__queue_sample_before(new, last_inserted, head);
-	else
-		__queue_sample_after(new, last_inserted, head);
-}
-
-static int queue_sample_event(event_t *event, struct sample_data *data,
-			      struct perf_session *s,
-			      struct perf_event_ops *ops)
-{
-	u64 timestamp = data->time;
-	struct sample_queue *new;
-	u64 flush_limit;
-
-
-	if (s->ordered_samples.flush_limit == ULLONG_MAX)
-		s->ordered_samples.flush_limit = timestamp + FLUSH_PERIOD;
-
-	if (timestamp < s->ordered_samples.last_flush) {
-		printf("Warning: Timestamp below last timeslice flush\n");
-		return -EINVAL;
-	}
-
-	new = malloc(sizeof(*new));
-	if (!new)
-		return -ENOMEM;
-
-	new->timestamp = timestamp;
-
-	new->event = malloc(event->header.size);
-	if (!new->event) {
-		free(new);
-		return -ENOMEM;
-	}
-
-	memcpy(new->event, event, event->header.size);
-
-	__queue_sample_event(new, s);
-	s->ordered_samples.last_inserted = new;
-
-	/*
-	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
-	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
-	 * in the first half of the timeslice have all been buffered and there
-	 * are none remaining (we need that because of the weakly ordered
-	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
-	 * timeslice, we flush the first half to be gentle with the memory
-	 * (the second half can still get new events in the middle, so wait
-	 * another period to flush it)
-	 */
-	flush_limit = s->ordered_samples.flush_limit;
-
-	if (new->timestamp > flush_limit &&
-		new->timestamp - flush_limit > FLUSH_PERIOD) {
-		s->ordered_samples.flush_limit += FLUSH_PERIOD;
-		flush_sample_queue(s, ops);
-	}
-
-	return 0;
-}
-
-static int perf_session__process_sample(event_t *event, struct perf_session *s,
-					struct perf_event_ops *ops)
-{
-	struct sample_data data;
-
-	if (!ops->ordered_samples)
-		return ops->sample(event, s);
-
-	bzero(&data, sizeof(struct sample_data));
-	event__parse_sample(event, s->sample_type, &data);
-
-	queue_sample_event(event, &data, s, ops);
-
-	return 0;
-}
-
 static int perf_session__process_event(struct perf_session *self,
 				       event_t *event,
 				       struct perf_event_ops *ops,
@@ -545,7 +371,7 @@ static int perf_session__process_event(struct perf_session *self,
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
-		return perf_session__process_sample(event, self, ops);
+		return ops->sample(event, self);
 	case PERF_RECORD_MMAP:
 		return ops->mmap(event, self);
 	case PERF_RECORD_COMM:
@@ -785,9 +611,6 @@ int __perf_session__process_events(struct perf_session *self,
 		goto more;
 done:
 	err = 0;
-	/* do the final flush for ordered samples */
-	self->ordered_samples.flush_limit = ULLONG_MAX;
-	flush_sample_queue(self, ops);
 out_err:
 	ui_progress__delete(progress);
 	return err;
diff --git a/trunk/tools/perf/util/session.h b/trunk/tools/perf/util/session.h
index 796e2291ebd7..5e47c87b9266 100644
--- a/trunk/tools/perf/util/session.h
+++ b/trunk/tools/perf/util/session.h
@@ -8,17 +8,9 @@
 #include <linux/rbtree.h>
 #include "../../../include/linux/perf_event.h"
 
-struct sample_queue;
 struct ip_callchain;
 struct thread;
 
-struct ordered_samples {
-	u64			last_flush;
-	u64			flush_limit;
-	struct list_head	samples_head;
-	struct sample_queue	*last_inserted;
-};
-
 struct perf_session {
 	struct perf_header	header;
 	unsigned long		size;
@@ -36,7 +28,6 @@ struct perf_session {
 	bool			fd_pipe;
 	int			cwdlen;
 	char			*cwd;
-	struct ordered_samples	ordered_samples;
 	char filename[0];
 };
 
@@ -56,7 +47,6 @@ struct perf_event_ops {
 		 event_type,
 		 tracing_data,
 		 build_id;
-	bool	ordered_samples;
 };
 
 struct perf_session *perf_session__new(const char *filename, int mode, bool force);
diff --git a/trunk/tools/perf/util/symbol.c b/trunk/tools/perf/util/symbol.c
index e782e7db16c5..e77c33a11de3 100644
--- a/trunk/tools/perf/util/symbol.c
+++ b/trunk/tools/perf/util/symbol.c
@@ -189,6 +189,7 @@ struct dso *dso__new(const char *name)
 		self->sorted_by_name = 0;
 		self->has_build_id = 0;
 		self->kernel = DSO_TYPE_USER;
+		INIT_LIST_HEAD(&self->node);
 	}
 
 	return self;