From 93b352fce679945845664b56b0c3afbd655a7a12 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 16 Aug 2010 16:09:09 +0800 Subject: [PATCH 1/8] pxa3xx: fix ns2cycle equation Test on a PXA310 platform with Samsung K9F2G08X0B NAND flash, with tCH=5 and clk is 156MHz, ns2cycle(5, 156000000) returns -1. ns2cycle returns negtive value will break NDTR0_tXX macros. After checking the commit log, I found the problem is introduced by commit 5b0d4d7c8a67c5ba3d35e6ceb0c5530cc6846db7 "[MTD] [NAND] pxa3xx: convert from ns to clock ticks more accurately" To get num of clock cycles, we use below equation: num of clock cycles = time (ns) / one clock cycle (ns) + 1 We need to add 1 cycle here because integer division will truncate the result. It is possible the developers set the Min values in SPEC for timing settings. Thus the truncate may cause problem, and it is safe to add an extra cycle here. The various fields in NDTR{01} are in units of clock ticks minus one, thus we should subtract 1 cycle then. Thus the correct equation should be: num of clock cycles = time (ns) / one clock cycle (ns) + 1 - 1 = time (ns) / one clock cycle (ns) Signed-off-by: Axel Lin Signed-off-by: Lei Wen Acked-by: Eric Miao Signed-off-by: David Woodhouse Cc: stable@kernel.org --- drivers/mtd/nand/pxa3xx_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index e02fa4f0e3c9..4d89f3780207 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -363,7 +363,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = { #define tAR_NDTR1(r) (((r) >> 0) & 0xf) /* convert nano-seconds to nand flash controller clock cycles */ -#define ns2cycle(ns, clk) (int)(((ns) * (clk / 1000000) / 1000) - 1) +#define ns2cycle(ns, clk) (int)((ns) * (clk / 1000000) / 1000) /* convert nand flash controller clock cycles to nano-seconds */ #define cycle2ns(c, clk) ((((c) + 1) * 1000000 + clk / 500) / (clk / 1000)) From 065a1ed8de85583888b3d4f22c64b534a1fbdaaa Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 18 Aug 2010 11:25:04 -0700 Subject: [PATCH 2/8] mtd: nand: Fix regression in BBM detection Commit c7b28e25cb9beb943aead770ff14551b55fa8c79 ("mtd: nand: refactor BB marker detection") caused a regression in detection of factory-set bad block markers, especially for certain small-page NAND. This fix removes some unneeded constraints on using NAND_SMALL_BADBLOCK_POS, making the detection code more correct. This regression can be seen, for example, in Hynix HY27US081G1M and similar. Signed-off-by: Brian Norris Tested-by: Michael Guntsche Signed-off-by: David Woodhouse --- drivers/mtd/nand/nand_base.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index a3c7473dd409..a22ed7b281ce 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2934,14 +2934,10 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, chip->chip_shift = ffs((unsigned)(chip->chipsize >> 32)) + 32 - 1; /* Set the bad block position */ - if (!(busw & NAND_BUSWIDTH_16) && (*maf_id == NAND_MFR_STMICRO || - (*maf_id == NAND_MFR_SAMSUNG && - mtd->writesize == 512) || - *maf_id == NAND_MFR_AMD)) - chip->badblockpos = NAND_SMALL_BADBLOCK_POS; - else + if (mtd->writesize > 512 || (busw & NAND_BUSWIDTH_16)) chip->badblockpos = NAND_LARGE_BADBLOCK_POS; - + else + chip->badblockpos = NAND_SMALL_BADBLOCK_POS; /* Get chip options, preserve non chip based options */ chip->options &= ~NAND_CHIPOPTIONS_MSK; From cfe3fdadb16162327773ef01a575a32000b8c7f4 Mon Sep 17 00:00:00 2001 From: Tilman Sauerbeck Date: Fri, 20 Aug 2010 14:01:47 -0700 Subject: [PATCH 3/8] mtd: nand: Fix probe of Samsung NAND chips Apparently, the check for a 6-byte ID string introduced by commit 426c457a3216fac74e3d44dd39729b0689f4c7ab ("mtd: nand: extend NAND flash detection to new MLC chips") is NOT sufficient to determine whether or not a Samsung chip uses their new MLC detection scheme or the old, standard scheme. This adds a condition to check cell type. Signed-off-by: Tilman Sauerbeck Signed-off-by: Brian Norris Signed-off-by: David Woodhouse Cc: stable@kernel.org --- drivers/mtd/nand/nand_base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index a22ed7b281ce..d551ddd9537a 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2866,6 +2866,7 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, */ if (id_data[0] == id_data[6] && id_data[1] == id_data[7] && id_data[0] == NAND_MFR_SAMSUNG && + (chip->cellinfo & NAND_CI_CELLTYPE_MSK) && id_data[5] != 0x00) { /* Calc pagesize */ mtd->writesize = 2048 << (extid & 0x03); From 297c5eee372478fc32fec5fe8eed711eedb13f3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Aug 2010 16:24:55 -0700 Subject: [PATCH 4/8] mm: make the vma list be doubly linked It's a really simple list, and several of the users want to go backwards in it to find the previous vma. So rather than have to look up the previous entry with 'find_vma_prev()' or something similar, just make it doubly linked instead. Tested-by: Ian Campbell Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- kernel/fork.c | 7 +++++-- mm/mmap.c | 21 +++++++++++++++++---- mm/nommu.c | 7 +++++-- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6a1f37..ee7e258627f9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -134,7 +134,7 @@ struct vm_area_struct { within vm_mm. */ /* linked list of VM areas per task, sorted by address */ - struct vm_area_struct *vm_next; + struct vm_area_struct *vm_next, *vm_prev; pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, see mm.h. */ diff --git a/kernel/fork.c b/kernel/fork.c index 856eac3ec52e..b7e9d60a675d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -300,7 +300,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) #ifdef CONFIG_MMU static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) { - struct vm_area_struct *mpnt, *tmp, **pprev; + struct vm_area_struct *mpnt, *tmp, *prev, **pprev; struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; @@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; + prev = NULL; for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { struct file *file; @@ -359,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~VM_LOCKED; tmp->vm_mm = mm; - tmp->vm_next = NULL; + tmp->vm_next = tmp->vm_prev = NULL; file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; @@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ *pprev = tmp; pprev = &tmp->vm_next; + tmp->vm_prev = prev; + prev = tmp; __vma_link_rb(mm, tmp, rb_link, rb_parent); rb_link = &tmp->vm_rb.rb_right; diff --git a/mm/mmap.c b/mm/mmap.c index 31003338b978..331e51af38c9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -388,17 +388,23 @@ static inline void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent) { + struct vm_area_struct *next; + + vma->vm_prev = prev; if (prev) { - vma->vm_next = prev->vm_next; + next = prev->vm_next; prev->vm_next = vma; } else { mm->mmap = vma; if (rb_parent) - vma->vm_next = rb_entry(rb_parent, + next = rb_entry(rb_parent, struct vm_area_struct, vm_rb); else - vma->vm_next = NULL; + next = NULL; } + vma->vm_next = next; + if (next) + next->vm_prev = vma; } void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, @@ -483,7 +489,11 @@ static inline void __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev) { - prev->vm_next = vma->vm_next; + struct vm_area_struct *next = vma->vm_next; + + prev->vm_next = next; + if (next) + next->vm_prev = prev; rb_erase(&vma->vm_rb, &mm->mm_rb); if (mm->mmap_cache == vma) mm->mmap_cache = prev; @@ -1915,6 +1925,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr; insertion_point = (prev ? &prev->vm_next : &mm->mmap); + vma->vm_prev = NULL; do { rb_erase(&vma->vm_rb, &mm->mm_rb); mm->map_count--; @@ -1922,6 +1933,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, vma = vma->vm_next; } while (vma && vma->vm_start < end); *insertion_point = vma; + if (vma) + vma->vm_prev = prev; tail_vma->vm_next = NULL; if (mm->unmap_area == arch_unmap_area) addr = prev ? prev->vm_end : mm->mmap_base; diff --git a/mm/nommu.c b/mm/nommu.c index efa9a380335e..88ff091eb07a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -604,7 +604,7 @@ static void protect_vma(struct vm_area_struct *vma, unsigned long flags) */ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) { - struct vm_area_struct *pvma, **pp; + struct vm_area_struct *pvma, **pp, *next; struct address_space *mapping; struct rb_node **p, *parent; @@ -664,8 +664,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) break; } - vma->vm_next = *pp; + next = *pp; *pp = vma; + vma->vm_next = next; + if (next) + next->vm_prev = vma; } /* From 7798330ac8114c731cfab83e634c6ecedaa233d7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Aug 2010 16:39:25 -0700 Subject: [PATCH 5/8] mm: make the mlock() stack guard page checks stricter If we've split the stack vma, only the lowest one has the guard page. Now that we have a doubly linked list of vma's, checking this is trivial. Tested-by: Ian Campbell Signed-off-by: Linus Torvalds --- mm/mlock.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index 49e5e4cb8232..cbae7c5b9568 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -135,6 +135,19 @@ void munlock_vma_page(struct page *page) } } +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + +static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSDOWN) && + (vma->vm_start == addr) && + !vma_stack_continue(vma->vm_prev, addr); +} + /** * __mlock_vma_pages_range() - mlock a range of pages in the vma. * @vma: target vma @@ -168,11 +181,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, gup_flags |= FOLL_WRITE; /* We don't try to access the guard page of a stack vma */ - if (vma->vm_flags & VM_GROWSDOWN) { - if (start == vma->vm_start) { - start += PAGE_SIZE; - nr_pages--; - } + if (stack_guard_page(vma, start)) { + addr += PAGE_SIZE; + nr_pages--; } while (nr_pages > 0) { From 0e8e50e20c837eeec8323bba7dcd25fe5479194c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Aug 2010 16:49:40 -0700 Subject: [PATCH 6/8] mm: make stack guard page logic use vm_prev pointer Like the mlock() change previously, this makes the stack guard check code use vma->vm_prev to see what the mapping below the current stack is, rather than have to look it up with find_vma(). Also, accept an abutting stack segment, since that happens naturally if you split the stack with mlock or mprotect. Tested-by: Ian Campbell Signed-off-by: Linus Torvalds --- mm/memory.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b6e5fd23cc5a..2ed2267439df 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2770,11 +2770,18 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo { address &= PAGE_MASK; if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) { - address -= PAGE_SIZE; - if (find_vma(vma->vm_mm, address) != vma) - return -ENOMEM; + struct vm_area_struct *prev = vma->vm_prev; + + /* + * Is there a mapping abutting this one below? + * + * That's only ok if it's the same stack mapping + * that has gotten split.. + */ + if (prev && prev->vm_end == address) + return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; - expand_stack(vma, address); + expand_stack(vma, address - PAGE_SIZE); } return 0; } From ddb0c5a689c857bb13a42d9a3f0a7604497b3a29 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sat, 21 Aug 2010 21:32:41 +0200 Subject: [PATCH 7/8] Replace Configure with Enable in description of MAXSMP The "Configure" word tends to make user believe they have to say 'yes' to be able to choose the number of procs/nodes. "Enable" should be unambiguous enough. Signed-off-by: Samuel Thibault Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac7827fc0823..cea0cd9a316f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -754,11 +754,11 @@ config IOMMU_API def_bool (AMD_IOMMU || DMAR) config MAXSMP - bool "Configure Maximum number of SMP Processors and NUMA Nodes" + bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL select CPUMASK_OFFSTACK ---help--- - Configure maximum number of CPUS and NUMA Nodes for this architecture. + Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. config NR_CPUS From e36c886a0f9d624377977fa6cae309cfd7f362fa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 21 Aug 2010 13:07:26 -0700 Subject: [PATCH 8/8] workqueue: Add basic tracepoints to track workqueue execution With the introduction of the new unified work queue thread pools, we lost one feature: It's no longer possible to know which worker is causing the CPU to wake out of idle. The result is that PowerTOP now reports a lot of "kworker/a:b" instead of more readable results. This patch adds a pair of tracepoints to the new workqueue code, similar in style to the timer/hrtimer tracepoints. With this pair of tracepoints, the next PowerTOP can correctly report which work item caused the wakeup (and how long it took): Interrupt (43) i915 time 3.51ms wakeups 141 Work ieee80211_iface_work time 0.81ms wakeups 29 Work do_dbs_timer time 0.55ms wakeups 24 Process Xorg time 21.36ms wakeups 4 Timer sched_rt_period_timer time 0.01ms wakeups 1 Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/trace/events/workqueue.h | 62 ++++++++++++++++++++++++++++++++ kernel/workqueue.c | 9 +++++ 2 files changed, 71 insertions(+) create mode 100644 include/trace/events/workqueue.h diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h new file mode 100644 index 000000000000..49682d7e9d60 --- /dev/null +++ b/include/trace/events/workqueue.h @@ -0,0 +1,62 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + +#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WORKQUEUE_H + +#include +#include + +/** + * workqueue_execute_start - called immediately before the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution. + */ +TRACE_EVENT(workqueue_execute_start, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + ), + + TP_printk("work struct %p: function %pf", __entry->work, __entry->function) +); + +/** + * workqueue_execute_end - called immediately before the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution. + */ +TRACE_EVENT(workqueue_execute_end, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + ), + + TP_fast_assign( + __entry->work = work; + ), + + TP_printk("work struct %p", __entry->work) +); + + +#endif /* _TRACE_WORKQUEUE_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2994a0e3a61c..8bd600c020e5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -35,6 +35,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + #include "workqueue_sched.h" enum { @@ -1790,7 +1793,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work) work_clear_pending(work); lock_map_acquire(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); + trace_workqueue_execute_start(work); f(work); + /* + * While we must be careful to not use "work" after this, the trace + * point will only record its address. + */ + trace_workqueue_execute_end(work); lock_map_release(&lockdep_map); lock_map_release(&cwq->wq->lockdep_map);