From ca04d9c3ec721e474f00992efc1b1afb625507f5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 26 Aug 2010 16:12:01 -0700 Subject: [PATCH 01/26] ceph: fix null pointer deref on anon root dentry release When we release a root dentry, particularly after a splice, the parent (actually our) inode was evaluating to NULL and was getting dereferenced by ceph_snap(). This is reproduced by something as simple as mount -t ceph monhost:/a/b mnt mount -t ceph monhost:/a mnt2 ls mnt2 A splice_dentry() would kill the old 'b' inode's root dentry, and we'd crash while releasing it. Fix by checking for both the ROOT and NULL cases explicitly. We only need to invalidate the parent dir when we have a correct parent to invalidate. Signed-off-by: Sage Weil --- fs/ceph/dir.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6e4f43ff23ec5..a1986eb52045b 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1021,11 +1021,15 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) static void ceph_dentry_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); - struct inode *parent_inode = dentry->d_parent->d_inode; - u64 snapid = ceph_snap(parent_inode); + struct inode *parent_inode = NULL; + u64 snapid = CEPH_NOSNAP; + if (!IS_ROOT(dentry)) { + parent_inode = dentry->d_parent->d_inode; + if (parent_inode) + snapid = ceph_snap(parent_inode); + } dout("dentry_release %p parent %p\n", dentry, parent_inode); - if (parent_inode && snapid != CEPH_SNAPDIR) { struct ceph_inode_info *ci = ceph_inode(parent_inode); From 3d4401d9d0aef5c40706350685ddea3df6708496 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Fri, 3 Sep 2010 12:57:11 -0700 Subject: [PATCH 02/26] ceph: fix pagelist kunmap tail A wrong parameter was passed to the kunmap. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/pagelist.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index b6859f47d364b..46a368b6dce53 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c @@ -5,10 +5,18 @@ #include "pagelist.h" +static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) +{ + struct page *page = list_entry(pl->head.prev, struct page, + lru); + kunmap(page); +} + int ceph_pagelist_release(struct ceph_pagelist *pl) { if (pl->mapped_tail) - kunmap(pl->mapped_tail); + ceph_pagelist_unmap_tail(pl); + while (!list_empty(&pl->head)) { struct page *page = list_first_entry(&pl->head, struct page, lru); @@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl) pl->room += PAGE_SIZE; list_add_tail(&page->lru, &pl->head); if (pl->mapped_tail) - kunmap(pl->mapped_tail); + ceph_pagelist_unmap_tail(pl); pl->mapped_tail = kmap(page); return 0; } From 3612abbd5df6baa9ca3e0777c6c8646e202d3f66 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 7 Sep 2010 15:59:27 -0700 Subject: [PATCH 03/26] ceph: fix reconnect encoding for old servers Fix the reconnect encoding to encode the cap record when the MDS does not have the FLOCK capability (i.e., pre v0.22). Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f091b13517863..fad95f8f2608b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, num_fcntl_locks, num_flock_locks); unlock_kernel(); + } else { + err = ceph_pagelist_append(pagelist, &rec, reclen); } out_free: From a77d9f7dce7600058d56f0670ed29d77abffcde2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 11 Sep 2010 10:55:25 -0700 Subject: [PATCH 04/26] ceph: fix file offset wrapping at 4GB on 32-bit archs Cast the value before shifting so that we don't run out of bits with a 32-bit unsigned long. This fixes wrapping of high file offsets into the low 4GB of a file on disk, and the subsequent data corruption for large files. Signed-off-by: Sage Weil --- fs/ceph/addr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4cfce1ee31faa..50461b8c23a4e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -766,7 +766,8 @@ static int ceph_writepages_start(struct address_space *mapping, /* ok */ if (locked_pages == 0) { /* prepare async write request */ - offset = page->index << PAGE_CACHE_SHIFT; + offset = (unsigned long long)page->index + << PAGE_CACHE_SHIFT; len = wsize; req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, From 897493504addc5609f04a2c4f73c37ab972c29b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Sep 2010 18:25:19 +0100 Subject: [PATCH 05/26] drm/i915: Ensure that the crtcinfo is populated during mode_fixup() This should fix the mysterious mode setting failures reported during boot up and after resume, generally for i8xx class machines. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=16478 Reported-and-tested-by: Xavier Chantry Buzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29413 Tested-by: Daniel Vetter Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/intel_display.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 19daead5b525d..b5bf51a4502dc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2463,11 +2463,19 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; + if (HAS_PCH_SPLIT(dev)) { /* FDI link clock is fixed at 2.7G */ if (mode->clock * 3 > IRONLAKE_FDI_FREQ * 4) return false; } + + /* XXX some encoders set the crtcinfo, others don't. + * Obviously we need some form of conflict resolution here... + */ + if (adjusted_mode->crtc_htotal == 0) + drm_mode_set_crtcinfo(adjusted_mode, 0); + return true; } From 467c525109d5d542d7d416b0c11bdd54610fe2f4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Sep 2010 11:39:20 -0700 Subject: [PATCH 06/26] ceph: fix dn offset during readdir_prepopulate When adding the readdir results to the cache, ceph_set_dentry_offset was clobbered our just-set offset. This can cause the readdir result offsets to get out of sync with the server. Add an argument to the helper so that it does not. This bug was introduced by 1cd3935bedccf592d44343890251452a6dd74fc4. Signed-off-by: Sage Weil --- fs/ceph/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e7cca414da03b..62377ec37edf0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) * the caller) if we fail. */ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, - bool *prehash) + bool *prehash, bool set_offset) { struct dentry *realdn; @@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } if ((!prehash || *prehash) && d_unhashed(dn)) d_rehash(dn); - ceph_set_dentry_offset(dn); + if (set_offset) + ceph_set_dentry_offset(dn); out: return dn; } @@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, d_delete(dn); goto done; } - dn = splice_dentry(dn, in, &have_lease); + dn = splice_dentry(dn, in, &have_lease, true); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, goto done; } dout(" linking snapped dir %p to dn %p\n", in, dn); - dn = splice_dentry(dn, in, NULL); + dn = splice_dentry(dn, in, NULL, true); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1237,7 +1238,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, err = PTR_ERR(in); goto out; } - dn = splice_dentry(dn, in, NULL); + dn = splice_dentry(dn, in, NULL, false); if (IS_ERR(dn)) dn = NULL; } From 05ed160e89baf7f5fb3432d527fec467d2493626 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 13 Sep 2010 19:25:41 +0900 Subject: [PATCH 07/26] kprobes: Fix Kconfig dependency Fix Kconfig dependency among Kprobes, optprobe and kallsyms. Kprobes uses kallsyms_lookup for finding target function and checking instruction boundary, thus CONFIG_KPROBES should select CONFIG_KALLSYMS. Optprobe is an optional feature which is supported on x86 arch, and it also uses kallsyms_lookup for checking instructions in the target function. Since KALLSYMS_ALL just adds symbols of kernel variables, it doesn't need to select KALLSYMS_ALL. Signed-off-by: Masami Hiramatsu Acked-by: Randy Dunlap , Cc: Ananth N Mavinakayanahalli Cc: Felipe Contreras Cc: Randy Dunlap Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Cc: akpm LKML-Reference: <20100913102541.20260.85700.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar --- arch/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 4877a8c8ee169..fe48fc7a3ebaf 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -32,8 +32,9 @@ config HAVE_OPROFILE config KPROBES bool "Kprobes" - depends on KALLSYMS && MODULES + depends on MODULES depends on HAVE_KPROBES + select KALLSYMS help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes @@ -45,7 +46,6 @@ config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES depends on !PREEMPT - select KALLSYMS_ALL config HAVE_EFFICIENT_UNALIGNED_ACCESS bool From 8bef9239ee1a42eb37d3f83bacf6a75f019c028d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 14 Sep 2010 15:45:44 -0700 Subject: [PATCH 08/26] ceph: correctly set 'follows' in flushsnap messages The 'follows' should match the seq for the snap context for the given snap cap, which is the context under which we have been dirtying and writing data and metadata. The snapshot that _contains_ those updates thus _follows_ that context's seq #. Signed-off-by: Sage Weil --- fs/ceph/snap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4868b9dcac5a6..9e836afba3411 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -467,7 +467,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) INIT_LIST_HEAD(&capsnap->ci_item); INIT_LIST_HEAD(&capsnap->flushing_item); - capsnap->follows = snapc->seq - 1; + capsnap->follows = snapc->seq; capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->dirty = dirty; From cfc0bf6640dfd0f43bf8bfec5a475284809baa4d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 14 Sep 2010 15:50:59 -0700 Subject: [PATCH 09/26] ceph: stop sending FLUSHSNAPs when we hit a dirty capsnap Stop sending FLUSHSNAP messages when we hit a capsnap that has dirty_pages or is still writing. We'll send the newer capsnaps only after the older ones complete. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a2069b6680aed..9fbe9019155c1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1227,7 +1227,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, * pages to be written out. */ if (capsnap->dirty_pages || capsnap->writing) - continue; + break; /* * if cap writeback already occurred, we should have dropped @@ -1276,8 +1276,8 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, &session->s_cap_snaps_flushing); spin_unlock(&inode->i_lock); - dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", - inode, capsnap, next_follows, capsnap->size); + dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", + inode, capsnap, capsnap->follows, capsnap->flush_tid); send_cap_msg(session, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, From e75e863dd5c7d96b91ebbd241da5328fc38a78cc Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 14 Sep 2010 16:35:14 +0200 Subject: [PATCH 10/26] sched: Fix user time incorrectly accounted as system time on 32-bit We have 32-bit variable overflow possibility when multiply in task_times() and thread_group_times() functions. When the overflow happens then the scaled utime value becomes erroneously small and the scaled stime becomes i erroneously big. Reported here: https://bugzilla.redhat.com/show_bug.cgi?id=633037 https://bugzilla.kernel.org/show_bug.cgi?id=16559 Reported-by: Michael Chapman Reported-by: Ciriaco Garcia de Celis Signed-off-by: Stanislaw Gruszka Signed-off-by: Peter Zijlstra Cc: Hidetoshi Seto Cc: # 2.6.32.19+ (partially) and 2.6.33+ LKML-Reference: <20100914143513.GB8415@redhat.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index ed09d4f2a69c5..dc85ceb908322 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3513,9 +3513,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) rtime = nsecs_to_cputime(p->se.sum_exec_runtime); if (total) { - u64 temp; + u64 temp = rtime; - temp = (u64)(rtime * utime); + temp *= utime; do_div(temp, total); utime = (cputime_t)temp; } else @@ -3546,9 +3546,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) rtime = nsecs_to_cputime(cputime.sum_exec_runtime); if (total) { - u64 temp; + u64 temp = rtime; - temp = (u64)(rtime * cputime.utime); + temp *= cputime.utime; do_div(temp, total); utime = (cputime_t)temp; } else From c33f543d320843e1732534c3931da4bbd18e6c14 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Wed, 8 Sep 2010 10:34:28 -0400 Subject: [PATCH 11/26] oprofile: Add Support for Intel CPU Family 6 / Model 22 (Intel Celeron 540) This patch adds CPU type detection for the Intel Celeron 540, which is part of the Core 2 family according to Wikipedia; the family and ID pair is absent from the Volume 3B table referenced in the source code comments. I have tested this patch on an Intel Celeron 540 machine reporting itself as Family 6 Model 22, and OProfile runs on the machine without issue. Spec: http://download.intel.com/design/mobile/SPECUPDT/317667.pdf Signed-off-by: Patrick Simmons Acked-by: Andi Kleen Acked-by: Arnd Bergmann Cc: stable@kernel.org Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cfe4faabb0f67..009b819f48d0a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -671,7 +671,9 @@ static int __init ppro_init(char **cpu_type) case 14: *cpu_type = "i386/core"; break; - case 15: case 23: + case 0x0f: + case 0x16: + case 0x17: *cpu_type = "i386/core_2"; break; case 0x1a: From ae00d4f37f4df56821331deb1028748110dd6dc9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 16 Sep 2010 16:26:51 -0700 Subject: [PATCH 12/26] ceph: fix cap_snap and realm split The cap_snap creation/queueing relies on both the current i_head_snapc _and_ the i_snap_realm pointers being correct, so that the new cap_snap can properly reference the old context and the new i_head_snapc can be updated to reference the new snaprealm's context. To fix this, we: - move inodes completely to the new (split) realm so that i_snap_realm is correct, and - generate the new snapc's _before_ queueing the cap_snaps in ceph_update_snap_trace(). Signed-off-by: Sage Weil --- fs/ceph/addr.c | 4 +-- fs/ceph/snap.c | 88 ++++++++++++++++--------------------------------- fs/ceph/super.h | 2 ++ 3 files changed, 33 insertions(+), 61 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 50461b8c23a4e..efbc604001c8b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) if (i_size < page_off + len) len = i_size - page_off; - dout("writepage %p page %p index %lu on %llu~%u\n", - inode, page, page->index, page_off, len); + dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", + inode, page, page->index, page_off, len, snapc); writeback_stat = atomic_long_inc_return(&client->writeback_count); if (writeback_stat > diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 9e836afba3411..9e6eef14b7df4 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( INIT_LIST_HEAD(&realm->children); INIT_LIST_HEAD(&realm->child_item); INIT_LIST_HEAD(&realm->empty_item); + INIT_LIST_HEAD(&realm->dirty_item); INIT_LIST_HEAD(&realm->inodes_with_caps); spin_lock_init(&realm->inodes_with_caps_lock); __insert_snap_realm(&mdsc->snap_realms, realm); @@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm; int invalidate = 0; int err = -ENOMEM; + LIST_HEAD(dirty_realms); dout("update_snap_trace deletion=%d\n", deletion); more: @@ -626,24 +628,6 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, } } - if (le64_to_cpu(ri->seq) > realm->seq) { - dout("update_snap_trace updating %llx %p %lld -> %lld\n", - realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); - /* - * if the realm seq has changed, queue a cap_snap for every - * inode with open caps. we do this _before_ we update - * the realm info so that we prepare for writeback under the - * _previous_ snap context. - * - * ...unless it's a snap deletion! - */ - if (!deletion) - queue_realm_cap_snaps(realm); - } else { - dout("update_snap_trace %llx %p seq %lld unchanged\n", - realm->ino, realm, realm->seq); - } - /* ensure the parent is correct */ err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); if (err < 0) @@ -651,6 +635,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, invalidate += err; if (le64_to_cpu(ri->seq) > realm->seq) { + dout("update_snap_trace updating %llx %p %lld -> %lld\n", + realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); realm->created = le64_to_cpu(ri->created); @@ -668,9 +654,17 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, if (err < 0) goto fail; + /* queue realm for cap_snap creation */ + list_add(&realm->dirty_item, &dirty_realms); + invalidate = 1; } else if (!realm->cached_context) { + dout("update_snap_trace %llx %p seq %lld new\n", + realm->ino, realm, realm->seq); invalidate = 1; + } else { + dout("update_snap_trace %llx %p seq %lld unchanged\n", + realm->ino, realm, realm->seq); } dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, @@ -683,6 +677,14 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, if (invalidate) rebuild_snap_realms(realm); + /* + * queue cap snaps _after_ we've built the new snap contexts, + * so that i_head_snapc can be set appropriately. + */ + list_for_each_entry(realm, &dirty_realms, dirty_item) { + queue_realm_cap_snaps(realm); + } + __cleanup_empty_realms(mdsc); return 0; @@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, }; struct inode *inode = ceph_find_inode(sb, vino); struct ceph_inode_info *ci; + struct ceph_snap_realm *oldrealm; if (!inode) continue; @@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, dout(" will move %p to split realm %llx %p\n", inode, realm->ino, realm); /* - * Remove the inode from the realm's inode - * list, but don't add it to the new realm - * yet. We don't want the cap_snap to be - * queued (again) by ceph_update_snap_trace() - * below. Queue it _now_, under the old context. + * Move the inode to the new realm */ spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + list_add(&ci->i_snap_realm_item, + &realm->inodes_with_caps); + oldrealm = ci->i_snap_realm; + ci->i_snap_realm = realm; spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&inode->i_lock); - ceph_queue_cap_snap(ci); + ceph_get_snap_realm(mdsc, realm); + ceph_put_snap_realm(mdsc, oldrealm); iput(inode); continue; @@ -880,43 +884,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, ceph_update_snap_trace(mdsc, p, e, op == CEPH_SNAP_OP_DESTROY); - if (op == CEPH_SNAP_OP_SPLIT) { - /* - * ok, _now_ add the inodes into the new realm. - */ - for (i = 0; i < num_split_inos; i++) { - struct ceph_vino vino = { - .ino = le64_to_cpu(split_inos[i]), - .snap = CEPH_NOSNAP, - }; - struct inode *inode = ceph_find_inode(sb, vino); - struct ceph_inode_info *ci; - - if (!inode) - continue; - ci = ceph_inode(inode); - spin_lock(&inode->i_lock); - if (list_empty(&ci->i_snap_realm_item)) { - struct ceph_snap_realm *oldrealm = - ci->i_snap_realm; - - dout(" moving %p to split realm %llx %p\n", - inode, realm->ino, realm); - spin_lock(&realm->inodes_with_caps_lock); - list_add(&ci->i_snap_realm_item, - &realm->inodes_with_caps); - ci->i_snap_realm = realm; - spin_unlock(&realm->inodes_with_caps_lock); - ceph_get_snap_realm(mdsc, realm); - ceph_put_snap_realm(mdsc, oldrealm); - } - spin_unlock(&inode->i_lock); - iput(inode); - } - + if (op == CEPH_SNAP_OP_SPLIT) /* we took a reference when we created the realm, above */ ceph_put_snap_realm(mdsc, realm); - } __cleanup_empty_realms(mdsc); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c33897ae5725e..c80bfbe27b052 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -690,6 +690,8 @@ struct ceph_snap_realm { struct list_head empty_item; /* if i have ref==0 */ + struct list_head dirty_item; /* if realm needs new context */ + /* the current set of snaps for this realm */ struct ceph_snap_context *cached_context; From 89e45aac42d40426c97e6901811309bf49c4993f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 17 Sep 2010 03:24:13 +0200 Subject: [PATCH 13/26] x86: Fix instruction breakpoint encoding Lengths and types of breakpoints are encoded in a half byte into CPU registers. However when we extract these values and store them, we add a high half byte part to them: 0x40 to the length and 0x80 to the type. When that gets reloaded to the CPU registers, the high part is masked. While making the instruction breakpoints available for perf, I zapped that high part on instruction breakpoint encoding and that broke the arch -> generic translation used by ptrace instruction breakpoints. Writing dr7 to set an inst breakpoint was then failing. There is no apparent reason for these high parts so we could get rid of them altogether. That's an invasive change though so let's do that later and for now fix the problem by restoring that inst breakpoint high part encoding in this sole patch. Reported-by: Kelvie Wong Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Mahesh Salgaonkar Cc: Will Deacon --- arch/x86/include/asm/hw_breakpoint.h | 2 +- arch/x86/kernel/hw_breakpoint.c | 40 +++++++++++++--------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 528a11e8d3e35..824ca07860d01 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,7 +20,7 @@ struct arch_hw_breakpoint { #include /* Available HW breakpoint length encodings */ -#define X86_BREAKPOINT_LEN_X 0x00 +#define X86_BREAKPOINT_LEN_X 0x40 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a474ec37c32f8..ff15c9dcc25de 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) int arch_bp_generic_fields(int x86_len, int x86_type, int *gen_len, int *gen_type) { - /* Len */ - switch (x86_len) { - case X86_BREAKPOINT_LEN_X: + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + if (x86_len != X86_BREAKPOINT_LEN_X) + return -EINVAL; + + *gen_type = HW_BREAKPOINT_X; *gen_len = sizeof(long); + return 0; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (x86_len) { case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type, return -EINVAL; } - /* Type */ - switch (x86_type) { - case X86_BREAKPOINT_EXECUTE: - *gen_type = HW_BREAKPOINT_X; - break; - case X86_BREAKPOINT_WRITE: - *gen_type = HW_BREAKPOINT_W; - break; - case X86_BREAKPOINT_RW: - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - break; - default: - return -EINVAL; - } - return 0; } @@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { - case X86_BREAKPOINT_LEN_X: - align = sizeof(long) -1; - break; case X86_BREAKPOINT_LEN_1: align = 0; break; From 068e35eee9ef98eb4cab55181977e24995d273be Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Mon, 13 Sep 2010 13:01:18 -0700 Subject: [PATCH 14/26] hw breakpoints: Fix pid namespace bug Hardware breakpoints can't be registered within pid namespaces because tsk->pid is passed rather than the pid in the current namespace. (See https://bugzilla.kernel.org/show_bug.cgi?id=17281 ) This is a quick fix demonstrating the problem but is not the best method of solving the problem since passing pids internally is not the best way to avoid pid namespace bugs. Subsequent patches will show a better solution. Much thanks to Frederic Weisbecker for doing the bulk of the work finding this bug. Reported-by: Robin Green Signed-off-by: Matt Helsley Signed-off-by: Peter Zijlstra Cc: Prasad Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Will Deacon Cc: Mahesh Salgaonkar Cc: 2.6.33-2.6.35 LKML-Reference: Signed-off-by: Ingo Molnar Signed-off-by: Frederic Weisbecker --- kernel/hw_breakpoint.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index d71a987fd2bf2..c7c2aed9e2dcc 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -433,7 +433,8 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, struct task_struct *tsk) { - return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); + return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk), + triggered); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); From 79077319d7c7844d5d836e52099a7a1bcadf9b04 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Sep 2010 19:58:04 +0100 Subject: [PATCH 15/26] drm/i915/crt: Downgrade warnings for hotplug failures These are not fatal errors, so do not alarm the user by filling the logs with *** ERROR ***. Especially as we know that g4x CRT detection is a little sticky. On the one hand the errors are valid since they are warning us of a stall -- we poll the register whilst holding the mode lock so not even the mouse will update. On the other hand, those stalls were already present yet nobody complained. Reported-by: Andi Kleen Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=18332 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_crt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 4b7735196cd5a..8f6f38c7d84d5 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -188,7 +188,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) if (wait_for((I915_READ(PCH_ADPA) & ADPA_CRT_HOTPLUG_FORCE_TRIGGER) == 0, 1000, 1)) - DRM_ERROR("timed out waiting for FORCE_TRIGGER"); + DRM_DEBUG_KMS("timed out waiting for FORCE_TRIGGER"); if (turn_off_dac) { I915_WRITE(PCH_ADPA, temp); @@ -245,7 +245,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) if (wait_for((I915_READ(PORT_HOTPLUG_EN) & CRT_HOTPLUG_FORCE_DETECT) == 0, 1000, 1)) - DRM_ERROR("timed out waiting for FORCE_DETECT to go off"); + DRM_DEBUG_KMS("timed out waiting for FORCE_DETECT to go off"); } stat = I915_READ(PORT_HOTPLUG_STAT); From e259befd9013e212648c3bd4f6f1fbf92d0dd51d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Sep 2010 00:32:02 +0100 Subject: [PATCH 16/26] drm/i915: Fix Sandybridge fence registers With 5 places to update when adding handling for fence registers, it is easy to overlook one or two. Correct that oversight, but fence management should be improved before a new set of registers is added. Bugzilla: https://bugs.freedesktop.org/show_bug?id=30199 Original patch by: Yuanhan Liu Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/gpu/drm/i915/i915_gem.c | 37 +++++++++++++++++++---------- drivers/gpu/drm/i915/i915_suspend.c | 36 +++++++++++++++++++++------- 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 16fca1d1799a4..cf4ffbee1c006 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2351,14 +2351,21 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) reg->obj = obj; - if (IS_GEN6(dev)) + switch (INTEL_INFO(dev)->gen) { + case 6: sandybridge_write_fence_reg(reg); - else if (IS_I965G(dev)) + break; + case 5: + case 4: i965_write_fence_reg(reg); - else if (IS_I9XX(dev)) + break; + case 3: i915_write_fence_reg(reg); - else + break; + case 2: i830_write_fence_reg(reg); + break; + } trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg, obj_priv->tiling_mode); @@ -2381,22 +2388,26 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj) struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[obj_priv->fence_reg]; + uint32_t fence_reg; - if (IS_GEN6(dev)) { + switch (INTEL_INFO(dev)->gen) { + case 6: I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (obj_priv->fence_reg * 8), 0); - } else if (IS_I965G(dev)) { + break; + case 5: + case 4: I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); - } else { - uint32_t fence_reg; - - if (obj_priv->fence_reg < 8) - fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; + break; + case 3: + if (obj_priv->fence_reg > 8) + fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4; else - fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - - 8) * 4; + case 2: + fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; I915_WRITE(fence_reg, 0); + break; } reg->obj = NULL; diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 2c6b98f2440ef..31f08581e93a4 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -789,16 +789,25 @@ int i915_save_state(struct drm_device *dev) dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2)); /* Fences */ - if (IS_I965G(dev)) { + switch (INTEL_INFO(dev)->gen) { + case 6: + for (i = 0; i < 16; i++) + dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_SANDYBRIDGE_0 + (i * 8)); + break; + case 5: + case 4: for (i = 0; i < 16; i++) dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8)); - } else { - for (i = 0; i < 8; i++) - dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4)); - + break; + case 3: if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) for (i = 0; i < 8; i++) dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4)); + case 2: + for (i = 0; i < 8; i++) + dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4)); + break; + } return 0; @@ -815,15 +824,24 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE(HWS_PGA, dev_priv->saveHWS); /* Fences */ - if (IS_I965G(dev)) { + switch (INTEL_INFO(dev)->gen) { + case 6: + for (i = 0; i < 16; i++) + I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), dev_priv->saveFENCE[i]); + break; + case 5: + case 4: for (i = 0; i < 16; i++) I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]); - } else { - for (i = 0; i < 8; i++) - I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]); + break; + case 3: + case 2: if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) for (i = 0; i < 8; i++) I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]); + for (i = 0; i < 8; i++) + I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]); + break; } i915_restore_display(dev); From 41a51428916ab04587bacee2dda61c4a0c4fc02f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Sep 2010 08:22:30 +0100 Subject: [PATCH 17/26] drm/i915,agp/intel: Add second set of PCI-IDs for B43 There is a second revision of B43 (a desktop gen4 part) floating around, functionally equivalent to the original B43, so simply add the new PCI-IDs. Bugzilla: https://bugs.freedesktop.org/show_bugs.cgi?id=30221 Signed-off-by: Chris Wilson Cc: stable@kernel.org --- drivers/char/agp/intel-agp.c | 2 ++ drivers/char/agp/intel-agp.h | 2 ++ drivers/gpu/drm/i915/i915_drv.c | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index eab58db5f91cd..cd18493c95279 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -806,6 +806,8 @@ static const struct intel_driver_description { "G45/G43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, "B43", NULL, &intel_i965_driver }, + { PCI_DEVICE_ID_INTEL_B43_1_HB, PCI_DEVICE_ID_INTEL_B43_1_IG, + "B43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, "G41", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB, PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG, diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h index ee189c74d345e..d09b1ab7e8abe 100644 --- a/drivers/char/agp/intel-agp.h +++ b/drivers/char/agp/intel-agp.h @@ -186,6 +186,8 @@ #define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2 #define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40 #define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42 +#define PCI_DEVICE_ID_INTEL_B43_1_HB 0x2E90 +#define PCI_DEVICE_ID_INTEL_B43_1_IG 0x2E92 #define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40 #define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42 #define PCI_DEVICE_ID_INTEL_EAGLELAKE_HB 0x2E00 diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 216deb579785e..6dbe14cc4f747 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -170,6 +170,7 @@ static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_VGA_DEVICE(0x2e22, &intel_g45_info), /* G45_G */ INTEL_VGA_DEVICE(0x2e32, &intel_g45_info), /* G41_G */ INTEL_VGA_DEVICE(0x2e42, &intel_g45_info), /* B43_G */ + INTEL_VGA_DEVICE(0x2e92, &intel_g45_info), /* B43_G.1 */ INTEL_VGA_DEVICE(0xa001, &intel_pineview_info), INTEL_VGA_DEVICE(0xa011, &intel_pineview_info), INTEL_VGA_DEVICE(0x0042, &intel_ironlake_d_info), From e835124c2be289515b918f2688ced4249e2de566 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 17 Sep 2010 08:03:08 -0700 Subject: [PATCH 18/26] ceph: only send one flushsnap per cap_snap per mds session Sending multiple flushsnap messages is problematic because we ignore the response if the tid doesn't match, and the server may only respond to each one once. It's also a waste. So, skip cap_snaps that are already on the flushing list, unless the caller tells us to resend (because we are reconnecting). Signed-off-by: Sage Weil --- fs/ceph/caps.c | 19 +++++++++++++++---- fs/ceph/snap.c | 2 +- fs/ceph/super.h | 3 ++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 9fbe9019155c1..b01c316a81487 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * + * Unless @again is true, skip cap_snaps that were already sent to + * the MDS (i.e., during this session). + * * Called under i_lock. Takes s_mutex as needed. */ void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession) + struct ceph_mds_session **psession, + int again) __releases(ci->vfs_inode->i_lock) __acquires(ci->vfs_inode->i_lock) { @@ -1240,6 +1244,13 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, dout("no auth cap (migrating?), doing nothing\n"); goto out; } + + /* only flush each capsnap once */ + if (!again && !list_empty(&capsnap->flushing_item)) { + dout("already flushed %p, skipping\n", capsnap); + continue; + } + mds = ci->i_auth_cap->session->s_mds; mseq = ci->i_auth_cap->mseq; @@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) struct inode *inode = &ci->vfs_inode; spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, NULL); + __ceph_flush_snaps(ci, NULL, 0); spin_unlock(&inode->i_lock); } @@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* flush snaps first time around only */ if (!list_empty(&ci->i_cap_snaps)) - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); goto retry_locked; retry: spin_lock(&inode->i_lock); @@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, if (cap && cap->session == session) { dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, cap, capsnap); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 1); } else { pr_err("%p auth cap %p not mds%d ???\n", inode, cap, session->s_mds); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 9e6eef14b7df4..190b6c4a6f2b9 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -717,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) igrab(inode); spin_unlock(&mdsc->snap_flush_lock); spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); spin_unlock(&inode->i_lock); iput(inode); spin_lock(&mdsc->snap_flush_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c80bfbe27b052..b87638e84c4bc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -828,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc); extern void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession); + struct ceph_mds_session **psession, + int again); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); From a43fb73101eaf6db0b33d22c152b338ab8b3edbb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 17 Sep 2010 09:54:08 -0700 Subject: [PATCH 19/26] ceph: check mapping to determine if FILE_CACHE cap is used See if the i_data mapping has any pages to determine if the FILE_CACHE capability is currently in use, instead of assuming it is any time the rdcache_gen value is set (i.e., issued -> used). This allows the MDS RECALL_STATE process work for inodes that have cached pages. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b01c316a81487..73c153092f729 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; - if (ci->i_rdcache_ref || ci->i_rdcache_gen) + if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; From be4f104dfd3b5e3ae262bff607965cfc38027dec Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 17 Sep 2010 12:30:31 -0700 Subject: [PATCH 20/26] ceph: select CRYPTO We select CRYPTO_AES, but not CRYPTO. Signed-off-by: Sage Weil --- fs/ceph/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index bc87b9c1d27ea..0fcd2640c23fd 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -3,6 +3,7 @@ config CEPH_FS depends on INET && EXPERIMENTAL select LIBCRC32C select CRYPTO_AES + select CRYPTO help Choose Y or M here to include support for mounting the experimental Ceph distributed file system. Ceph is an extremely From af6261031317f646d22f994c0b467521e47aa49f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Sep 2010 10:31:40 +0100 Subject: [PATCH 21/26] drm/i915: Hold a reference to the object whilst unbinding the eviction list During heavy aperture thrashing we may be forced to wait upon several active objects during eviction. The active list may be the last reference to these objects and so the action of waiting upon one of them may cause another to be freed (and itself unbound). To prevent the object disappearing underneath us, we need to acquire and hold a reference whilst unbinding. This should fix the reported page refcount OOPS: kernel BUG at drivers/gpu/drm/i915/i915_gem.c:1444! ... RIP: 0010:[] [] i915_gem_object_put_pages+0x25/0xf5 [i915] Call Trace: [] i915_gem_object_unbind+0xc5/0x1a7 [i915] [] i915_gem_evict_something+0x3bd/0x409 [i915] [] ? drm_gem_object_lookup+0x27/0x57 [drm] [] i915_gem_object_bind_to_gtt+0x1d3/0x279 [i915] [] i915_gem_object_pin+0xa3/0x146 [i915] [] ? drm_gem_object_lookup+0x4c/0x57 [drm] [] i915_gem_do_execbuffer+0x50d/0xe32 [i915] Reported-by: Shawn Starr Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=18902 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_evict.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 72cae3cccad88..e85246ef691ce 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -79,6 +79,7 @@ mark_free(struct drm_i915_gem_object *obj_priv, struct list_head *unwind) { list_add(&obj_priv->evict_list, unwind); + drm_gem_object_reference(&obj_priv->base); return drm_mm_scan_add_block(obj_priv->gtt_space); } @@ -165,6 +166,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, unsigned alignmen list_for_each_entry(obj_priv, &unwind_list, evict_list) { ret = drm_mm_scan_remove_block(obj_priv->gtt_space); BUG_ON(ret); + drm_gem_object_unreference(&obj_priv->base); } /* We expect the caller to unpin, evict all and try again, or give up. @@ -181,18 +183,21 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, unsigned alignmen * scanning, therefore store to be evicted objects on a * temporary list. */ list_move(&obj_priv->evict_list, &eviction_list); - } + } else + drm_gem_object_unreference(&obj_priv->base); } /* Unbinding will emit any required flushes */ list_for_each_entry_safe(obj_priv, tmp_obj_priv, &eviction_list, evict_list) { #if WATCH_LRU - DRM_INFO("%s: evicting %p\n", __func__, obj); + DRM_INFO("%s: evicting %p\n", __func__, &obj_priv->base); #endif ret = i915_gem_object_unbind(&obj_priv->base); if (ret) return ret; + + drm_gem_object_unreference(&obj_priv->base); } /* The just created free hole should be on the top of the free stack From 6df7aadcd9290807c464675098b5dd2dc9da5075 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 16 Sep 2010 14:43:08 +0530 Subject: [PATCH 22/26] virtio: console: Fix poll blocking even though there is data to read I found this while working on a Linux agent for spice, the symptom I was seeing was select blocking on the spice vdagent virtio serial port even though there were messages queued up there. virtio_console's port_fops_poll checks port->inbuf != NULL to determine if read won't block. However if an application reads enough bytes from inbuf through port_fops_read, to empty the current port->inbuf, port->inbuf will be NULL even though there may be buffers left in the virtqueue. This causes poll() to block even though there is data to be read, this patch fixes this by using will_read_block(port) instead of the port->inbuf != NULL check. Signed-off-By: Hans de Goede Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Cc: stable@kernel.org --- drivers/char/virtio_console.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 942a9826bd23e..2f2e31b58b345 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -642,7 +642,7 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) poll_wait(filp, &port->waitqueue, wait); ret = 0; - if (port->inbuf) + if (!will_read_block(port)) ret |= POLLIN | POLLRDNORM; if (!will_write_block(port)) ret |= POLLOUT; From 65745422a898741ee0e7068ef06624ab06e8aefa Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Tue, 14 Sep 2010 13:26:16 +0530 Subject: [PATCH 23/26] virtio: console: Prevent userspace from submitting NULL buffers A userspace could submit a buffer with 0 length to be written to the host. Prevent such a situation. This was not needed previously, but recent changes in the way write() works exposed this condition to trigger a virtqueue event to the host, causing a NULL buffer to be sent across. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell CC: stable@kernel.org --- drivers/char/virtio_console.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 2f2e31b58b345..c810481a5bc23 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -596,6 +596,10 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, ssize_t ret; bool nonblock; + /* Userspace could be out to fool us */ + if (!count) + return 0; + port = filp->private_data; nonblock = filp->f_flags & O_NONBLOCK; From 9b6efcd2e2275e13403700913b7a1da92cf11ad2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 21 Sep 2010 10:54:01 -0600 Subject: [PATCH 24/26] lguest: update comments to reflect LHCALL_LOAD_GDT_ENTRY. We used to have a hypercall which reloaded the entire GDT, then we switched to one which loaded a single entry (to match the IDT code). Some comments were not updated, so fix them. Signed-off-by: Rusty Russell Reported by: Eviatar Khen --- arch/x86/lguest/boot.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9257510b48368..9d5f558484558 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -324,9 +324,8 @@ static void lguest_load_gdt(const struct desc_ptr *desc) } /* - * For a single GDT entry which changes, we do the lazy thing: alter our GDT, - * then tell the Host to reload the entire thing. This operation is so rare - * that this naive implementation is reasonable. + * For a single GDT entry which changes, we simply change our copy and + * then tell the host about it. */ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, const void *desc, int type) @@ -338,9 +337,13 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, } /* - * OK, I lied. There are three "thread local storage" GDT entries which change + * There are three "thread local storage" GDT entries which change * on every context switch (these three entries are how glibc implements - * __thread variables). So we have a hypercall specifically for this case. + * __thread variables). As an optimization, we have a hypercall + * specifically for this case. + * + * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall + * which took a range of entries? */ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) { From f6c3f1686e7ec1dd8725a9a3dcb857dfd0c7a5bf Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 13 Sep 2010 11:02:21 -0700 Subject: [PATCH 25/26] sched: Fix nohz balance kick There's a situation where the nohz balancer will try to wake itself: cpu-x is idle which is also ilb_cpu got a scheduler tick during idle and the nohz_kick_needed() in trigger_load_balance() checks for rq_x->nr_running which might not be zero (because of someone waking a task on this rq etc) and this leads to the situation of the cpu-x sending a kick to itself. And this can cause a lockup. Avoid this by not marking ourself eligible for kicking. Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra LKML-Reference: <1284400941.2684.19.camel@sbsiddha-MOBL3.sc.intel.com> Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a171138a94026..db3f674ca49db 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3630,7 +3630,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) if (time_before(now, nohz.next_balance)) return 0; - if (!rq->nr_running) + if (rq->idle_at_tick) return 0; first_pick_cpu = atomic_read(&nohz.first_pick_cpu); From 8b15575cae7a93a784c3005c42b069edd9ba64dd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Sep 2010 14:35:37 -0700 Subject: [PATCH 26/26] fs: {lock,unlock}_flocks() stubs to prepare for BKL removal The lock structs are currently protected by the BKL, but are accessed by code in fs/locks.c and misc file system and DLM code. These stubs will allow all users to switch to the new interface before the implementation is changed to a spinlock. Acked-by: Arnd Bergmann Signed-off-by: Sage Weil Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b6147582..63d069bd80b70 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1093,6 +1093,10 @@ struct file_lock { #include +/* temporary stubs for BKL removal */ +#define lock_flocks() lock_kernel() +#define unlock_flocks() unlock_kernel() + extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING