From eb9ac48e1fea4a6105057447be11ac0a714ba5a0 Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoffrey.levand@am.sony.com>
Date: Fri, 16 May 2008 06:09:59 +1000
Subject: [PATCH] --- yaml --- r: 97213 b: refs/heads/master c:
 ecc240f90bce23651f9866a1523ba55faa89f009 h: refs/heads/master i:   97211:
 dc900f8b467f0e1b452bc1cb5b4bc570439cc35b v: v3

---
 [refs]                                        |   2 +-
 .../Documentation/DocBook/kernel-locking.tmpl |  25 ----
 trunk/arch/powerpc/platforms/ps3/mm.c         |   3 +
 trunk/arch/x86/boot/printf.c                  |   2 +-
 trunk/drivers/acpi/dispatcher/dsmethod.c      |   2 +-
 trunk/drivers/acpi/executer/exmutex.c         |   4 +-
 trunk/drivers/isdn/hysdn/hycapi.c             |   6 +-
 trunk/drivers/scsi/3w-9xxx.c                  |   6 +-
 trunk/drivers/scsi/aha152x.c                  |   4 +-
 trunk/drivers/scsi/atp870u.c                  |   2 +-
 trunk/drivers/scsi/hptiop.c                   |  12 +-
 trunk/drivers/scsi/qla1280.c                  |   2 +-
 trunk/drivers/video/aty/atyfb_base.c          |   2 +-
 trunk/drivers/video/aty/radeon_base.c         |   4 +-
 trunk/drivers/video/matrox/matroxfb_base.h    |   2 +-
 trunk/drivers/video/sis/sis_main.c            |   2 +-
 trunk/fs/xfs/linux-2.6/xfs_buf.c              |  24 +---
 trunk/fs/xfs/linux-2.6/xfs_buf.h              |  19 ---
 trunk/fs/xfs/linux-2.6/xfs_file.c             |  17 +--
 trunk/fs/xfs/linux-2.6/xfs_vnode.h            |   8 ++
 trunk/fs/xfs/xfs_inode.c                      |   9 +-
 trunk/fs/xfs/xfs_vnodeops.c                   | 112 +++++++++++-------
 trunk/fs/xfs/xfs_vnodeops.h                   |   3 +-
 trunk/kernel/module.c                         |  18 +--
 trunk/kernel/stop_machine.c                   |   7 +-
 25 files changed, 129 insertions(+), 168 deletions(-)
diff --git a/[refs] b/[refs]
index dc4e34f3e20a..45ff5816ee74 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 6483d152acffb83442b90dad1517fde8a7b1e12d
+refs/heads/master: ecc240f90bce23651f9866a1523ba55faa89f009
diff --git a/trunk/Documentation/DocBook/kernel-locking.tmpl b/trunk/Documentation/DocBook/kernel-locking.tmpl
index 2510763295d0..77c42f40be5d 100644
--- a/trunk/Documentation/DocBook/kernel-locking.tmpl
+++ b/trunk/Documentation/DocBook/kernel-locking.tmpl
@@ -703,31 +703,6 @@
 </sect1>
 </chapter>
 
-<chapter id="trylock-functions">
- <title>The trylock Functions</title>
-  <para>
-   There are functions that try to acquire a lock only once and immediately
-   return a value telling about success or failure to acquire the lock.
-   They can be used if you need no access to the data protected with the lock
-   when some other thread is holding the lock. You should acquire the lock
-   later if you then need access to the data protected with the lock.
-  </para>
-
-  <para>
-    <function>spin_trylock()</function> does not spin but returns non-zero if
-    it acquires the spinlock on the first try or 0 if not. This function can
-    be used in all contexts like <function>spin_lock</function>: you must have
-    disabled the contexts that might interrupt you and acquire the spin lock.
-  </para>
-
-  <para>
-    <function>mutex_trylock()</function> does not suspend your task
-    but returns non-zero if it could lock the mutex on the first try
-    or 0 if not. This function cannot be safely used in hardware or software
-    interrupt contexts despite not sleeping.
-  </para>
-</chapter>
-
   <chapter id="Examples">
    <title>Common Examples</title>
     <para>
diff --git a/trunk/arch/powerpc/platforms/ps3/mm.c b/trunk/arch/powerpc/platforms/ps3/mm.c
index 5b3fb2b321ab..3a58ffabccd9 100644
--- a/trunk/arch/powerpc/platforms/ps3/mm.c
+++ b/trunk/arch/powerpc/platforms/ps3/mm.c
@@ -317,6 +317,9 @@ static int __init ps3_mm_add_memory(void)
 		return result;
 	}
 
+	lmb_add(start_addr, map.r1.size);
+	lmb_analyze();
+
 	result = online_pages(start_pfn, nr_pages);
 
 	if (result)
diff --git a/trunk/arch/x86/boot/printf.c b/trunk/arch/x86/boot/printf.c
index 50e47cdbdddd..c1d00c0274c4 100644
--- a/trunk/arch/x86/boot/printf.c
+++ b/trunk/arch/x86/boot/printf.c
@@ -56,7 +56,7 @@ static char *number(char *str, long num, int base, int size, int precision,
 	if (type & LEFT)
 		type &= ~ZEROPAD;
 	if (base < 2 || base > 36)
-		return NULL;
+		return 0;
 	c = (type & ZEROPAD) ? '0' : ' ';
 	sign = 0;
 	if (type & SIGN) {
diff --git a/trunk/drivers/acpi/dispatcher/dsmethod.c b/trunk/drivers/acpi/dispatcher/dsmethod.c
index 2509809a36cf..e48a3ea03117 100644
--- a/trunk/drivers/acpi/dispatcher/dsmethod.c
+++ b/trunk/drivers/acpi/dispatcher/dsmethod.c
@@ -565,7 +565,7 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc,
 
 				acpi_os_release_mutex(method_desc->method.
 						      mutex->mutex.os_mutex);
-				method_desc->method.mutex->mutex.thread_id = NULL;
+				method_desc->method.mutex->mutex.thread_id = 0;
 			}
 		}
 
diff --git a/trunk/drivers/acpi/executer/exmutex.c b/trunk/drivers/acpi/executer/exmutex.c
index a8bf3d713e28..c873ab40cd0e 100644
--- a/trunk/drivers/acpi/executer/exmutex.c
+++ b/trunk/drivers/acpi/executer/exmutex.c
@@ -326,7 +326,7 @@ acpi_status acpi_ex_release_mutex_object(union acpi_operand_object *obj_desc)
 
 	/* Clear mutex info */
 
-	obj_desc->mutex.thread_id = NULL;
+	obj_desc->mutex.thread_id = 0;
 	return_ACPI_STATUS(status);
 }
 
@@ -463,7 +463,7 @@ void acpi_ex_release_all_mutexes(struct acpi_thread_state *thread)
 		/* Mark mutex unowned */
 
 		obj_desc->mutex.owner_thread = NULL;
-		obj_desc->mutex.thread_id = NULL;
+		obj_desc->mutex.thread_id = 0;
 
 		/* Update Thread sync_level (Last mutex is the important one) */
 
diff --git a/trunk/drivers/isdn/hysdn/hycapi.c b/trunk/drivers/isdn/hysdn/hycapi.c
index 53f6ad1235db..d3999a8e9f88 100644
--- a/trunk/drivers/isdn/hysdn/hycapi.c
+++ b/trunk/drivers/isdn/hysdn/hycapi.c
@@ -462,11 +462,11 @@ static int hycapi_read_proc(char *page, char **start, off_t off,
 		default: s = "???"; break;
 	}
 	len += sprintf(page+len, "%-16s %s\n", "type", s);
-	if ((s = cinfo->version[VER_DRIVER]) != NULL)
+	if ((s = cinfo->version[VER_DRIVER]) != 0)
 		len += sprintf(page+len, "%-16s %s\n", "ver_driver", s);
-	if ((s = cinfo->version[VER_CARDTYPE]) != NULL)
+	if ((s = cinfo->version[VER_CARDTYPE]) != 0)
 		len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s);
-	if ((s = cinfo->version[VER_SERIAL]) != NULL)
+	if ((s = cinfo->version[VER_SERIAL]) != 0)
 		len += sprintf(page+len, "%-16s %s\n", "ver_serial", s);
     
 	len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname);
diff --git a/trunk/drivers/scsi/3w-9xxx.c b/trunk/drivers/scsi/3w-9xxx.c
index 867f6fd5c2c0..b31faeccb9cd 100644
--- a/trunk/drivers/scsi/3w-9xxx.c
+++ b/trunk/drivers/scsi/3w-9xxx.c
@@ -1278,7 +1278,7 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance)
 			error = 0;
 			/* Check for command packet errors */
 			if (full_command_packet->command.newcommand.status != 0) {
-				if (tw_dev->srb[request_id] != NULL) {
+				if (tw_dev->srb[request_id] != 0) {
 					error = twa_fill_sense(tw_dev, request_id, 1, 1);
 				} else {
 					/* Skip ioctl error prints */
@@ -1290,7 +1290,7 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance)
 
 			/* Check for correct state */
 			if (tw_dev->state[request_id] != TW_S_POSTED) {
-				if (tw_dev->srb[request_id] != NULL) {
+				if (tw_dev->srb[request_id] != 0) {
 					TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1a, "Received a request id that wasn't posted");
 					TW_CLEAR_ALL_INTERRUPTS(tw_dev);
 					goto twa_interrupt_bail;
@@ -1298,7 +1298,7 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance)
 			}
 
 			/* Check for internal command completion */
-			if (tw_dev->srb[request_id] == NULL) {
+			if (tw_dev->srb[request_id] == 0) {
 				if (request_id != tw_dev->chrdev_request_id) {
 					if (twa_aen_complete(tw_dev, request_id))
 						TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1b, "Error completing AEN during attention interrupt");
diff --git a/trunk/drivers/scsi/aha152x.c b/trunk/drivers/scsi/aha152x.c
index 0899cb61e3dd..1dca1775f4b1 100644
--- a/trunk/drivers/scsi/aha152x.c
+++ b/trunk/drivers/scsi/aha152x.c
@@ -3582,7 +3582,7 @@ static int checksetup(struct aha152x_setup *setup)
 	if (i == ARRAY_SIZE(ports))
 		return 0;
 
-	if (!request_region(setup->io_port, IO_RANGE, "aha152x")) {
+	if ( request_region(setup->io_port, IO_RANGE, "aha152x")==0 ) {
 		printk(KERN_ERR "aha152x: io port 0x%x busy.\n", setup->io_port);
 		return 0;
 	}
@@ -3842,7 +3842,7 @@ static int __init aha152x_init(void)
 			if ((setup_count == 1) && (setup[0].io_port == ports[i]))
 				continue;
 
-			if (!request_region(ports[i], IO_RANGE, "aha152x")) {
+			if ( request_region(ports[i], IO_RANGE, "aha152x")==0 ) {
 				printk(KERN_ERR "aha152x: io port 0x%x busy.\n", ports[i]);
 				continue;
 			}
diff --git a/trunk/drivers/scsi/atp870u.c b/trunk/drivers/scsi/atp870u.c
index 7d311541c76c..db6de5e6afb3 100644
--- a/trunk/drivers/scsi/atp870u.c
+++ b/trunk/drivers/scsi/atp870u.c
@@ -747,7 +747,7 @@ static void send_s870(struct atp_unit *dev,unsigned char c)
 		dev->quhd[c] = 0;
 	}
 	workreq = dev->quereq[c][dev->quhd[c]];
-	if (dev->id[c][scmd_id(workreq)].curr_req == NULL) {
+	if (dev->id[c][scmd_id(workreq)].curr_req == 0) {	
 		dev->id[c][scmd_id(workreq)].curr_req = workreq;
 		dev->last_cmd[c] = scmd_id(workreq);
 		goto cmd_subp;
diff --git a/trunk/drivers/scsi/hptiop.c b/trunk/drivers/scsi/hptiop.c
index da876d3924be..aaa48e0c8ed0 100644
--- a/trunk/drivers/scsi/hptiop.c
+++ b/trunk/drivers/scsi/hptiop.c
@@ -444,7 +444,7 @@ static void __iomem *hptiop_map_pci_bar(struct hptiop_hba *hba, int index)
 	if (!(pci_resource_flags(pcidev, index) & IORESOURCE_MEM)) {
 		printk(KERN_ERR "scsi%d: pci resource invalid\n",
 				hba->host->host_no);
-		return NULL;
+		return 0;
 	}
 
 	mem_base_phy = pci_resource_start(pcidev, index);
@@ -454,7 +454,7 @@ static void __iomem *hptiop_map_pci_bar(struct hptiop_hba *hba, int index)
 	if (!mem_base_virt) {
 		printk(KERN_ERR "scsi%d: Fail to ioremap memory space\n",
 				hba->host->host_no);
-		return NULL;
+		return 0;
 	}
 	return mem_base_virt;
 }
@@ -476,11 +476,11 @@ static void hptiop_unmap_pci_bar_itl(struct hptiop_hba *hba)
 static int hptiop_map_pci_bar_mv(struct hptiop_hba *hba)
 {
 	hba->u.mv.regs = hptiop_map_pci_bar(hba, 0);
-	if (hba->u.mv.regs == NULL)
+	if (hba->u.mv.regs == 0)
 		return -1;
 
 	hba->u.mv.mu = hptiop_map_pci_bar(hba, 2);
-	if (hba->u.mv.mu == NULL) {
+	if (hba->u.mv.mu == 0) {
 		iounmap(hba->u.mv.regs);
 		return -1;
 	}
@@ -1210,8 +1210,8 @@ static void hptiop_remove(struct pci_dev *pcidev)
 
 static struct hptiop_adapter_ops hptiop_itl_ops = {
 	.iop_wait_ready    = iop_wait_ready_itl,
-	.internal_memalloc = NULL,
-	.internal_memfree  = NULL,
+	.internal_memalloc = 0,
+	.internal_memfree  = 0,
 	.map_pci_bar       = hptiop_map_pci_bar_itl,
 	.unmap_pci_bar     = hptiop_unmap_pci_bar_itl,
 	.enable_intr       = hptiop_enable_intr_itl,
diff --git a/trunk/drivers/scsi/qla1280.c b/trunk/drivers/scsi/qla1280.c
index 3754ab87f89a..51e2f299dbbb 100644
--- a/trunk/drivers/scsi/qla1280.c
+++ b/trunk/drivers/scsi/qla1280.c
@@ -2811,7 +2811,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 
 	/* Check for room in outstanding command list. */
 	for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS &&
-		     ha->outstanding_cmds[cnt] != NULL; cnt++);
+		     ha->outstanding_cmds[cnt] != 0; cnt++);
 
 	if (cnt >= MAX_OUTSTANDING_COMMANDS) {
 		status = 1;
diff --git a/trunk/drivers/video/aty/atyfb_base.c b/trunk/drivers/video/aty/atyfb_base.c
index bd4ac0bafecb..e4bcf5376a99 100644
--- a/trunk/drivers/video/aty/atyfb_base.c
+++ b/trunk/drivers/video/aty/atyfb_base.c
@@ -3356,7 +3356,7 @@ static int __devinit atyfb_setup_generic(struct pci_dev *pdev, struct fb_info *i
 
 	info->fix.mmio_start = raddr;
 	par->ati_regbase = ioremap(info->fix.mmio_start, 0x1000);
-	if (par->ati_regbase == NULL)
+	if (par->ati_regbase == 0)
 		return -ENOMEM;
 
 	info->fix.mmio_start += par->aux_start ? 0x400 : 0xc00;
diff --git a/trunk/drivers/video/aty/radeon_base.c b/trunk/drivers/video/aty/radeon_base.c
index 400e9264e456..72cd0d2f14ec 100644
--- a/trunk/drivers/video/aty/radeon_base.c
+++ b/trunk/drivers/video/aty/radeon_base.c
@@ -2277,8 +2277,8 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev,
 	do {
 		rinfo->fb_base = ioremap (rinfo->fb_base_phys,
 					  rinfo->mapped_vram);
-	} while (rinfo->fb_base == NULL &&
-		 ((rinfo->mapped_vram /= 2) >= MIN_MAPPED_VRAM));
+	} while (   rinfo->fb_base == 0 &&
+		  ((rinfo->mapped_vram /=2) >= MIN_MAPPED_VRAM) );
 
 	if (rinfo->fb_base == NULL) {
 		printk (KERN_ERR "radeonfb (%s): cannot map FB\n",
diff --git a/trunk/drivers/video/matrox/matroxfb_base.h b/trunk/drivers/video/matrox/matroxfb_base.h
index 95883236c0cd..f3107ad7e545 100644
--- a/trunk/drivers/video/matrox/matroxfb_base.h
+++ b/trunk/drivers/video/matrox/matroxfb_base.h
@@ -200,7 +200,7 @@ static inline int mga_ioremap(unsigned long phys, unsigned long size, int flags,
 		virt->vaddr = ioremap_nocache(phys, size);
 	else
 		virt->vaddr = ioremap(phys, size);
-	return (virt->vaddr == NULL); /* 0, !0... 0, error_code in future */
+	return (virt->vaddr == 0); /* 0, !0... 0, error_code in future */
 }
 
 static inline void mga_iounmap(vaddr_t va) {
diff --git a/trunk/drivers/video/sis/sis_main.c b/trunk/drivers/video/sis/sis_main.c
index b9343844cd1f..73803624c131 100644
--- a/trunk/drivers/video/sis/sis_main.c
+++ b/trunk/drivers/video/sis/sis_main.c
@@ -5787,7 +5787,7 @@ sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	} else {
 		struct sis_video_info *countvideo = card_list;
 		ivideo->cardnumber = 1;
-		while((countvideo = countvideo->next) != NULL)
+		while((countvideo = countvideo->next) != 0)
 			ivideo->cardnumber++;
 	}
 
diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c
index 98e0e86093b4..5105015a75ad 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_buf.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c
@@ -387,8 +387,6 @@ _xfs_buf_lookup_pages(
 		if (unlikely(page == NULL)) {
 			if (flags & XBF_READ_AHEAD) {
 				bp->b_page_count = i;
-				for (i = 0; i < bp->b_page_count; i++)
-					unlock_page(bp->b_pages[i]);
 				return -ENOMEM;
 			}
 
@@ -418,24 +416,17 @@ _xfs_buf_lookup_pages(
 		ASSERT(!PagePrivate(page));
 		if (!PageUptodate(page)) {
 			page_count--;
-			if (blocksize >= PAGE_CACHE_SIZE) {
-				if (flags & XBF_READ)
-					bp->b_flags |= _XBF_PAGE_LOCKED;
-			} else if (!PagePrivate(page)) {
+			if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
 				if (test_page_region(page, offset, nbytes))
 					page_count++;
 			}
 		}
 
+		unlock_page(page);
 		bp->b_pages[i] = page;
 		offset = 0;
 	}
 
-	if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
-		for (i = 0; i < bp->b_page_count; i++)
-			unlock_page(bp->b_pages[i]);
-	}
-
 	if (page_count == bp->b_page_count)
 		bp->b_flags |= XBF_DONE;
 
@@ -755,7 +746,6 @@ xfs_buf_associate_memory(
 	bp->b_count_desired = len;
 	bp->b_buffer_length = buflen;
 	bp->b_flags |= XBF_MAPPED;
-	bp->b_flags &= ~_XBF_PAGE_LOCKED;
 
 	return 0;
 }
@@ -1103,10 +1093,8 @@ _xfs_buf_ioend(
 	xfs_buf_t		*bp,
 	int			schedule)
 {
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-		bp->b_flags &= ~_XBF_PAGE_LOCKED;
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
 		xfs_buf_ioend(bp, schedule);
-	}
 }
 
 STATIC void
@@ -1137,9 +1125,6 @@ xfs_buf_bio_end_io(
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
-
-		if (bp->b_flags & _XBF_PAGE_LOCKED)
-			unlock_page(page);
 	} while (bvec >= bio->bi_io_vec);
 
 	_xfs_buf_ioend(bp, 1);
@@ -1178,8 +1163,7 @@ _xfs_buf_ioapply(
 	 * filesystem block size is not smaller than the page size.
 	 */
 	if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-	    ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
-	      (XBF_READ|_XBF_PAGE_LOCKED)) &&
+	    (bp->b_flags & XBF_READ) &&
 	    (blocksize >= PAGE_CACHE_SIZE)) {
 		bio = bio_alloc(GFP_NOIO, 1);
 
diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.h b/trunk/fs/xfs/linux-2.6/xfs_buf.h
index f948ec7ba9a4..841d7883528d 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_buf.h
+++ b/trunk/fs/xfs/linux-2.6/xfs_buf.h
@@ -66,25 +66,6 @@ typedef enum {
 	_XBF_PAGES = (1 << 18),	    /* backed by refcounted pages	   */
 	_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue	   */
 	_XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue		   */
-
-	/*
-	 * Special flag for supporting metadata blocks smaller than a FSB.
-	 *
-	 * In this case we can have multiple xfs_buf_t on a single page and
-	 * need to lock out concurrent xfs_buf_t readers as they only
-	 * serialise access to the buffer.
-	 *
-	 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
-	 * between reads of the page. Hence we can have one thread read the
-	 * page and modify it, but then race with another thread that thinks
-	 * the page is not up-to-date and hence reads it again.
-	 *
-	 * The result is that the first modifcation to the page is lost.
-	 * This sort of AGF/AGI reading race can happen when unlinking inodes
-	 * that require truncation and results in the AGI unlinked list
-	 * modifications being lost.
-	 */
-	_XBF_PAGE_LOCKED = (1 << 22),
 } xfs_buf_flags_t;
 
 typedef enum {
diff --git a/trunk/fs/xfs/linux-2.6/xfs_file.c b/trunk/fs/xfs/linux-2.6/xfs_file.c
index 5f60363b9343..65e78c13d4ae 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_file.c
+++ b/trunk/fs/xfs/linux-2.6/xfs_file.c
@@ -184,24 +184,19 @@ xfs_file_release(
 	return -xfs_release(XFS_I(inode));
 }
 
-/*
- * We ignore the datasync flag here because a datasync is effectively
- * identical to an fsync. That is, datasync implies that we need to write
- * only the metadata needed to be able to access the data that is written
- * if we crash after the call completes. Hence if we are writing beyond
- * EOF we have to log the inode size change as well, which makes it a
- * full fsync. If we don't write beyond EOF, the inode core will be
- * clean in memory and so we don't need to log the inode, just like
- * fsync.
- */
 STATIC int
 xfs_file_fsync(
 	struct file	*filp,
 	struct dentry	*dentry,
 	int		datasync)
 {
+	int		flags = FSYNC_WAIT;
+
+	if (datasync)
+		flags |= FSYNC_DATA;
 	xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
-	return -xfs_fsync(XFS_I(dentry->d_inode));
+	return -xfs_fsync(XFS_I(dentry->d_inode), flags,
+			(xfs_off_t)0, (xfs_off_t)-1);
 }
 
 /*
diff --git a/trunk/fs/xfs/linux-2.6/xfs_vnode.h b/trunk/fs/xfs/linux-2.6/xfs_vnode.h
index 25eb2a9e8d9b..9d73cb5c0fc7 100644
--- a/trunk/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/trunk/fs/xfs/linux-2.6/xfs_vnode.h
@@ -229,6 +229,14 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
 #define ATTR_NOLOCK	0x200	/* Don't grab any conflicting locks */
 #define ATTR_NOSIZETOK	0x400	/* Don't get the SIZE token */
 
+/*
+ * Flags to vop_fsync/reclaim.
+ */
+#define FSYNC_NOWAIT	0	/* asynchronous flush */
+#define FSYNC_WAIT	0x1	/* synchronous fsync or forced reclaim */
+#define FSYNC_INVAL	0x2	/* flush and invalidate cached data */
+#define FSYNC_DATA	0x4	/* synchronous fsync of data only */
+
 /*
  * Tracking vnode activity.
  */
diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c
index e569bf5d6cf0..cf0bb9c1d621 100644
--- a/trunk/fs/xfs/xfs_inode.c
+++ b/trunk/fs/xfs/xfs_inode.c
@@ -2974,7 +2974,6 @@ xfs_iflush_cluster(
 	xfs_mount_t		*mp = ip->i_mount;
 	xfs_perag_t		*pag = xfs_get_perag(mp, ip->i_ino);
 	unsigned long		first_index, mask;
-	unsigned long		inodes_per_cluster;
 	int			ilist_size;
 	xfs_inode_t		**ilist;
 	xfs_inode_t		*iq;
@@ -2986,9 +2985,8 @@ xfs_iflush_cluster(
 	ASSERT(pag->pagi_inodeok);
 	ASSERT(pag->pag_ici_init);
 
-	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
-	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
-	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
+	ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+	ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
 	if (!ilist)
 		return 0;
 
@@ -2997,7 +2995,8 @@ xfs_iflush_cluster(
 	read_lock(&pag->pag_ici_lock);
 	/* really need a gang lookup range call here */
 	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
-					first_index, inodes_per_cluster);
+					first_index,
+					XFS_INODE_CLUSTER_SIZE(mp));
 	if (nr_found == 0)
 		goto out_free;
 
diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c
index e475e3717eb3..70702a60b4bb 100644
--- a/trunk/fs/xfs/xfs_vnodeops.c
+++ b/trunk/fs/xfs/xfs_vnodeops.c
@@ -856,14 +856,18 @@ xfs_readlink(
 /*
  * xfs_fsync
  *
- * This is called to sync the inode and its data out to disk.  We need to hold
- * the I/O lock while flushing the data, and the inode lock while flushing the
- * inode.  The inode lock CANNOT be held while flushing the data, so acquire
- * after we're done with that.
+ * This is called to sync the inode and its data out to disk.
+ * We need to hold the I/O lock while flushing the data, and
+ * the inode lock while flushing the inode.  The inode lock CANNOT
+ * be held while flushing the data, so acquire after we're done
+ * with that.
  */
 int
 xfs_fsync(
-	xfs_inode_t	*ip)
+	xfs_inode_t	*ip,
+	int		flag,
+	xfs_off_t	start,
+	xfs_off_t	stop)
 {
 	xfs_trans_t	*tp;
 	int		error;
@@ -871,79 +875,103 @@ xfs_fsync(
 
 	xfs_itrace_entry(ip);
 
+	ASSERT(start >= 0 && stop >= -1);
+
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return XFS_ERROR(EIO);
 
-	/* capture size updates in I/O completion before writing the inode. */
-	error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
-	if (error)
-		return XFS_ERROR(error);
+	if (flag & FSYNC_DATA)
+		filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
 
 	/*
-	 * We always need to make sure that the required inode state is safe on
-	 * disk.  The vnode might be clean but we still might need to force the
-	 * log because of committed transactions that haven't hit the disk yet.
-	 * Likewise, there could be unflushed non-transactional changes to the
-	 * inode core that have to go to disk and this requires us to issue
-	 * a synchronous transaction to capture these changes correctly.
+	 * We always need to make sure that the required inode state
+	 * is safe on disk.  The vnode might be clean but we may still
+	 * need to force the log because of committed transactions that
+	 * haven't hit the disk yet.  Likewise, there could be unflushed
+	 * non-transactional changes to the inode core that have to go to disk.
 	 *
-	 * This code relies on the assumption that if the update_* fields
-	 * of the inode are clear and the inode is unpinned then it is clean
-	 * and no action is required.
+	 * The following code depends on one assumption:  that
+	 * any transaction that changes an inode logs the core
+	 * because it has to change some field in the inode core
+	 * (typically nextents or nblocks).  That assumption
+	 * implies that any transactions against an inode will
+	 * catch any non-transactional updates.  If inode-altering
+	 * transactions exist that violate this assumption, the
+	 * code breaks.  Right now, it figures that if the involved
+	 * update_* field is clear and the inode is unpinned, the
+	 * inode is clean.  Either it's been flushed or it's been
+	 * committed and the commit has hit the disk unpinning the inode.
+	 * (Note that xfs_inode_item_format() called at commit clears
+	 * the update_* fields.)
 	 */
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 
-	if (!(ip->i_update_size || ip->i_update_core)) {
+	/* If we are flushing data then we care about update_size
+	 * being set, otherwise we care about update_core
+	 */
+	if ((flag & FSYNC_DATA) ?
+			(ip->i_update_size == 0) :
+			(ip->i_update_core == 0)) {
 		/*
-		 * Timestamps/size haven't changed since last inode flush or
-		 * inode transaction commit.  That means either nothing got
-		 * written or a transaction committed which caught the updates.
-		 * If the latter happened and the transaction hasn't hit the
-		 * disk yet, the inode will be still be pinned.  If it is,
-		 * force the log.
+		 * Timestamps/size haven't changed since last inode
+		 * flush or inode transaction commit.  That means
+		 * either nothing got written or a transaction
+		 * committed which caught the updates.	If the
+		 * latter happened and the transaction hasn't
+		 * hit the disk yet, the inode will still
+		 * be pinned.  If it is, force the log.
 		 */
 
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 		if (xfs_ipincount(ip)) {
-			error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
-				      XFS_LOG_FORCE | XFS_LOG_SYNC,
+			_xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
+				      XFS_LOG_FORCE |
+				      ((flag & FSYNC_WAIT)
+				       ? XFS_LOG_SYNC : 0),
 				      &log_flushed);
 		} else {
 			/*
-			 * If the inode is not pinned and nothing has changed
-			 * we don't need to flush the cache.
+			 * If the inode is not pinned and nothing
+			 * has changed we don't need to flush the
+			 * cache.
 			 */
 			changed = 0;
 		}
+		error = 0;
 	} else	{
 		/*
-		 * Kick off a transaction to log the inode core to get the
-		 * updates.  The sync transaction will also force the log.
+		 * Kick off a transaction to log the inode
+		 * core to get the updates.  Make it
+		 * sync if FSYNC_WAIT is passed in (which
+		 * is done by everybody but specfs).  The
+		 * sync transaction will also force the log.
 		 */
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
-		error = xfs_trans_reserve(tp, 0,
-				XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
-		if (error) {
+		if ((error = xfs_trans_reserve(tp, 0,
+				XFS_FSYNC_TS_LOG_RES(ip->i_mount),
+				0, 0, 0)))  {
 			xfs_trans_cancel(tp, 0);
 			return error;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 
 		/*
-		 * Note - it's possible that we might have pushed ourselves out
-		 * of the way during trans_reserve which would flush the inode.
-		 * But there's no guarantee that the inode buffer has actually
-		 * gone out yet (it's delwri).	Plus the buffer could be pinned
-		 * anyway if it's part of an inode in another recent
-		 * transaction.	 So we play it safe and fire off the
-		 * transaction anyway.
+		 * Note - it's possible that we might have pushed
+		 * ourselves out of the way during trans_reserve
+		 * which would flush the inode.	 But there's no
+		 * guarantee that the inode buffer has actually
+		 * gone out yet (it's delwri).	Plus the buffer
+		 * could be pinned anyway if it's part of an
+		 * inode in another recent transaction.	 So we
+		 * play it safe and fire off the transaction anyway.
 		 */
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_trans_ihold(tp, ip);
 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-		xfs_trans_set_sync(tp);
+		if (flag & FSYNC_WAIT)
+			xfs_trans_set_sync(tp);
 		error = _xfs_trans_commit(tp, 0, &log_flushed);
 
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/trunk/fs/xfs/xfs_vnodeops.h b/trunk/fs/xfs/xfs_vnodeops.h
index 57335ba4ce53..8abe8f186e20 100644
--- a/trunk/fs/xfs/xfs_vnodeops.h
+++ b/trunk/fs/xfs/xfs_vnodeops.h
@@ -18,7 +18,8 @@ int xfs_open(struct xfs_inode *ip);
 int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
 		struct cred *credp);
 int xfs_readlink(struct xfs_inode *ip, char *link);
-int xfs_fsync(struct xfs_inode *ip);
+int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start,
+		xfs_off_t stop);
 int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c
index 5f80478b746d..f5e9491ef7ac 100644
--- a/trunk/kernel/module.c
+++ b/trunk/kernel/module.c
@@ -1337,19 +1337,7 @@ int mod_sysfs_setup(struct module *mod,
 	kobject_put(&mod->mkobj.kobj);
 	return err;
 }
-
-static void mod_sysfs_fini(struct module *mod)
-{
-	kobject_put(&mod->mkobj.kobj);
-}
-
-#else /* CONFIG_SYSFS */
-
-static void mod_sysfs_fini(struct module *mod)
-{
-}
-
-#endif /* CONFIG_SYSFS */
+#endif
 
 static void mod_kobject_remove(struct module *mod)
 {
@@ -1357,7 +1345,7 @@ static void mod_kobject_remove(struct module *mod)
 	module_param_sysfs_remove(mod);
 	kobject_put(mod->mkobj.drivers_dir);
 	kobject_put(mod->holders_dir);
-	mod_sysfs_fini(mod);
+	kobject_put(&mod->mkobj.kobj);
 }
 
 /*
@@ -1792,7 +1780,7 @@ static struct module *load_module(void __user *umod,
 
 	/* Sanity checks against insmoding binaries or wrong arch,
            weird elf version */
-	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
+	if (memcmp(hdr->e_ident, ELFMAG, 4) != 0
 	    || hdr->e_type != ET_REL
 	    || !elf_check_arch(hdr)
 	    || hdr->e_shentsize != sizeof(*sechdrs)) {
diff --git a/trunk/kernel/stop_machine.c b/trunk/kernel/stop_machine.c
index b7350bbfb076..0101aeef7ed7 100644
--- a/trunk/kernel/stop_machine.c
+++ b/trunk/kernel/stop_machine.c
@@ -62,7 +62,8 @@ static int stopmachine(void *cpu)
 		 * help our sisters onto their CPUs. */
 		if (!prepared && !irqs_disabled)
 			yield();
-		cpu_relax();
+		else
+			cpu_relax();
 	}
 
 	/* Ack: we are exiting. */
@@ -105,10 +106,8 @@ static int stop_machine(void)
 	}
 
 	/* Wait for them all to come to life. */
-	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
+	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
 		yield();
-		cpu_relax();
-	}
 
 	/* If some failed, kill them all. */
 	if (ret < 0) {