From f26ca1d699e8b54a50d9faf82327d3c2072aaedd Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 27 Nov 2013 13:33:09 -0700 Subject: [PATCH 001/164] ACPI / PCI / hotplug: Avoid warning when _ADR not present acpiphp_enumerate_slots() walks ACPI namenamespace under a PCI host bridge with callback register_slot(). register_slot() evaluates _ADR for all the device objects and emits a warning message for any error. Some platforms have _HID device objects (such as HPET and IPMI), which trigger unnecessary warning messages. This patch avoids emitting a warning message when a target device object does not have _ADR. Signed-off-by: Toshi Kani Cc: 3.12+ # 3.12+ Signed-off-by: Rafael J. Wysocki --- drivers/pci/hotplug/acpiphp_glue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 1cf605f676735..438a4d09958ba 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -279,7 +279,9 @@ static acpi_status register_slot(acpi_handle handle, u32 lvl, void *data, status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); if (ACPI_FAILURE(status)) { - acpi_handle_warn(handle, "can't evaluate _ADR (%#x)\n", status); + if (status != AE_NOT_FOUND) + acpi_handle_warn(handle, + "can't evaluate _ADR (%#x)\n", status); return AE_OK; } From 80c6463e2fa3377febfc98a6672d92d07f3c26c1 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Nov 2013 10:54:28 -0700 Subject: [PATCH 002/164] power_supply: Fix Oops from NULL pointer dereference from wakeup_source_activate power_supply_register() calls device_init_wakeup() to register a wakeup source before initializing dev_name. As a result, device_wakeup_enable() end up registering wakeup source with a null name when wakeup_source_register() gets called with dev_name(dev) which is null at the time. When kernel is booted with wakeup_source_activate enabled, it will panic when the trace point code tries to dereference ws->name. Fixed the problem by moving up the kobject_set_name() call prior to accesses to dev_name(). Replaced kobject_set_name() with dev_set_name() which is the right interface to be called from drivers. Fixed the call to device_del() prior to device_add() in for wakeup_init_failed error handling code. Trace after the change: bash-2143 [003] d... 132.280697: wakeup_source_activate: BAT1 state=0x20001 kworker/3:2-1169 [003] d... 132.281305: wakeup_source_deactivate: BAT1 state=0x30000 Oops message: [ 819.769934] device: 'BAT1': device_add [ 819.770078] PM: Adding info for No Bus:BAT1 [ 819.770235] BUG: unable to handle kernel NULL pointer dereference at (null) [ 819.770435] IP: [] skip_spaces+0x30/0x30 [ 819.770572] PGD 3efd90067 PUD 3eff61067 PMD 0 [ 819.770716] Oops: 0000 [#1] SMP [ 819.770829] Modules linked in: arc4 iwldvm mac80211 x86_pkg_temp_thermal coretemp kvm_intel joydev i915 kvm uvcvideo ghash_clmulni_intel videobuf2_vmalloc aesni_intel videobuf2_memops videobuf2_core aes_x86_64 ablk_helper cryptd videodev iwlwifi lrw rfcomm gf128mul glue_helper bnep btusb media bluetooth parport_pc hid_generic ppdev snd_hda_codec_hdmi drm_kms_helper snd_hda_codec_realtek cfg80211 drm tpm_infineon samsung_laptop snd_hda_intel usbhid snd_hda_codec hid snd_hwdep snd_pcm microcode snd_page_alloc snd_timer psmouse i2c_algo_bit lpc_ich tpm_tis video wmi mac_hid serio_raw ext2 lp parport r8169 mii [ 819.771802] CPU: 0 PID: 2167 Comm: bash Not tainted 3.12.0+ #25 [ 819.771876] Hardware name: SAMSUNG ELECTRONICS CO., LTD. 900X3C/900X3D/900X4C/900X4D/SAMSUNG_NP1234567890, BIOS P03AAC 07/12/2012 [ 819.772022] task: ffff88002e6ddcc0 ti: ffff8804015ca000 task.ti: ffff8804015ca000 [ 819.772119] RIP: 0010:[] [] skip_spaces+0x30/0x30 [ 819.772242] RSP: 0018:ffff8804015cbc70 EFLAGS: 00010046 [ 819.772310] RAX: 0000000000000003 RBX: ffff88040cfd6d40 RCX: 0000000000000018 [ 819.772397] RDX: 0000000000020001 RSI: 0000000000000000 RDI: 0000000000000000 [ 819.772484] RBP: ffff8804015cbcc0 R08: 0000000000000000 R09: ffff8803f0768d40 [ 819.772570] R10: ffffea001033b800 R11: 0000000000000000 R12: ffffffff81c519c0 [ 819.772656] R13: 0000000000020001 R14: 0000000000000000 R15: 0000000000020001 [ 819.772744] FS: 00007ff98309b740(0000) GS:ffff88041f200000(0000) knlGS:0000000000000000 [ 819.772845] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 819.772917] CR2: 0000000000000000 CR3: 00000003f59dc000 CR4: 00000000001407f0 [ 819.773001] Stack: [ 819.773030] ffffffff81114003 ffff8804015cbcb0 0000000000000000 0000000000000046 [ 819.773146] ffff880409757a18 ffff8803f065a160 0000000000000000 0000000000020001 [ 819.773273] 0000000000000000 0000000000000000 ffff8804015cbce8 ffffffff8143e388 [ 819.773387] Call Trace: [ 819.773434] [] ? ftrace_raw_event_wakeup_source+0x43/0xe0 [ 819.773520] [] wakeup_source_report_event+0xb8/0xd0 [ 819.773595] [] __pm_stay_awake+0x2d/0x50 [ 819.773724] [] power_supply_changed+0x3c/0x90 [ 819.773795] [] power_supply_register+0x18c/0x250 [ 819.773869] [] sysfs_add_battery+0x61/0x7b [ 819.773935] [] battery_notify+0x37/0x3f [ 819.774001] [] notifier_call_chain+0x4c/0x70 [ 819.774071] [] __blocking_notifier_call_chain+0x4d/0x70 [ 819.774149] [] blocking_notifier_call_chain+0x16/0x20 [ 819.774227] [] pm_notifier_call_chain+0x1a/0x40 [ 819.774316] [] hibernate+0x66/0x1c0 [ 819.774407] [] state_store+0x71/0xa0 [ 819.774507] [] kobj_attr_store+0xf/0x20 [ 819.774613] [] sysfs_write_file+0x128/0x1c0 [ 819.774735] [] vfs_write+0xbd/0x1e0 [ 819.774841] [] SyS_write+0x49/0xa0 [ 819.774939] [] system_call_fastpath+0x16/0x1b [ 819.775055] Code: 89 f8 48 89 e5 f6 82 c0 a6 84 81 20 74 15 0f 1f 44 00 00 48 83 c0 01 0f b6 10 f6 82 c0 a6 84 81 20 75 f0 5d c3 66 0f 1f 44 00 00 <80> 3f 00 55 48 89 e5 74 15 48 89 f8 0f 1f 40 00 48 83 c0 01 80 [ 819.775760] RIP [] skip_spaces+0x30/0x30 [ 819.775881] RSP [ 819.775949] CR2: 0000000000000000 [ 819.794175] ---[ end trace c4ef25127039952e ]--- Signed-off-by: Shuah Khan Acked-by: Anton Vorontsov Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Anton Vorontsov --- drivers/power/power_supply_core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 00e6672963601..557af943b2f53 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -511,6 +511,10 @@ int power_supply_register(struct device *parent, struct power_supply *psy) dev_set_drvdata(dev, psy); psy->dev = dev; + rc = dev_set_name(dev, "%s", psy->name); + if (rc) + goto dev_set_name_failed; + INIT_WORK(&psy->changed_work, power_supply_changed_work); rc = power_supply_check_supplies(psy); @@ -524,10 +528,6 @@ int power_supply_register(struct device *parent, struct power_supply *psy) if (rc) goto wakeup_init_failed; - rc = kobject_set_name(&dev->kobj, "%s", psy->name); - if (rc) - goto kobject_set_name_failed; - rc = device_add(dev); if (rc) goto device_add_failed; @@ -553,11 +553,11 @@ int power_supply_register(struct device *parent, struct power_supply *psy) register_cooler_failed: psy_unregister_thermal(psy); register_thermal_failed: -wakeup_init_failed: device_del(dev); -kobject_set_name_failed: device_add_failed: +wakeup_init_failed: check_supplies_failed: +dev_set_name_failed: put_device(dev); success: return rc; From 93353e8088057dd988362e6cae727af43734b494 Mon Sep 17 00:00:00 2001 From: Austin Boyle Date: Sun, 24 Nov 2013 21:41:49 +1100 Subject: [PATCH 003/164] max17042_battery: Fix build errors caused by missing REGMAP_I2C config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit max17042 now uses regmap interface but does not enable config option. This patch fixes the following build errors: drivers/power/max17042_battery.c:661:15: error: variable ‘max17042_regmap_config’ has initializer but incomplete type drivers/power/max17042_battery.c:662:2: error: unknown field ‘reg_bits’ specified in initializer drivers/power/max17042_battery.c:662:2: warning: excess elements in struct initializer drivers/power/max17042_battery.c:662:2: warning: (near initialization for ‘max17042_regmap_config’) drivers/power/max17042_battery.c:663:2: error: unknown field ‘val_bits’ specified in initializer drivers/power/max17042_battery.c:663:2: warning: excess elements in struct initializer drivers/power/max17042_battery.c:663:2: warning: (near initialization for ‘max17042_regmap_config’) drivers/power/max17042_battery.c:664:2: error: unknown field ‘val_format_endian’ specified in initializer drivers/power/max17042_battery.c:664:23: error: ‘REGMAP_ENDIAN_NATIVE’ undeclared here (not in a function) drivers/power/max17042_battery.c:664:2: warning: excess elements in struct initializer drivers/power/max17042_battery.c:664:2: warning: (near initialization for ‘max17042_regmap_config’) drivers/power/max17042_battery.c: In function ‘max17042_probe’: drivers/power/max17042_battery.c:684:2: error: implicit declaration of function ‘devm_regmap_init_i2c’ Signed-off-by: Austin Boyle Acked-by: Jonghwa Lee Signed-off-by: Anton Vorontsov --- drivers/power/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 5e2054afe840e..85ad58c6da172 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -196,6 +196,7 @@ config BATTERY_MAX17040 config BATTERY_MAX17042 tristate "Maxim MAX17042/17047/17050/8997/8966 Fuel Gauge" depends on I2C + select REGMAP_I2C help MAX17042 is fuel-gauge systems for lithium-ion (Li+) batteries in handheld and portable equipment. The MAX17042 is configured From bd0976dd3379e790b031cef7f477c58b82a65fc2 Mon Sep 17 00:00:00 2001 From: Marco Piazza Date: Thu, 28 Nov 2013 00:15:25 +0100 Subject: [PATCH 004/164] Bluetooth: Add support for Toshiba Bluetooth device [0930:0220] This patch adds support for new Toshiba Bluetooth device. T: Bus=05 Lev=01 Prnt=01 Port=02 Cnt=02 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0930 ProdID=0220 Rev=00.02 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Marco Piazza Signed-off-by: Gustavo Padovan --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 6bfc1bb318f63..dceb85f8d9a82 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -87,6 +87,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0xE004) }, { USB_DEVICE(0x0CF3, 0xE005) }, { USB_DEVICE(0x0930, 0x0219) }, + { USB_DEVICE(0x0930, 0x0220) }, { USB_DEVICE(0x0489, 0xe057) }, { USB_DEVICE(0x13d3, 0x3393) }, { USB_DEVICE(0x0489, 0xe04e) }, @@ -129,6 +130,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c0ff34f2d2df5..3980fd18f6eae 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -154,6 +154,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, From 4c4d684a55fc01dac6bee696efc56b96d0e6c03a Mon Sep 17 00:00:00 2001 From: Ujjal Roy Date: Wed, 4 Dec 2013 17:27:34 +0530 Subject: [PATCH 005/164] cfg80211: fix WARN_ON for re-association to the expired BSS cfg80211 allows re-association in managed mode and if a user wants to re-associate to the same AP network after the time period of IEEE80211_SCAN_RESULT_EXPIRE, cfg80211 warns with the following message on receiving the connect result event. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 13984 at net/wireless/sme.c:658 __cfg80211_connect_result+0x3a6/0x3e0 [cfg80211]() Call Trace: [] dump_stack+0x46/0x58 [] warn_slowpath_common+0x87/0xb0 [] warn_slowpath_null+0x15/0x20 [] __cfg80211_connect_result+0x3a6/0x3e0 [cfg80211] [] ? update_rq_clock+0x2b/0x50 [] ? update_curr+0x1/0x160 [] cfg80211_process_wdev_events+0xb2/0x1c0 [cfg80211] [] ? pick_next_task_fair+0x63/0x170 [] cfg80211_process_rdev_events+0x38/0x90 [cfg80211] [] cfg80211_event_work+0x1d/0x30 [cfg80211] [] process_one_work+0x17f/0x420 [] worker_thread+0x11a/0x370 [] ? rescuer_thread+0x2f0/0x2f0 [] kthread+0xbb/0xc0 [] ? kthread_create_on_node+0x120/0x120 [] ret_from_fork+0x7c/0xb0 [] ? kthread_create_on_node+0x120/0x120 ---[ end trace 61f3bddc9c4981f7 ]--- The reason is that, in connect result event cfg80211 unholds the BSS to which the device is associated (and was held so far). So, for the event with status successful, when cfg80211 wants to get that BSS from the device's BSS list it gets a NULL BSS because the BSS has been expired and unheld already. Fix it by reshuffling the code. Signed-off-by: Ujjal Roy Signed-off-by: Johannes Berg --- net/wireless/sme.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 65f800890d70d..d3c5bd7c6b513 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -632,6 +632,16 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } #endif + if (!bss && (status == WLAN_STATUS_SUCCESS)) { + WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + if (bss) + cfg80211_hold_bss(bss_from_pub(bss)); + } + if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); @@ -649,16 +659,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, return; } - if (!bss) { - WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); - bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, - wdev->ssid, wdev->ssid_len, - WLAN_CAPABILITY_ESS, - WLAN_CAPABILITY_ESS); - if (WARN_ON(!bss)) - return; - cfg80211_hold_bss(bss_from_pub(bss)); - } + if (WARN_ON(!bss)) + return; wdev->current_bss = bss_from_pub(bss); From 6f041e99fc7ba00e7e65a431527f9235d6b16463 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 21 Nov 2013 13:44:14 +0000 Subject: [PATCH 006/164] of: Fix NULL dereference in unflatten_and_copy() Check whether initial_boot_params is NULL before dereferencing it in unflatten_and_copy_device_tree() for the case where no device tree is available but the arch can still boot to a minimal usable system without it. In this case also log a warning for when the kernel log buffer is obtainable. Signed-off-by: James Hogan Cc: Rob Herring Signed-off-by: Grant Likely --- drivers/of/fdt.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 2fa024b97c435..758b4f8b30b7d 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -922,8 +922,16 @@ void __init unflatten_device_tree(void) */ void __init unflatten_and_copy_device_tree(void) { - int size = __be32_to_cpu(initial_boot_params->totalsize); - void *dt = early_init_dt_alloc_memory_arch(size, + int size; + void *dt; + + if (!initial_boot_params) { + pr_warn("No valid device tree found, continuing without\n"); + return; + } + + size = __be32_to_cpu(initial_boot_params->totalsize); + dt = early_init_dt_alloc_memory_arch(size, __alignof__(struct boot_param_header)); if (dt) { From dfe5b9ad83a63180f358b27d1018649a27b394a9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Dec 2013 11:52:34 +0000 Subject: [PATCH 007/164] GFS2: don't hold s_umount over blkdev_put This is a GFS2 version of Tejun's patch: 4f331f01b9c43bf001d3ffee578a97a1e0633eac vfs: don't hold s_umount over close_bdev_exclusive() call In this case its blkdev_put itself that is the issue and this patch uses the same solution of dropping and retaking s_umount. Reported-by: Tejun Heo Reported-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 82303b4749582..52fa88314f5cd 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (IS_ERR(s)) goto error_bdev; - if (s->s_root) + if (s->s_root) { + /* + * s_umount nests inside bd_mutex during + * __invalidate_device(). blkdev_put() acquires + * bd_mutex and can't be called under s_umount. Drop + * s_umount temporarily. This is safe as we're + * holding an active reference. + */ + up_write(&s->s_umount); blkdev_put(bdev, mode); + down_write(&s->s_umount); + } memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; From 9290a9a7c0bcf5400e8dbfbf9707fa68ea3fb338 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 10 Dec 2013 12:06:35 -0500 Subject: [PATCH 008/164] GFS2: Fix use-after-free race when calling gfs2_remove_from_ail Function gfs2_remove_from_ail drops the reference on the bh via brelse. This patch fixes a race condition whereby bh is deferenced after the brelse when setting bd->bd_blkno = bh->b_blocknr; Under certain rare circumstances, bh might be gone or reused, and bd->bd_blkno is set to whatever that memory happens to be, which is often 0. Later, in gfs2_trans_add_unrevoke, that bd fails the test "bd->bd_blkno >= blkno" which causes it to never be freed. The end result is that the bd is never freed from the bufdata cache, which results in this error: slab error in kmem_cache_destroy(): cache `gfs2_bufdata': Can't free all objects Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 610613fb65b55..9dcb9777a5f80 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct buffer_head *bh = bd->bd_bh; struct gfs2_glock *gl = bd->bd_gl; - gfs2_remove_from_ail(bd); - bd->bd_bh = NULL; bh->b_private = NULL; bd->bd_blkno = bh->b_blocknr; + gfs2_remove_from_ail(bd); /* drops ref on bh */ + bd->bd_bh = NULL; bd->bd_ops = &gfs2_revoke_lops; sdp->sd_log_num_revoke++; atomic_inc(&gl->gl_revokes); From 502be2a32f09f388e4ff34ef2e3ebcabbbb261da Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 13 Dec 2013 08:31:06 -0500 Subject: [PATCH 009/164] GFS2: Fix slab memory leak in gfs2_bufdata This patch fixes a slab memory leak that sometimes can occur for files with a very short lifespan. The problem occurs when a dinode is deleted before it has gotten to the journal properly. In the leak scenario, the bd object is pinned for journal committment (queued to the metadata buffers queue: sd_log_le_buf) but is subsequently unpinned and dequeued before it finds its way to the ail or the revoke queue. In this rare circumstance, the bd object needs to be freed from slab memory, or it is forgotten. We have to be very careful how we do it, though, because multiple processes can call gfs2_remove_from_journal. In order to avoid double-frees, only the process that does the unpinning is allowed to free the bd. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 932415050540e..52f177be3bf86 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int struct address_space *mapping = bh->b_page->mapping; struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); struct gfs2_bufdata *bd = bh->b_private; + int was_pinned = 0; if (test_clear_buffer_pinned(bh)) { trace_gfs2_pin(bd, 0); @@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int tr->tr_num_databuf_rm++; } tr->tr_touched = 1; + was_pinned = 1; brelse(bh); } if (bd) { spin_lock(&sdp->sd_ail_lock); if (bd->bd_tr) { gfs2_trans_add_revoke(sdp, bd); + } else if (was_pinned) { + bh->b_private = NULL; + kmem_cache_free(gfs2_bufdata_cachep, bd); } spin_unlock(&sdp->sd_ail_lock); } From 499e61279d0f8dc6ecf46b75479118589df58b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20St=C3=BCbner?= Date: Sat, 14 Dec 2013 01:41:55 -0800 Subject: [PATCH 010/164] Input: zforce - fix possible driver hang during suspend handle_level_irq masks the interrupt before handling it, and only unmasks it after the handler is finished. So when a touch event happens after threads are suspended, but before the system is fully asleep the irq handler tries to wakeup the thread which will only happen on the next resume, resulting in the wakeup event never being sent and the driver not being able to wake the system from sleep due to the masked irq. Therefore move the wakeup_event to a small non-threaded handler. Signed-off-by: Heiko Stuebner Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/zforce_ts.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index 75762d6ff3ba7..aa127ba392a45 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -455,7 +455,18 @@ static void zforce_complete(struct zforce_ts *ts, int cmd, int result) } } -static irqreturn_t zforce_interrupt(int irq, void *dev_id) +static irqreturn_t zforce_irq(int irq, void *dev_id) +{ + struct zforce_ts *ts = dev_id; + struct i2c_client *client = ts->client; + + if (ts->suspended && device_may_wakeup(&client->dev)) + pm_wakeup_event(&client->dev, 500); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t zforce_irq_thread(int irq, void *dev_id) { struct zforce_ts *ts = dev_id; struct i2c_client *client = ts->client; @@ -465,12 +476,10 @@ static irqreturn_t zforce_interrupt(int irq, void *dev_id) u8 *payload; /* - * When suspended, emit a wakeup signal if necessary and return. + * When still suspended, return. * Due to the level-interrupt we will get re-triggered later. */ if (ts->suspended) { - if (device_may_wakeup(&client->dev)) - pm_wakeup_event(&client->dev, 500); msleep(20); return IRQ_HANDLED; } @@ -763,8 +772,8 @@ static int zforce_probe(struct i2c_client *client, * Therefore we can trigger the interrupt anytime it is low and do * not need to limit it to the interrupt edge. */ - ret = devm_request_threaded_irq(&client->dev, client->irq, NULL, - zforce_interrupt, + ret = devm_request_threaded_irq(&client->dev, client->irq, + zforce_irq, zforce_irq_thread, IRQF_TRIGGER_LOW | IRQF_ONESHOT, input_dev->name, ts); if (ret) { From 2a4d81547b88b4ff566d4b4f9c0e36285ed07634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 15 Dec 2013 21:00:48 -0800 Subject: [PATCH 011/164] Input: define KEY_WWAN for Wireless WAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices with support for mobile networks may have buttons for enabling/disabling such connection. An example can be Linksys router 54G3G. We already have KEY_BLUETOOTH, KEY_WLAN and KEY_UWB so it makes sense to add KEY_WWAN as well. As we already have KEY_WIMAX, use it's value for KEY_WWAN and make it an alias. Signed-off-by: Rafał Miłecki Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index ecc88592ecbe9..bd24470d24a2c 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -464,7 +464,8 @@ struct input_keymap_entry { #define KEY_BRIGHTNESS_ZERO 244 /* brightness off, use ambient */ #define KEY_DISPLAY_OFF 245 /* display device to off state */ -#define KEY_WIMAX 246 +#define KEY_WWAN 246 /* Wireless WAN (LTE, UMTS, GSM, etc.) */ +#define KEY_WIMAX KEY_WWAN #define KEY_RFKILL 247 /* Key that controls all radios */ #define KEY_MICMUTE 248 /* Mute / unmute the microphone */ From bd02cd2549cfcdfc57cb5ce57ffc3feb94f70575 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Dec 2013 12:04:36 +0100 Subject: [PATCH 012/164] radiotap: fix bitmap-end-finding buffer overrun Evan Huus found (by fuzzing in wireshark) that the radiotap iterator code can access beyond the length of the buffer if the first bitmap claims an extension but then there's no data at all. Fix this. Cc: stable@vger.kernel.org Reported-by: Evan Huus Signed-off-by: Johannes Berg --- net/wireless/radiotap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index a271c27fac774..722da616438cd 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -124,6 +124,10 @@ int ieee80211_radiotap_iterator_init( /* find payload start allowing for extended bitmap(s) */ if (iterator->_bitmap_shifter & (1<_arg - + (unsigned long)iterator->_rtheader + sizeof(uint32_t) > + (unsigned long)iterator->_max_length) + return -EINVAL; while (get_unaligned_le32(iterator->_arg) & (1 << IEEE80211_RADIOTAP_EXT)) { iterator->_arg += sizeof(uint32_t); From 1bc5ad168f441f6f8bfd944288a5f7b4963ac1f6 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 17 Dec 2013 03:21:25 -0800 Subject: [PATCH 013/164] Bluetooth: Fix HCI User Channel permission check in hci_sock_sendmsg The HCI User Channel is an admin operation which enforces CAP_NET_ADMIN when binding the socket. Problem now is that it then requires also CAP_NET_RAW when calling into hci_sock_sendmsg. This is not intended and just an oversight since general HCI sockets (which do not require special permission to bind) and HCI User Channel share the same code path here. Remove the extra CAP_NET_RAW check for HCI User Channel write operation since the permission check has already been enforced when binding the socket. This also makes it possible to open HCI User Channel from a privileged process and then hand the file descriptor to an unprivilged process. Signed-off-by: Marcel Holtmann Tested-by: Samuel Ortiz Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 71f0be1730801..73bf644c7c74a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -942,8 +942,22 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, bt_cb(skb)->pkt_type = *((unsigned char *) skb->data); skb_pull(skb, 1); - if (hci_pi(sk)->channel == HCI_CHANNEL_RAW && - bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { + if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { + /* No permission check is needed for user channel + * since that gets enforced when binding the socket. + * + * However check that the packet type is valid. + */ + if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + err = -EINVAL; + goto drop; + } + + skb_queue_tail(&hdev->raw_q, skb); + queue_work(hdev->workqueue, &hdev->tx_work); + } else if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); @@ -974,14 +988,6 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } - if (hci_pi(sk)->channel == HCI_CHANNEL_USER && - bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { - err = -EINVAL; - goto drop; - } - skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } From 657eb17d87852c42b55c4b06d5425baa08b2ddb3 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 28 Nov 2013 12:21:45 +0100 Subject: [PATCH 014/164] ath9k_htc: properly set MAC address and BSSID mask Pick the MAC address of the first virtual interface as the new hardware MAC address. Set BSSID mask according to this MAC address. This fixes CVE-2013-4579. Signed-off-by: Mathy Vanhoef Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 25 +++++++++++++------ drivers/net/wireless/ath/ath9k/main.c | 5 ++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 9a2657fdd9ccd..608d739d13782 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -127,21 +127,26 @@ static void ath9k_htc_bssid_iter(void *data, u8 *mac, struct ieee80211_vif *vif) struct ath9k_vif_iter_data *iter_data = data; int i; - for (i = 0; i < ETH_ALEN; i++) - iter_data->mask[i] &= ~(iter_data->hw_macaddr[i] ^ mac[i]); + if (iter_data->hw_macaddr != NULL) { + for (i = 0; i < ETH_ALEN; i++) + iter_data->mask[i] &= ~(iter_data->hw_macaddr[i] ^ mac[i]); + } else { + iter_data->hw_macaddr = mac; + } } -static void ath9k_htc_set_bssid_mask(struct ath9k_htc_priv *priv, +static void ath9k_htc_set_mac_bssid_mask(struct ath9k_htc_priv *priv, struct ieee80211_vif *vif) { struct ath_common *common = ath9k_hw_common(priv->ah); struct ath9k_vif_iter_data iter_data; /* - * Use the hardware MAC address as reference, the hardware uses it - * together with the BSSID mask when matching addresses. + * Pick the MAC address of the first interface as the new hardware + * MAC address. The hardware will use it together with the BSSID mask + * when matching addresses. */ - iter_data.hw_macaddr = common->macaddr; + iter_data.hw_macaddr = NULL; memset(&iter_data.mask, 0xff, ETH_ALEN); if (vif) @@ -153,6 +158,10 @@ static void ath9k_htc_set_bssid_mask(struct ath9k_htc_priv *priv, ath9k_htc_bssid_iter, &iter_data); memcpy(common->bssidmask, iter_data.mask, ETH_ALEN); + + if (iter_data.hw_macaddr) + memcpy(common->macaddr, iter_data.hw_macaddr, ETH_ALEN); + ath_hw_setbssidmask(common); } @@ -1063,7 +1072,7 @@ static int ath9k_htc_add_interface(struct ieee80211_hw *hw, goto out; } - ath9k_htc_set_bssid_mask(priv, vif); + ath9k_htc_set_mac_bssid_mask(priv, vif); priv->vif_slot |= (1 << avp->index); priv->nvifs++; @@ -1128,7 +1137,7 @@ static void ath9k_htc_remove_interface(struct ieee80211_hw *hw, ath9k_htc_set_opmode(priv); - ath9k_htc_set_bssid_mask(priv, vif); + ath9k_htc_set_mac_bssid_mask(priv, vif); /* * Stop ANI only if there are no associated station interfaces. diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 74f452c7b1667..21aa09e0e825d 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -965,8 +965,9 @@ void ath9k_calculate_iter_data(struct ieee80211_hw *hw, struct ath_common *common = ath9k_hw_common(ah); /* - * Use the hardware MAC address as reference, the hardware uses it - * together with the BSSID mask when matching addresses. + * Pick the MAC address of the first interface as the new hardware + * MAC address. The hardware will use it together with the BSSID mask + * when matching addresses. */ memset(iter_data, 0, sizeof(*iter_data)); memset(&iter_data->mask, 0xff, ETH_ALEN); From 9278db6279e28d4d433bc8a848e10b4ece8793ed Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 11 Dec 2013 17:13:10 -0600 Subject: [PATCH 015/164] rtlwifi: pci: Fix oops on driver unload On Fedora systems, unloading rtl8192ce causes an oops. This patch fixes the problem reported at https://bugzilla.redhat.com/show_bug.cgi?id=852761. Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 0f494444bcd1d..5a53195d016b6 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -740,6 +740,8 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) }; int index = rtlpci->rx_ring[rx_queue_idx].idx; + if (rtlpci->driver_is_goingto_unload) + return; /*RX NORMAL PKT */ while (count--) { /*rx descriptor */ @@ -1636,6 +1638,7 @@ static void rtl_pci_stop(struct ieee80211_hw *hw) */ set_hal_stop(rtlhal); + rtlpci->driver_is_goingto_unload = true; rtlpriv->cfg->ops->disable_interrupt(hw); cancel_work_sync(&rtlpriv->works.lps_change_work); @@ -1653,7 +1656,6 @@ static void rtl_pci_stop(struct ieee80211_hw *hw) ppsc->rfchange_inprogress = true; spin_unlock_irqrestore(&rtlpriv->locks.rf_ps_lock, flags); - rtlpci->driver_is_goingto_unload = true; rtlpriv->cfg->ops->hw_disable(hw); /* some things are not needed if firmware not available */ if (!rtlpriv->max_fw_size) From 73f0b56a1ff64e7fb6c3a62088804bab93bcedc2 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Mon, 16 Dec 2013 07:04:59 +0530 Subject: [PATCH 016/164] ath9k: Fix interrupt handling for the AR9002 family This patch adds a driver workaround for a HW issue. A race condition in the HW results in missing interrupts, which can be avoided by a read/write with the ISR register. All chips in the AR9002 series are affected by this bug - AR9003 and above do not have this problem. Cc: stable@vger.kernel.org Cc: Felix Fietkau Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9002_mac.c | 52 +++++++++++++++++---- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9002_mac.c b/drivers/net/wireless/ath/ath9k/ar9002_mac.c index 8d78253c26cee..a366d6b4626f5 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_mac.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_mac.c @@ -76,9 +76,16 @@ static bool ar9002_hw_get_isr(struct ath_hw *ah, enum ath9k_int *masked) mask2 |= ATH9K_INT_CST; if (isr2 & AR_ISR_S2_TSFOOR) mask2 |= ATH9K_INT_TSFOOR; + + if (!(pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED)) { + REG_WRITE(ah, AR_ISR_S2, isr2); + isr &= ~AR_ISR_BCNMISC; + } } - isr = REG_READ(ah, AR_ISR_RAC); + if (pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED) + isr = REG_READ(ah, AR_ISR_RAC); + if (isr == 0xffffffff) { *masked = 0; return false; @@ -97,11 +104,23 @@ static bool ar9002_hw_get_isr(struct ath_hw *ah, enum ath9k_int *masked) *masked |= ATH9K_INT_TX; - s0_s = REG_READ(ah, AR_ISR_S0_S); + if (pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED) { + s0_s = REG_READ(ah, AR_ISR_S0_S); + s1_s = REG_READ(ah, AR_ISR_S1_S); + } else { + s0_s = REG_READ(ah, AR_ISR_S0); + REG_WRITE(ah, AR_ISR_S0, s0_s); + s1_s = REG_READ(ah, AR_ISR_S1); + REG_WRITE(ah, AR_ISR_S1, s1_s); + + isr &= ~(AR_ISR_TXOK | + AR_ISR_TXDESC | + AR_ISR_TXERR | + AR_ISR_TXEOL); + } + ah->intr_txqs |= MS(s0_s, AR_ISR_S0_QCU_TXOK); ah->intr_txqs |= MS(s0_s, AR_ISR_S0_QCU_TXDESC); - - s1_s = REG_READ(ah, AR_ISR_S1_S); ah->intr_txqs |= MS(s1_s, AR_ISR_S1_QCU_TXERR); ah->intr_txqs |= MS(s1_s, AR_ISR_S1_QCU_TXEOL); } @@ -114,13 +133,15 @@ static bool ar9002_hw_get_isr(struct ath_hw *ah, enum ath9k_int *masked) *masked |= mask2; } - if (AR_SREV_9100(ah)) - return true; - - if (isr & AR_ISR_GENTMR) { + if (!AR_SREV_9100(ah) && (isr & AR_ISR_GENTMR)) { u32 s5_s; - s5_s = REG_READ(ah, AR_ISR_S5_S); + if (pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED) { + s5_s = REG_READ(ah, AR_ISR_S5_S); + } else { + s5_s = REG_READ(ah, AR_ISR_S5); + } + ah->intr_gen_timer_trigger = MS(s5_s, AR_ISR_S5_GENTIMER_TRIG); @@ -133,8 +154,21 @@ static bool ar9002_hw_get_isr(struct ath_hw *ah, enum ath9k_int *masked) if ((s5_s & AR_ISR_S5_TIM_TIMER) && !(pCap->hw_caps & ATH9K_HW_CAP_AUTOSLEEP)) *masked |= ATH9K_INT_TIM_TIMER; + + if (!(pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED)) { + REG_WRITE(ah, AR_ISR_S5, s5_s); + isr &= ~AR_ISR_GENTMR; + } } + if (!(pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED)) { + REG_WRITE(ah, AR_ISR, isr); + REG_READ(ah, AR_ISR); + } + + if (AR_SREV_9100(ah)) + return true; + if (sync_cause) { ath9k_debug_sync_cause(common, sync_cause); fatal_int = From 45c2aff645c82da7b1574dad5062993cf451c699 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 16 Dec 2013 14:59:22 +0800 Subject: [PATCH 017/164] netfilter: nfnetlink_log: unset nf_loggers for netns when unloading module Steven Rostedt and Arnaldo Carvalho de Melo reported a panic when access the files /proc/sys/net/netfilter/nf_log/*. This problem will occur when we do: echo nfnetlink_log > /proc/sys/net/netfilter/nf_log/any_file rmmod nfnetlink_log and then access the files. Since the nf_loggers of netns hasn't been unset, it will point to the memory that has been freed. This bug is introduced by commit 9368a53c ("netfilter: nfnetlink_log: add net namespace support for nfnetlink_log"). [17261.822047] BUG: unable to handle kernel paging request at ffffffffa0d49090 [17261.822056] IP: [] nf_log_proc_dostring+0xf0/0x1d0 [...] [17261.822226] Call Trace: [17261.822235] [] ? security_capable+0x18/0x20 [17261.822240] [] ? ns_capable+0x29/0x50 [17261.822247] [] ? net_ctl_permissions+0x1f/0x90 [17261.822254] [] proc_sys_call_handler+0xb3/0xc0 [17261.822258] [] proc_sys_read+0x11/0x20 [17261.822265] [] vfs_read+0x9e/0x170 [17261.822270] [] SyS_read+0x49/0xa0 [17261.822276] [] ? __audit_syscall_exit+0x1f6/0x2a0 [17261.822283] [] system_call_fastpath+0x16/0x1b [17261.822285] Code: cc 81 4d 63 e4 4c 89 45 88 48 89 4d 90 e8 19 03 0d 00 4b 8b 84 e5 28 08 00 00 48 8b 4d 90 4c 8b 45 88 48 85 c0 0f 84 a8 00 00 00 <48> 8b 40 10 48 89 43 08 48 89 df 4c 89 f2 31 f6 e8 4b 35 af ff [17261.822329] RIP [] nf_log_proc_dostring+0xf0/0x1d0 [17261.822334] RSP [17261.822336] CR2: ffffffffa0d49090 [17261.822340] ---[ end trace a14ce54c0897a90d ]--- Reported-by: Arnaldo Carvalho de Melo Reported-by: Steven Rostedt Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 3c4b69e5fe173..a155d19a225ed 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1053,6 +1053,7 @@ static void __net_exit nfnl_log_net_exit(struct net *net) #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); #endif + nf_log_unset(net, &nfulnl_logger); } static struct pernet_operations nfnl_log_net_ops = { From 85328240c625f322af9f69c7b60e619717101d77 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 26 Nov 2013 06:33:52 +0000 Subject: [PATCH 018/164] net: allow netdev_all_upper_get_next_dev_rcu with rtnl lock held It is useful to be able to walk all upper devices when bringing a device online where the RTNL lock is held. In this case it is safe to walk the all_adj_list because the RTNL lock is used to protect the write side as well. This patch adds a check to see if the rtnl lock is held before throwing a warning in netdev_all_upper_get_next_dev_rcu(). Also because we now have a call site for lockdep_rtnl_is_held() outside COFIG_LOCK_PROVING an inline definition returning 1 is needed. Similar to the rcu_read_lock_is_held(). Fixes: 2a47fa45d4df ("ixgbe: enable l2 forwarding acceleration for macvlans") CC: Veaceslav Falico Reported-by: Yuanhan Liu Signed-off-by: John Fastabend Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- include/linux/rtnetlink.h | 5 +++++ net/core/dev.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 939428ad25acb..8e3e66ac0a521 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -24,6 +24,11 @@ extern int rtnl_trylock(void); extern int rtnl_is_locked(void); #ifdef CONFIG_PROVE_LOCKING extern int lockdep_rtnl_is_held(void); +#else +static inline int lockdep_rtnl_is_held(void) +{ + return 1; +} #endif /* #ifdef CONFIG_PROVE_LOCKING */ /** diff --git a/net/core/dev.c b/net/core/dev.c index ba3b7ea5ebb31..4fc17221545d4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4500,7 +4500,7 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, { struct netdev_adjacent *upper; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); From 9e6c3b63399dd424d33a34e08b77f2cab0b84cdc Mon Sep 17 00:00:00 2001 From: David Ertman Date: Sat, 14 Dec 2013 07:18:18 +0000 Subject: [PATCH 019/164] e1000e: fix compiler warnings This patch is to fix a compiler warning of __bad_udelay due to a value of >999 being passed as a parameter to udelay() in the function e1000e_phy_has_link_generic(). This affects the gcc compiler when it is given a flag of -O3 and the icc compiler. This patch is also making the change from mdelay() to msleep() in the same function, since it was determined though code inspection that this function is never called in atomic context. Signed-off-by: David Ertman Acked-by: Bruce Allan Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/phy.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index da2be59505c06..20e71f4ca4261 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -1757,19 +1757,23 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, * it across the board. */ ret_val = e1e_rphy(hw, MII_BMSR, &phy_status); - if (ret_val) + if (ret_val) { /* If the first read fails, another entity may have * ownership of the resources, wait and try again to * see if they have relinquished the resources yet. */ - udelay(usec_interval); + if (usec_interval >= 1000) + msleep(usec_interval / 1000); + else + udelay(usec_interval); + } ret_val = e1e_rphy(hw, MII_BMSR, &phy_status); if (ret_val) break; if (phy_status & BMSR_LSTATUS) break; if (usec_interval >= 1000) - mdelay(usec_interval / 1000); + msleep(usec_interval / 1000); else udelay(usec_interval); } From 918a4308bad24a8c40a9e569fb7b3a6295ec7757 Mon Sep 17 00:00:00 2001 From: David Ertman Date: Sat, 14 Dec 2013 07:30:39 +0000 Subject: [PATCH 020/164] e1000e: fix compiler warning (maybe-unitialized variable) This patch is to fix a compiler warning of maybe-uininitialized-variable that is generated from gcc when the -O3 flag is used. In the function e1000_reset_hw_80003es2lan(), the variable krmn_reg_data is first given a value by being passed to a register read function as a pass-by-reference parameter. But, the return value of that read function was never checked to see if the read failed and the variable not given an initial value. The compiler was smart enough to spot this. This patch is to check the return value for that read function and return it, if an error occurs, without trying to utilize the value in kmrn_reg_data. Signed-off-by: David Ertman Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/80003es2lan.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index 895450e9bb3cf..ff2d806eaef71 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -718,8 +718,11 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) e1000_release_phy_80003es2lan(hw); /* Disable IBIST slave mode (far-end loopback) */ - e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, - &kum_reg_data); + ret_val = + e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, + &kum_reg_data); + if (ret_val) + return ret_val; kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, kum_reg_data); From 7509963c703b71eebccc421585e7f48ebbbd3f38 Mon Sep 17 00:00:00 2001 From: David Ertman Date: Tue, 17 Dec 2013 04:42:42 +0000 Subject: [PATCH 021/164] e1000e: Fix a compile flag mis-match for suspend/resume This patch addresses a mis-match between the declaration and usage of the e1000_suspend and e1000_resume functions. Previously, these functions were declared in a CONFIG_PM_SLEEP wrapper, and then utilized within a CONFIG_PM wrapper. Both the declaration and usage will now be contained within CONFIG_PM wrappers. Signed-off-by: Dave Ertman Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 8d3945ab73348..c30d41d6e4260 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6174,7 +6174,7 @@ static int __e1000_resume(struct pci_dev *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM static int e1000_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -6193,7 +6193,7 @@ static int e1000_resume(struct device *dev) return __e1000_resume(pdev); } -#endif /* CONFIG_PM_SLEEP */ +#endif /* CONFIG_PM */ #ifdef CONFIG_PM_RUNTIME static int e1000_runtime_suspend(struct device *dev) From 8f48f5bc759d6f945ff0c3b2bf2a1d5971e561ba Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Fri, 22 Nov 2013 04:27:23 +0000 Subject: [PATCH 022/164] ixgbe: fix for unused variable warning with certain config If CONFIG_PCI_IOV isn't defined we get an "unused variable" warining so now wrap the variable declaration like it's usage already was. Signed-off-by: Don Skidmore Acked-by: John Fastabend Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index d6f0c0d8cf11d..72084f70adbba 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -291,7 +291,9 @@ static int ixgbe_pci_sriov_disable(struct pci_dev *dev) { struct ixgbe_adapter *adapter = pci_get_drvdata(dev); int err; +#ifdef CONFIG_PCI_IOV u32 current_flags = adapter->flags; +#endif err = ixgbe_disable_sriov(adapter); From d80512f87474f2dfd67ef931737659acce20fe69 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 16 Dec 2013 14:31:26 +0100 Subject: [PATCH 023/164] s390/smp: improve setup of possible cpu mask Since under z/VM we cannot have more than 64 cpus, make sure the cpu_possible_mask does not contain more bits. This avoids wasting memory for dynamic per-cpu allocations if CONFIG_NR_CPUS is larger than 64. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 - arch/s390/include/asm/smp.h | 2 ++ arch/s390/kernel/setup.c | 1 + arch/s390/kernel/smp.c | 25 ++++++++++++++++--------- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1e1a03d2d19fb..e9f3125325266 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -135,7 +135,6 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING - select INIT_ALL_POSSIBLE select KTIME_SCALAR if 32BIT select MODULES_USE_ELF_RELA select OLD_SIGACTION diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index ac9bed8e103fa..1607793940962 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -31,6 +31,7 @@ extern void smp_yield(void); extern void smp_stop_cpu(void); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); +extern void smp_fill_possible_mask(void); #else /* CONFIG_SMP */ @@ -50,6 +51,7 @@ static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } static inline void smp_yield(void) { } static inline void smp_stop_cpu(void) { } +static inline void smp_fill_possible_mask(void) { } #endif /* CONFIG_SMP */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4444875266ee0..0f3d44ecbfc6d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1023,6 +1023,7 @@ void __init setup_arch(char **cmdline_p) setup_vmcoreinfo(); setup_lowcore(); + smp_fill_possible_mask(); cpu_init(); s390_init_cpu_topology(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index dc4a534650604..958704798f4a8 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -721,18 +721,14 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) return 0; } -static int __init setup_possible_cpus(char *s) -{ - int max, cpu; +static unsigned int setup_possible_cpus __initdata; - if (kstrtoint(s, 0, &max) < 0) - return 0; - init_cpu_possible(cpumask_of(0)); - for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++) - set_cpu_possible(cpu, true); +static int __init _setup_possible_cpus(char *s) +{ + get_option(&s, &setup_possible_cpus); return 0; } -early_param("possible_cpus", setup_possible_cpus); +early_param("possible_cpus", _setup_possible_cpus); #ifdef CONFIG_HOTPLUG_CPU @@ -775,6 +771,17 @@ void __noreturn cpu_die(void) #endif /* CONFIG_HOTPLUG_CPU */ +void __init smp_fill_possible_mask(void) +{ + unsigned int possible, cpu; + + possible = setup_possible_cpus; + if (!possible) + possible = MACHINE_IS_VM ? 64 : nr_cpu_ids; + for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { /* request the 0x1201 emergency signal external interrupt */ From 36d9f4d3b68c7035ead3850dc85f310a579ed0eb Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 18 Dec 2013 14:36:18 +0100 Subject: [PATCH 024/164] s390/3270: fix allocation of tty3270_screen structure The tty3270_alloc_screen function is called from tty3270_install with swapped arguments, the number of columns instead of rows and vice versa. The number of rows is typically smaller than the number of columns which makes the screen array too big but the individual cell arrays for the lines too small. Creating lines longer than the number of rows will clobber the memory after the end of the cell array. The fix is simple, call tty3270_alloc_screen with the correct argument order. Signed-off-by: Martin Schwidefsky --- drivers/s390/char/tty3270.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 3f4ca4e09a4cc..34629ea913d4e 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -942,7 +942,7 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty) return rc; } - tp->screen = tty3270_alloc_screen(tp->view.cols, tp->view.rows); + tp->screen = tty3270_alloc_screen(tp->view.rows, tp->view.cols); if (IS_ERR(tp->screen)) { rc = PTR_ERR(tp->screen); raw3270_put_view(&tp->view); From bbca4d39175a96b882def1628e8b532998750dfa Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Tue, 10 Dec 2013 10:51:08 +0100 Subject: [PATCH 025/164] powerpc/512x: dts: remove misplaced IRQ spec from 'soc' node (5125) the 'soc' node in the MPC5125 "tower" board .dts has an '#interrupt-cells' property although this node is not an interrupt controller remove this erroneously placed property because starting with v3.13-rc1 lookup and resolution of 'interrupts' specs for peripherals gets misled (tries to use the 'soc' as the interrupt parent which fails), emits 'no irq domain found' WARN() messages and breaks the boot process [ best viewed with 'git diff -U5' to have DT node names in the context ] Cc: Anatolij Gustschin Cc: linuxppc-dev@lists.ozlabs.org Cc: devicetree@vger.kernel.org Signed-off-by: Gerhard Sittig Signed-off-by: Anatolij Gustschin --- arch/powerpc/boot/dts/mpc5125twr.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts index 4177b62240c24..0a0fe92216ae0 100644 --- a/arch/powerpc/boot/dts/mpc5125twr.dts +++ b/arch/powerpc/boot/dts/mpc5125twr.dts @@ -58,7 +58,6 @@ compatible = "fsl,mpc5121-immr"; #address-cells = <1>; #size-cells = <1>; - #interrupt-cells = <2>; ranges = <0x0 0x80000000 0x400000>; reg = <0x80000000 0x400000>; bus-frequency = <66000000>; // 66 MHz ips bus From 71a06c59d19a57c8dd2972ebee88030f5b0c700e Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 17:29:19 +0800 Subject: [PATCH 026/164] bonding: protect port for bond_3ad_adapter_speed_changed() Jay Vosburgh said that the bond_3ad_adapter_speed_changed is called with RTNL only, and the function will modify the port's information with no further locking, it will not mutex against bond state machine and incoming LACPDU which do not hold RTNL, So I add __get_state_machine_lock to protect the port. But it is not a critical bug, it exist since day one, and till now it has never been hit and reported, because changes to speed is very rare, and will not occur critical problem. The comment in the function is very old, cleanup it. Suggested-by: Jay Vosburgh Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 187b1b7772ef1..6405ce397baf0 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2201,20 +2201,25 @@ void bond_3ad_adapter_speed_changed(struct slave *slave) port = &(SLAVE_AD_INFO(slave).port); - // if slave is null, the whole port is not initialized + /* if slave is null, the whole port is not initialized */ if (!port->slave) { pr_warning("Warning: %s: speed changed for uninitialized port on %s\n", slave->bond->dev->name, slave->dev->name); return; } + __get_state_machine_lock(port); + port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS; port->actor_oper_port_key = port->actor_admin_port_key |= (__get_link_speed(port) << 1); pr_debug("Port %d changed speed\n", port->actor_port_number); - // there is no need to reselect a new aggregator, just signal the - // state machines to reinitialize + /* there is no need to reselect a new aggregator, just signal the + * state machines to reinitialize + */ port->sm_vars |= AD_PORT_BEGIN; + + __release_state_machine_lock(port); } /** From bca44a7341924ec92ee7a1ba085c8f0745aeb74e Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 17:29:24 +0800 Subject: [PATCH 027/164] bonding: protect port for bond_3ad_adapter_duplex_changed() Jay Vosburgh said that the bond_3ad_adapter_duplex_changed is called with RTNL only, and the function will modify the port's information with no further locking, it will not mutex against bond state machine and incoming LACPDU which do not hold RTNL, So I add __get_state_machine_lock to protect the port. But it is not a critical bug, it exist since day one, and till now it has never been hit and reported, because changes to speed is very rare, and will not occur critical problem. The comments in the function is very old, cleanup it. Suggested-by: Jay Vosburgh Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 6405ce397baf0..e851a67740f78 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2234,20 +2234,25 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave) port = &(SLAVE_AD_INFO(slave).port); - // if slave is null, the whole port is not initialized + /* if slave is null, the whole port is not initialized */ if (!port->slave) { pr_warning("%s: Warning: duplex changed for uninitialized port on %s\n", slave->bond->dev->name, slave->dev->name); return; } + __get_state_machine_lock(port); + port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; port->actor_oper_port_key = port->actor_admin_port_key |= __get_duplex(port); pr_debug("Port %d changed duplex\n", port->actor_port_number); - // there is no need to reselect a new aggregator, just signal the - // state machines to reinitialize + /* there is no need to reselect a new aggregator, just signal the + * state machines to reinitialize + */ port->sm_vars |= AD_PORT_BEGIN; + + __release_state_machine_lock(port); } /** From 108db7363736ceafdbc6fc708aa770939eb9906a Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 17:29:29 +0800 Subject: [PATCH 028/164] bonding: protect port for bond_3ad_handle_link_change() The bond_3ad_handle_link_change is called with RTNL only, and the function will modify the port's information with no further locking, it will not mutex against bond state machine and incoming LACPDU which do not hold RTNL, So I add __get_state_machine_lock to protect the port. But it is not a critical bug, it exist since day one, and till now it has never been hit and reported, because changes to speed is very rare, and will not occur critical problem. The comments in the function is very old, cleanup it and add a new pr_debug to debug the port message. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index e851a67740f78..4ced59436558e 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2268,15 +2268,21 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) port = &(SLAVE_AD_INFO(slave).port); - // if slave is null, the whole port is not initialized + /* if slave is null, the whole port is not initialized */ if (!port->slave) { pr_warning("Warning: %s: link status changed for uninitialized port on %s\n", slave->bond->dev->name, slave->dev->name); return; } - // on link down we are zeroing duplex and speed since some of the adaptors(ce1000.lan) report full duplex/speed instead of N/A(duplex) / 0(speed) - // on link up we are forcing recheck on the duplex and speed since some of he adaptors(ce1000.lan) report + __get_state_machine_lock(port); + /* on link down we are zeroing duplex and speed since + * some of the adaptors(ce1000.lan) report full duplex/speed + * instead of N/A(duplex) / 0(speed). + * + * on link up we are forcing recheck on the duplex and speed since + * some of he adaptors(ce1000.lan) report. + */ if (link == BOND_LINK_UP) { port->is_enabled = true; port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; @@ -2292,10 +2298,15 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) port->actor_oper_port_key = (port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS); } - //BOND_PRINT_DBG(("Port %d changed link status to %s", port->actor_port_number, ((link == BOND_LINK_UP)?"UP":"DOWN"))); - // there is no need to reselect a new aggregator, just signal the - // state machines to reinitialize + pr_debug("Port %d changed link status to %s", + port->actor_port_number, + (link == BOND_LINK_UP) ? "UP" : "DOWN"); + /* there is no need to reselect a new aggregator, just signal the + * state machines to reinitialize + */ port->sm_vars |= AD_PORT_BEGIN; + + __release_state_machine_lock(port); } /* From a81d8762d71396f69d27187a909fcc69f1a7be75 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Fri, 13 Dec 2013 18:42:55 +0530 Subject: [PATCH 029/164] drivers: net cpsw: Enable In Band mode in cpsw for 10 mbps This patch adds support for enabling In Band mode in 10 mbps speed. RGMII supports 1 Gig and 100 mbps mode for Forced mode of operation. For 10mbps mode it should be configured to in band mode so that link status, duplexity and speed are determined from the RGMII input data stream Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5120d9ce1dd4c..614f284fe1fdc 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -740,6 +740,8 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, /* set speed_in input in case RMII mode is used in 100Mbps */ if (phy->speed == 100) mac_control |= BIT(15); + else if (phy->speed == 10) + mac_control |= BIT(18); /* In Band mode */ *link = true; } else { From 0e3da5bb8da45890b1dc413404e0f978ab71173e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 16 Dec 2013 11:02:09 +0200 Subject: [PATCH 030/164] ip_gre: fix msg_name parsing for recvfrom/recvmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ipgre_header_parse() needs to parse the tunnel's ip header and it uses mac_header to locate the iphdr. This got broken when gre tunneling was refactored as mac_header is no longer updated to point to iphdr. Introduce skb_pop_mac_header() helper to do the mac_header assignment and use it in ipgre_rcv() to fix msg_name parsing. Bug introduced in commit c54419321455 (GRE: Refactor GRE tunneling code.) Cc: Pravin B Shelar Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ net/ipv4/ip_gre.c | 1 + 2 files changed, 6 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 215b5ea1cb302..6aae838905200 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1638,6 +1638,11 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) skb->mac_header += offset; } +static inline void skb_pop_mac_header(struct sk_buff *skb) +{ + skb->mac_header = skb->network_header; +} + static inline void skb_probe_transport_header(struct sk_buff *skb, const int offset_hint) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d7aea4c5b9400..e560ef34cf4bd 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -217,6 +217,7 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) iph->saddr, iph->daddr, tpi->key); if (tunnel) { + skb_pop_mac_header(skb); ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); return PACKET_RCVD; } From 3b3878926c7679765635ed2803dd694a8658042d Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Mon, 16 Dec 2013 11:35:32 +0100 Subject: [PATCH 031/164] dm9601: add support for dm9621a based dongle dm9621a is functionally identical to dm9620, so the existing handling can directly be used. Thanks to Davicom for sending me a dongle. Signed-off-by: Peter Korsgaard Signed-off-by: David S. Miller --- drivers/net/usb/dm9601.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index c6867f926cffc..8e885729aa288 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -594,6 +594,10 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0a46, 0x9620), /* DM9620 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, + { + USB_DEVICE(0x0a46, 0x9621), /* DM9621A USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9601_info, + }, {}, // END }; From 407900cfb54bdb2cfa228010b6697305f66b2948 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Mon, 16 Dec 2013 11:35:33 +0100 Subject: [PATCH 032/164] dm9601: fix reception of full size ethernet frames on dm9620/dm9621a dm9620/dm9621a require room for 4 byte padding even in dm9601 (3 byte header) mode. Cc: Signed-off-by: Peter Korsgaard Signed-off-by: David S. Miller --- drivers/net/usb/dm9601.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 8e885729aa288..aca0285d18bfe 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -364,7 +364,12 @@ static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &dm9601_ethtool_ops; dev->net->hard_header_len += DM_TX_OVERHEAD; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; - dev->rx_urb_size = dev->net->mtu + ETH_HLEN + DM_RX_OVERHEAD; + + /* dm9620/21a require room for 4 byte padding, even in dm9601 + * mode, so we need +1 to be able to receive full size + * ethernet frames. + */ + dev->rx_urb_size = dev->net->mtu + ETH_HLEN + DM_RX_OVERHEAD + 1; dev->mii.dev = dev->net; dev->mii.mdio_read = dm9601_mdio_read; From cabd0e3a3861d45569b8f91633c98fc48d820cdb Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Mon, 16 Dec 2013 11:35:34 +0100 Subject: [PATCH 033/164] dm9601: make it clear that dm9620/dm9621a are also supported The driver nowadays also support dm9620/dm9621a based USB 2.0 ethernet adapters, so adjust module/driver description and Kconfig help text to match. Signed-off-by: Peter Korsgaard Signed-off-by: David S. Miller --- drivers/net/usb/Kconfig | 6 +++--- drivers/net/usb/dm9601.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 85e4a01670f06..47b0f732b0b10 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -276,12 +276,12 @@ config USB_NET_CDC_MBIM module will be called cdc_mbim. config USB_NET_DM9601 - tristate "Davicom DM9601 based USB 1.1 10/100 ethernet devices" + tristate "Davicom DM96xx based USB 10/100 ethernet devices" depends on USB_USBNET select CRC32 help - This option adds support for Davicom DM9601 based USB 1.1 - 10/100 Ethernet adapters. + This option adds support for Davicom DM9601/DM9620/DM9621A + based USB 10/100 Ethernet adapters. config USB_NET_SR9700 tristate "CoreChip-sz SR9700 based USB 1.1 10/100 ethernet devices" diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index aca0285d18bfe..ca99c35666c5a 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -1,5 +1,5 @@ /* - * Davicom DM9601 USB 1.1 10/100Mbps ethernet devices + * Davicom DM96xx USB 10/100Mbps ethernet devices * * Peter Korsgaard * @@ -548,7 +548,7 @@ static int dm9601_link_reset(struct usbnet *dev) } static const struct driver_info dm9601_info = { - .description = "Davicom DM9601 USB Ethernet", + .description = "Davicom DM96xx USB 10/100 Ethernet", .flags = FLAG_ETHER | FLAG_LINK_INTR, .bind = dm9601_bind, .rx_fixup = dm9601_rx_fixup, @@ -621,5 +621,5 @@ static struct usb_driver dm9601_driver = { module_usb_driver(dm9601_driver); MODULE_AUTHOR("Peter Korsgaard "); -MODULE_DESCRIPTION("Davicom DM9601 USB 1.1 ethernet devices"); +MODULE_DESCRIPTION("Davicom DM96xx USB 10/100 ethernet devices"); MODULE_LICENSE("GPL"); From 4263c86dca5198da6bd3ad826d0b2304fbe25776 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Mon, 16 Dec 2013 11:35:35 +0100 Subject: [PATCH 034/164] dm9601: work around tx fifo sync issue on dm962x Certain dm962x revisions contain an bug, where if a USB bulk transfer retry (E.G. if bulk crc mismatch) happens right after a transfer with odd or maxpacket length, the internal tx hardware fifo gets out of sync causing the interface to stop working. Work around it by adding up to 3 bytes of padding to ensure this situation cannot trigger. This workaround also means we never pass multiple-of-maxpacket size skb's to usbnet, so the length adjustment to handle usbnet's padding of those can be removed. Cc: Reported-by: Joseph Chang Signed-off-by: Peter Korsgaard Signed-off-by: David S. Miller --- drivers/net/usb/dm9601.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index ca99c35666c5a..14aa48fa8d7e5 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -473,7 +473,7 @@ static int dm9601_rx_fixup(struct usbnet *dev, struct sk_buff *skb) static struct sk_buff *dm9601_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { - int len; + int len, pad; /* format: b1: packet length low @@ -481,12 +481,23 @@ static struct sk_buff *dm9601_tx_fixup(struct usbnet *dev, struct sk_buff *skb, b3..n: packet data */ - len = skb->len; + len = skb->len + DM_TX_OVERHEAD; + + /* workaround for dm962x errata with tx fifo getting out of + * sync if a USB bulk transfer retry happens right after a + * packet with odd / maxpacket length by adding up to 3 bytes + * padding. + */ + while ((len & 1) || !(len % dev->maxpacket)) + len++; - if (skb_headroom(skb) < DM_TX_OVERHEAD) { + len -= DM_TX_OVERHEAD; /* hw header doesn't count as part of length */ + pad = len - skb->len; + + if (skb_headroom(skb) < DM_TX_OVERHEAD || skb_tailroom(skb) < pad) { struct sk_buff *skb2; - skb2 = skb_copy_expand(skb, DM_TX_OVERHEAD, 0, flags); + skb2 = skb_copy_expand(skb, DM_TX_OVERHEAD, pad, flags); dev_kfree_skb_any(skb); skb = skb2; if (!skb) @@ -495,10 +506,10 @@ static struct sk_buff *dm9601_tx_fixup(struct usbnet *dev, struct sk_buff *skb, __skb_push(skb, DM_TX_OVERHEAD); - /* usbnet adds padding if length is a multiple of packet size - if so, adjust length value in header */ - if ((skb->len % dev->maxpacket) == 0) - len++; + if (pad) { + memset(skb->data + skb->len, 0, pad); + __skb_put(skb, pad); + } skb->data[0] = len; skb->data[1] = len >> 8; From 4df98e76cde7c64b5606d82584c65dda4151bd6a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 16 Dec 2013 12:36:44 +0100 Subject: [PATCH 035/164] ipv6: pmtudisc setting not respected with UFO/CORK Sockets marked with IPV6_PMTUDISC_PROBE (or later IPV6_PMTUDISC_INTERFACE) don't respect this setting when the outgoing interface supports UFO. We had the same problem in IPv4, which was fixed in commit daba287b299ec7a2c61ae3a714920e90e8396ad5 ("ipv4: fix DO and PROBE pmtu mode regarding local fragmentation with UFO/CORK"). Also IPV6_DONTFRAG mode did not care about already corked data, thus it may generate a fragmented frame even if this socket option was specified. It also did not care about the length of the ipv6 header and possible options. In the error path allow the user to receive the pmtu notifications via both, rxpmtu method or error queue. The user may opted in for both, so deliver the notification to both error handlers (the handlers check if the error needs to be enqueued). Also report back consistent pmtu values when sending on an already cork-appended socket. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4acdb63495dbe..e6f931997996e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1193,11 +1193,35 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); + unsigned int maxnonfragsize, headersize; + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->tot_len : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ? + mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + + /* dontfrag active */ + if ((cork->length + length > mtu - headersize) && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } + + if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); return -EMSGSIZE; } } @@ -1222,12 +1246,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || From 58a4782449c5882f61882396ef18cc34c7dc1269 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Dec 2013 06:31:23 -0800 Subject: [PATCH 036/164] ipv6: sit: update mtu check to take care of gso packets While testing my changes for TSO support in SIT devices, I was using sit0 tunnel which appears to include nopmtudisc flag. But using : ip tun add sittun mode sit remote $REMOTE_IPV4 local $LOCAL_IPV4 \ dev $IFACE We get a tunnel which rejects too long packets because of the mtu check which is not yet GSO aware. erd:~# ip tunnel sittun: ipv6/ip remote 10.246.17.84 local 10.246.17.83 ttl inherit 6rd-prefix 2002::/16 sit0: ipv6/ip remote any local any ttl 64 nopmtudisc 6rd-prefix 2002::/16 This patch is based on an excellent report from Michal Shmidt. In the future, we probably want to extend the MTU check to do the right thing for GSO packets... Fixes: ("61c1db7fae21 ipv6: sit: add GSO/TSO support") Reported-by: Michal Schmidt Signed-off-by: Eric Dumazet Tested-by: Michal Schmidt Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 366fbba3359ab..a710fdec42d38 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -924,7 +924,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (tunnel->parms.iph.daddr && skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; From b1aac815c0891fe4a55a6b0b715910142227700f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Dec 2013 00:38:39 +0100 Subject: [PATCH 037/164] net: inet_diag: zero out uninitialized idiag_{src,dst} fields Jakub reported while working with nlmon netlink sniffer that parts of the inet_diag_sockid are not initialized when r->idiag_family != AF_INET6. That is, fields of r->id.idiag_src[1 ... 3], r->id.idiag_dst[1 ... 3]. In fact, it seems that we can leak 6 * sizeof(u32) byte of kernel [slab] memory through this. At least, in udp_dump_one(), we allocate a skb in ... rep = nlmsg_new(sizeof(struct inet_diag_msg) + ..., GFP_KERNEL); ... and then pass that to inet_sk_diag_fill() that puts the whole struct inet_diag_msg into the skb, where we only fill out r->id.idiag_src[0], r->id.idiag_dst[0] and leave the rest untouched: r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; struct inet_diag_msg embeds struct inet_diag_sockid that is correctly / fully filled out in IPv6 case, but for IPv4 not. So just zero them out by using plain memset (for this little amount of bytes it's probably not worth the extra check for idiag_family == AF_INET). Similarly, fix also other places where we fill that out. Reported-by: Jakub Zawadzki Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 56a964a553d2c..a0f52dac8940d 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -106,6 +106,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = inet->inet_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; @@ -240,12 +244,19 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_family = tw->tw_family; r->idiag_retrans = 0; + r->id.idiag_if = tw->tw_bound_dev_if; sock_diag_save_cookie(tw, r->id.idiag_cookie); + r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = tw->tw_rcv_saddr; r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; r->idiag_timer = 3; r->idiag_expires = jiffies_to_msecs(tmo); @@ -726,8 +737,13 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = ireq->ir_rmt_port; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = ireq->ir_loc_addr; r->id.idiag_dst[0] = ireq->ir_rmt_addr; + r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; From 0c8d087c04cdcef501064552149289866e53aa6c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 17 Dec 2013 10:42:09 +0800 Subject: [PATCH 038/164] xen-netback: fix some error return code 'err' is overwrited to 0 after maybe_pull_tail() call, so the error code was not set if skb_partial_csum_set() call failed. Fix to return error -EPROTO from those error handling case instead of 0. Fixes: d52eb0d46f36 ('xen-netback: make sure skb linear area covers checksum field') Signed-off-by: Wei Yongjun Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 27bbe58dcbe7b..7b4fd93be76dd 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1209,8 +1209,10 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, goto out; if (!skb_partial_csum_set(skb, off, - offsetof(struct tcphdr, check))) + offsetof(struct tcphdr, check))) { + err = -EPROTO; goto out; + } if (recalculate_partial_csum) tcp_hdr(skb)->check = @@ -1227,8 +1229,10 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, goto out; if (!skb_partial_csum_set(skb, off, - offsetof(struct udphdr, check))) + offsetof(struct udphdr, check))) { + err = -EPROTO; goto out; + } if (recalculate_partial_csum) udp_hdr(skb)->check = @@ -1350,8 +1354,10 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, goto out; if (!skb_partial_csum_set(skb, off, - offsetof(struct tcphdr, check))) + offsetof(struct tcphdr, check))) { + err = -EPROTO; goto out; + } if (recalculate_partial_csum) tcp_hdr(skb)->check = @@ -1368,8 +1374,10 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, goto out; if (!skb_partial_csum_set(skb, off, - offsetof(struct udphdr, check))) + offsetof(struct udphdr, check))) { + err = -EPROTO; goto out; + } if (recalculate_partial_csum) udp_hdr(skb)->check = From e9db5c21d3646a6454fcd04938dd215ac3ab620a Mon Sep 17 00:00:00 2001 From: Wenliang Fan Date: Tue, 17 Dec 2013 11:25:28 +0800 Subject: [PATCH 039/164] drivers/net/hamradio: Integer overflow in hdlcdrv_ioctl() The local variable 'bi' comes from userspace. If userspace passed a large number to 'bi.data.calibrate', there would be an integer overflow in the following line: s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16; Signed-off-by: Wenliang Fan Signed-off-by: David S. Miller --- drivers/net/hamradio/hdlcdrv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 3169252613faa..5d78c1d08abd6 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -571,6 +571,8 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case HDLCDRVCTL_CALIBRATE: if(!capable(CAP_SYS_RAWIO)) return -EPERM; + if (bi.data.calibrate > INT_MAX / s->par.bitrate) + return -EINVAL; s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16; return 0; From 8e3fbf870481eb53b2d3a322d1fc395ad8b367ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Salva=20Peir=C3=B3?= Date: Tue, 17 Dec 2013 10:06:30 +0100 Subject: [PATCH 040/164] hamradio/yam: fix info leak in ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The yam_ioctl() code fails to initialise the cmd field of the struct yamdrv_ioctl_cfg. Add an explicit memset(0) before filling the structure to avoid the 4-byte info leak. Signed-off-by: Salva Peiró Signed-off-by: David S. Miller --- drivers/net/hamradio/yam.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 1971411574db1..61dd2447e1bb4 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -1057,6 +1057,7 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; case SIOCYAMGCFG: + memset(&yi, 0, sizeof(yi)); yi.cfg.mask = 0xffffffff; yi.cfg.iobase = yp->iobase; yi.cfg.irq = yp->irq; From c047e0707307717818542c939223fc8ca454e2c9 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 17 Dec 2013 18:51:25 +0100 Subject: [PATCH 041/164] bnx2x: downgrade "valid ME register value" message level "valid ME register value" is not an error. It should be logged for debugging only. Signed-off-by: Michal Schmidt Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index efa8a151d7890..3dc2537fe91b3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -208,7 +208,7 @@ static int bnx2x_get_vf_id(struct bnx2x *bp, u32 *vf_id) return -EINVAL; } - BNX2X_ERR("valid ME register value: 0x%08x\n", me_reg); + DP(BNX2X_MSG_IOV, "valid ME register value: 0x%08x\n", me_reg); *vf_id = (me_reg & ME_REG_VF_NUM_MASK) >> ME_REG_VF_NUM_SHIFT; From 7a2a84518cfb263d2c4171b3d63671f88316adb2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Dec 2013 10:53:02 -0800 Subject: [PATCH 042/164] net: fec: fix potential use after free skb_tx_timestamp(skb) should be called _before_ TX completion has a chance to trigger, otherwise it is too late and we access freed memory. Signed-off-by: Eric Dumazet Fixes: de5fb0a05348 ("net: fec: put tx to napi poll function to fix dead lock") Cc: Frank Li Cc: Richard Cochran Acked-by: Richard Cochran Acked-by: Frank Li Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index e7c8b749c5a53..50bb71c663e20 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -428,6 +428,8 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* If this was the last BD in the ring, start at the beginning again. */ bdp = fec_enet_get_nextdesc(bdp, fep); + skb_tx_timestamp(skb); + fep->cur_tx = bdp; if (fep->cur_tx == fep->dirty_tx) @@ -436,8 +438,6 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* Trigger transmission start */ writel(0, fep->hwp + FEC_X_DES_ACTIVE); - skb_tx_timestamp(skb); - return NETDEV_TX_OK; } From 24f5b855e17df7e355eacd6c4a12cc4d6a6c9ff0 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 19 Dec 2013 12:40:26 +0800 Subject: [PATCH 043/164] ipv6: always set the new created dst's from in ip6_rt_copy ip6_rt_copy only sets dst.from if ort has flag RTF_ADDRCONF and RTF_DEFAULT. but the prefix routes which did get installed by hand locally can have an expiration, and no any flag combination which can ensure a potential from does never expire, so we should always set the new created dst's from. This also fixes the new created dst is always expired since the ort, which is created by RA, maybe has RTF_EXPIRES and RTF_ADDRCONF, but no RTF_DEFAULT. Suggested-by: Hannes Frederic Sowa CC: Gao feng Signed-off-by: Li RongQing Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a0a48ac3403f3..4b4944c3e4c44 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1905,9 +1905,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, else rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) - rt6_set_from(rt, ort); + rt6_set_from(rt, ort); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES From a4f63634760acfcc349a5582c77ca4a004c813f6 Mon Sep 17 00:00:00 2001 From: Betty Dall Date: Thu, 19 Dec 2013 10:59:09 -0700 Subject: [PATCH 044/164] atl1c: Check return from pci_find_ext_capability() in atl1c_reset_pcie() The function atl1c_reset_pcie() does not check the return from pci_find_ext_cabability() where it is getting the postion of the PCI_EXT_CAP_ID_ERR. It is possible for the return to be 0. Signed-off-by: Betty Dall Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index a36a760ada28a..29801750f239b 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -145,9 +145,11 @@ static void atl1c_reset_pcie(struct atl1c_hw *hw, u32 flag) * Mask some pcie error bits */ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &data); - data &= ~(PCI_ERR_UNC_DLP | PCI_ERR_UNC_FCP); - pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, data); + if (pos) { + pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &data); + data &= ~(PCI_ERR_UNC_DLP | PCI_ERR_UNC_FCP); + pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, data); + } /* clear error status */ pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_NFED | From 1a1f20bc9debd133549d5b289bd5494a4264a73d Mon Sep 17 00:00:00 2001 From: Leigh Brown Date: Thu, 19 Dec 2013 13:09:48 +0000 Subject: [PATCH 045/164] net: mvmdio: fix interrupt timeout handling This version corrects the whitespace issue. orion_mdio_wait_ready uses wait_event_timeout to wait for the SMI interrupt to fire. wait_event_timeout waits for between "timeout - 1" and "timeout" jiffies. In this case a 1ms timeout when HZ is 1000 results in a wait of 0 to 1 jiffies, causing premature timeouts. This fix ensures a minimum timeout of 2 jiffies, ensuring wait_event_timeout will always wait at least 1 jiffie. Issue reported by Nicolas Schichan. Tested-by: Nicolas Schichan Signed-off-by: Leigh Brown Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvmdio.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 7354960b583bc..c4eeb69a5beee 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -92,6 +92,12 @@ static int orion_mdio_wait_ready(struct mii_bus *bus) if (time_is_before_jiffies(end)) ++timedout; } else { + /* wait_event_timeout does not guarantee a delay of at + * least one whole jiffie, so timeout must be no less + * than two. + */ + if (timeout < 2) + timeout = 2; wait_event_timeout(dev->smi_busy_wait, orion_mdio_smi_is_done(dev), timeout); From 965cdea825693c821d200e38fac9402cde6dce6a Mon Sep 17 00:00:00 2001 From: Wang Weidong Date: Tue, 17 Dec 2013 19:24:33 -0700 Subject: [PATCH 046/164] dccp: catch failed request_module call in dccp_probe init Check the return value of request_module during dccp_probe initialisation, bail out if that call fails. Signed-off-by: Gerrit Renker Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- net/dccp/probe.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 4c6bdf97a6577..595ddf0459db7 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -152,17 +152,6 @@ static const struct file_operations dccpprobe_fops = { .llseek = noop_llseek, }; -static __init int setup_jprobe(void) -{ - int ret = register_jprobe(&dccp_send_probe); - - if (ret) { - request_module("dccp"); - ret = register_jprobe(&dccp_send_probe); - } - return ret; -} - static __init int dccpprobe_init(void) { int ret = -ENOMEM; @@ -174,7 +163,13 @@ static __init int dccpprobe_init(void) if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) goto err0; - ret = setup_jprobe(); + ret = register_jprobe(&dccp_send_probe); + if (ret) { + ret = request_module("dccp"); + if (!ret) + ret = register_jprobe(&dccp_send_probe); + } + if (ret) goto err1; From 540436c80e5918dd5ed838449e108b1726fc4d68 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Dec 2013 11:23:15 +0100 Subject: [PATCH 047/164] netfilter: nft_exthdr: call ipv6_find_hdr() with explicitly initialized offset In nft's nft_exthdr_eval() routine we process IPv6 extension header through invoking ipv6_find_hdr(), but we call it with an uninitialized offset variable that contains some stack value. In ipv6_find_hdr() we then test if the value of offset != 0 and call skb_header_pointer() on that offset in order to map struct ipv6hdr into it. Fix it up by initializing offset to 0 as it was probably intended to be. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Daniel Borkmann Cc: Hannes Frederic Sowa Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 8e0bb75e7c51e..55c939f5371fa 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -31,7 +31,7 @@ static void nft_exthdr_eval(const struct nft_expr *expr, { struct nft_exthdr *priv = nft_expr_priv(expr); struct nft_data *dest = &data[priv->dreg]; - unsigned int offset; + unsigned int offset = 0; int err; err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); From dfd11184d894cd0a92397b25cac18831a1a6a5bc Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 18 Dec 2013 14:14:52 +0000 Subject: [PATCH 048/164] GFS2: Fix incorrect invalidation for DIO/buffered I/O In patch 209806aba9d540dde3db0a5ce72307f85f33468f we allowed local deferred locks to be granted against a cached exclusive lock. That opened up a corner case which this patch now fixes. The solution to the problem is to check whether we have cached pages each time we do direct I/O and if so to unmap, flush and invalidate those pages. Since the glock state machine normally does that for us, mostly the code will be a no-op. Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index b7fc035a6943c..73f3e4ee40379 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + struct address_space *mapping = inode->i_mapping; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int rv; @@ -1006,6 +1007,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ + /* + * Now since we are holding a deferred (CW) lock at this point, you + * might be wondering why this is ever needed. There is a case however + * where we've granted a deferred local lock against a cached exclusive + * glock. That is ok provided all granted local locks are deferred, but + * it also means that it is possible to encounter pages which are + * cached and possibly also mapped. So here we check for that and sort + * them out ahead of the dio. The glock state machine will take care of + * everything else. + * + * If in fact the cached glock state (gl->gl_state) is deferred (CW) in + * the first place, mapping->nr_pages will always be zero. + */ + if (mapping->nrpages) { + loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); + loff_t len = iov_length(iov, nr_segs); + loff_t end = PAGE_ALIGN(offset + len) - 1; + + rv = 0; + if (len == 0) + goto out; + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); + rv = filemap_write_and_wait_range(mapping, lstart, end); + if (rv) + return rv; + truncate_inode_pages_range(mapping, lstart, end); + } + rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, gfs2_get_block_direct, NULL, NULL, 0); From 582d2f7aedfde9e1a01170deb003df510aa778d3 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 19 Dec 2013 11:04:14 +0000 Subject: [PATCH 049/164] GFS2: Wait for async DIO in glock state changes We need to wait for any outstanding DIO to complete in a couple of situations. Firstly, in case we are changing out of deferred mode (in inode_go_sync) where GLF_DIRTY will not be set. That call could be prefixed with a test for gl_state == LM_ST_DEFERRED but it doesn't seem worth it bearing in mind that the test for outstanding DIO is very quick anyway, in the usual case that there is none. The second case is in inode_go_lock which will catch the cases where we have a cached EX lock, but where we grant deferred locks against it so that there is no glock state transistion. We only need to wait if the state is not deferred, since DIO is valid anyway in that state. Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index db908f697139c..f88dcd9250109 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -192,8 +192,11 @@ static void inode_go_sync(struct gfs2_glock *gl) if (ip && !S_ISREG(ip->i_inode.i_mode)) ip = NULL; - if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) - unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); + if (ip) { + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); + inode_dio_wait(&ip->i_inode); + } if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; @@ -410,6 +413,9 @@ static int inode_go_lock(struct gfs2_holder *gh) return error; } + if (gh->gh_state != LM_ST_DEFERRED) + inode_dio_wait(&ip->i_inode); + if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && (gl->gl_state == LM_ST_EXCLUSIVE) && (gh->gh_state == LM_ST_EXCLUSIVE)) { From 443d20fd188208aa4df2118ad49f9168e411016d Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 20 Dec 2013 14:41:54 +0100 Subject: [PATCH 050/164] netfilter: nf_ct_timestamp: Fix BUG_ON after netns deletion When having nf_conntrack_timestamp enabled deleting a netns can lead to the following BUG being triggered: [63836.660000] Kernel bug detected[#1]: [63836.660000] CPU: 0 PID: 0 Comm: swapper Not tainted 3.10.18 #14 [63836.660000] task: 802d9420 ti: 802d2000 task.ti: 802d2000 [63836.660000] $ 0 : 00000000 00000000 00000000 00000000 [63836.660000] $ 4 : 00000001 00000004 00000020 00000020 [63836.660000] $ 8 : 00000000 80064910 00000000 00000000 [63836.660000] $12 : 0bff0002 00000001 00000000 0a0a0abe [63836.660000] $16 : 802e70a0 85f29d80 00000000 00000004 [63836.660000] $20 : 85fb62a0 00000002 802d3bc0 85fb62a0 [63836.660000] $24 : 00000000 87138110 [63836.660000] $28 : 802d2000 802d3b40 00000014 871327cc [63836.660000] Hi : 000005ff [63836.660000] Lo : f2edd000 [63836.660000] epc : 87138794 __nf_ct_ext_add_length+0xe8/0x1ec [nf_conntrack] [63836.660000] Not tainted [63836.660000] ra : 871327cc nf_conntrack_in+0x31c/0x7b8 [nf_conntrack] [63836.660000] Status: 1100d403 KERNEL EXL IE [63836.660000] Cause : 00800034 [63836.660000] PrId : 0001974c (MIPS 74Kc) [63836.660000] Modules linked in: ath9k ath9k_common pppoe ppp_async iptable_nat ath9k_hw ath pppox ppp_generic nf_nat_ipv4 nf_conntrack_ipv4 mac80211 ipt_MASQUERADE cfg80211 xt_time xt_tcpudp xt_state xt_quota xt_policy xt_pkttype xt_owner xt_nat xt_multiport xt_mark xh [63836.660000] Process swapper (pid: 0, threadinfo=802d2000, task=802d9420, tls=00000000) [63836.660000] Stack : 802e70a0 871323d4 00000005 87080234 802e70a0 86d2a840 00000000 00000000 [63836.660000] Call Trace: [63836.660000] [<87138794>] __nf_ct_ext_add_length+0xe8/0x1ec [nf_conntrack] [63836.660000] [<871327cc>] nf_conntrack_in+0x31c/0x7b8 [nf_conntrack] [63836.660000] [<801ff63c>] nf_iterate+0x90/0xec [63836.660000] [<801ff730>] nf_hook_slow+0x98/0x164 [63836.660000] [<80205968>] ip_rcv+0x3e8/0x40c [63836.660000] [<801d9754>] __netif_receive_skb_core+0x624/0x6a4 [63836.660000] [<801da124>] process_backlog+0xa4/0x16c [63836.660000] [<801d9bb4>] net_rx_action+0x10c/0x1e0 [63836.660000] [<8007c5a4>] __do_softirq+0xd0/0x1bc [63836.660000] [<8007c730>] do_softirq+0x48/0x68 [63836.660000] [<8007c964>] irq_exit+0x54/0x70 [63836.660000] [<80060830>] ret_from_irq+0x0/0x4 [63836.660000] [<8006a9f8>] r4k_wait_irqoff+0x18/0x1c [63836.660000] [<8009cfb8>] cpu_startup_entry+0xa4/0x104 [63836.660000] [<802eb918>] start_kernel+0x394/0x3ac [63836.660000] [63836.660000] Code: 00821021 8c420000 2c440001 <00040336> 90440011 92350010 90560010 2485ffff 02a5a821 [63837.040000] ---[ end trace ebf660c3ce3b55e7 ]--- [63837.050000] Kernel panic - not syncing: Fatal exception in interrupt [63837.050000] Rebooting in 3 seconds.. Fix this by not unregistering the conntrack extension in the per-netns cleanup code. This bug was introduced in (73f4001 netfilter: nf_ct_tstamp: move initialization out of pernet_operations). Signed-off-by: Helmut Schaa Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_timestamp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 902fb0a6b38ad..7a394df0deb76 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -97,7 +97,6 @@ int nf_conntrack_tstamp_pernet_init(struct net *net) void nf_conntrack_tstamp_pernet_fini(struct net *net) { nf_conntrack_tstamp_fini_sysctl(net); - nf_ct_extend_unregister(&tstamp_extend); } int nf_conntrack_tstamp_init(void) From 11daf32be9a6041a2406c2cbf76b75a9c4a6eaaa Mon Sep 17 00:00:00 2001 From: Matteo Facchinetti Date: Fri, 20 Dec 2013 10:16:22 +0100 Subject: [PATCH 051/164] powerpc/512x: dts: disable MPC5125 usb module At the moment the USB controller's pin muxing is not setup correctly and causes a kernel panic upon system startup, so disable the USB1 device tree node in the MPC5125 tower board dts file. The USB controller is connected to an USB3320 ULPI transceiver and the device tree should receive an update to reflect correct dependencies and required initialization data before the USB1 node can get re-enabled. Signed-off-by: Matteo Facchinetti Signed-off-by: Anatolij Gustschin --- arch/powerpc/boot/dts/mpc5125twr.dts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts index 0a0fe92216ae0..a618dfc13e4c8 100644 --- a/arch/powerpc/boot/dts/mpc5125twr.dts +++ b/arch/powerpc/boot/dts/mpc5125twr.dts @@ -188,6 +188,10 @@ reg = <0xA000 0x1000>; }; + // disable USB1 port + // TODO: + // correct pinmux config and fix USB3320 ulpi dependency + // before re-enabling it usb@3000 { compatible = "fsl,mpc5121-usb2-dr"; reg = <0x3000 0x400>; @@ -196,6 +200,7 @@ interrupts = <43 0x8>; dr_mode = "host"; phy_type = "ulpi"; + status = "disabled"; }; // 5125 PSCs are not 52xx or 5121 PSC compatible From a68f9614614749727286f675d15f1e09d13cb54a Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 20 Dec 2013 16:52:31 -0800 Subject: [PATCH 052/164] hyperv: Fix race between probe and open calls Moving the register_netdev to the end of probe to prevent possible open call happens before NetVSP is connected. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f8135725bcf67..71baeb3ed905c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -261,9 +261,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct sk_buff *skb; net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; - if (!net) { - netdev_err(net, "got receive callback but net device" - " not initialized yet\n"); + if (!net || net->reg_state != NETREG_REGISTERED) { packet->status = NVSP_STAT_FAIL; return 0; } @@ -435,19 +433,11 @@ static int netvsc_probe(struct hv_device *dev, SET_ETHTOOL_OPS(net, ðtool_ops); SET_NETDEV_DEV(net, &dev->device); - ret = register_netdev(net); - if (ret != 0) { - pr_err("Unable to register netdev.\n"); - free_netdev(net); - goto out; - } - /* Notify the netvsc driver of the new device */ device_info.ring_size = ring_size; ret = rndis_filter_device_add(dev, &device_info); if (ret != 0) { netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - unregister_netdev(net); free_netdev(net); hv_set_drvdata(dev, NULL); return ret; @@ -456,7 +446,13 @@ static int netvsc_probe(struct hv_device *dev, netif_carrier_on(net); -out: + ret = register_netdev(net); + if (ret != 0) { + pr_err("Unable to register netdev.\n"); + rndis_filter_device_remove(dev); + free_netdev(net); + } + return ret; } From b6f8eaece6d5f0247931b6dac140e6cf876f48de Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Dec 2013 16:38:19 +0530 Subject: [PATCH 053/164] cxgb4: Reserve stid 0 for T4/T5 adapters When creating offload server entries, an IPv6 passive connection request can trigger a reply with a null STID, whereas the driver would expect the reply 'STID to match the value used for the request. This happens due to h/w limitation on T4 and T5. This patch ensures that STID 0 is never used if the stid range starts from zero. Based on original work by Santosh Rastapur Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index d6b12e035a7d9..f36f8e1a101c9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3134,6 +3134,7 @@ static int tid_init(struct tid_info *t) size_t size; unsigned int stid_bmap_size; unsigned int natids = t->natids; + struct adapter *adap = container_of(t, struct adapter, tids); stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); size = t->ntids * sizeof(*t->tid_tab) + @@ -3167,6 +3168,11 @@ static int tid_init(struct tid_info *t) t->afree = t->atid_tab; } bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + (is_t4(adap->params.chip) || is_t5(adap->params.chip))) + __set_bit(0, t->stid_bmap); + return 0; } From 7c89e5550ccb2a3118854639d9525847e896c686 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Dec 2013 16:38:20 +0530 Subject: [PATCH 054/164] cxgb4: Include TCP as protocol when creating server filters We were creating LE Workaround Server Filters without specifying IPPROTO_TCP (6) in the filters (when F_PROTOCOL is set in TP_VLAN_PRI_MAP). This meant that UDP packets with matching IP Addresses/Ports would get caught up in the filter and be delivered to ULDs like iw_cxgb4. So, include the protocol information in the server filter properly. Based on original work by Casey Leedom Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 5 +++++ drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f36f8e1a101c9..df1d6b8334a16 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4217,6 +4217,11 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, } } + if (adap->filter_mode & F_PROTOCOL) { + f->fs.val.proto = IPPROTO_TCP; + f->fs.mask.proto = ~0; + } + f->fs.dirsteer = 1; f->fs.iq = queue; /* Mark filter as locked */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 0a8205d69d2c2..d3dd218376b43 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1171,6 +1171,10 @@ #define A_TP_TX_SCHED_PCMD 0x25 +#define S_PROTOCOL 5 +#define V_PROTOCOL(x) ((x) << S_PROTOCOL) +#define F_PROTOCOL V_PROTOCOL(1U) + #define S_PORT 1 #define V_PORT(x) ((x) << S_PORT) #define F_PORT V_PORT(1U) From 470c60c47a03f74f0ec1a83576eb6000025634a9 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Dec 2013 16:38:21 +0530 Subject: [PATCH 055/164] cxgb4: Assign filter server TIDs properly The LE workaround code is incorrectly reusing the TCAM TIDs (meant for allocation by firmware in case of hash collisions) for filter servers. This patch assigns the filter server TIDs properly starting from sftid_base index. Based on original work by Santosh Rastapur Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 16 ++++++++++++---- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 9 ++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index df1d6b8334a16..f4f46fcea7b79 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3012,7 +3012,8 @@ int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data) } if (stid >= 0) { t->stid_tab[stid].data = data; - stid += t->stid_base; + stid -= t->nstids; + stid += t->sftid_base; t->stids_in_use++; } spin_unlock_bh(&t->stid_lock); @@ -3024,7 +3025,14 @@ EXPORT_SYMBOL(cxgb4_alloc_sftid); */ void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family) { - stid -= t->stid_base; + /* Is it a server filter TID? */ + if (t->nsftids && (stid >= t->sftid_base)) { + stid -= t->sftid_base; + stid += t->nstids; + } else { + stid -= t->stid_base; + } + spin_lock_bh(&t->stid_lock); if (family == PF_INET) __clear_bit(stid, t->stid_bmap); @@ -4185,7 +4193,7 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, adap = netdev2adap(dev); /* Adjust stid to correct filter index */ - stid -= adap->tids.nstids; + stid -= adap->tids.sftid_base; stid += adap->tids.nftids; /* Check to make sure the filter requested is writable ... @@ -4248,7 +4256,7 @@ int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, adap = netdev2adap(dev); /* Adjust stid to correct filter index */ - stid -= adap->tids.nstids; + stid -= adap->tids.sftid_base; stid += adap->tids.nftids; f = &adap->tids.ftid_tab[stid]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 6f21f2451c305..4dd0a82533e44 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -131,7 +131,14 @@ static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) { - stid -= t->stid_base; + /* Is it a server filter TID? */ + if (t->nsftids && (stid >= t->sftid_base)) { + stid -= t->sftid_base; + stid += t->nstids; + } else { + stid -= t->stid_base; + } + return stid < (t->nstids + t->nsftids) ? t->stid_tab[stid].data : NULL; } From 15f63b74c25797f9246b1738f0cfabd5febe226e Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Dec 2013 16:38:22 +0530 Subject: [PATCH 056/164] cxgb4: Account for stid entries properly in case of IPv6 IPv6 uses 2 TIDs with CLIP enabled and 4 TIDs without CLIP. Currently we are incrementing STIDs in use by 1 for both IPv4 and IPv6 which is wrong. Further, driver currently does not have interface to query if CLIP is programmed for particular IPv6 address. So, in this patch we increment/decrement TIDs in use by 4 for IPv6 assuming absence of CLIP. Such assumption keeps us on safe side and we don't end up allocating more stids for IPv6 than actually supported. Based on original work by Santosh Rastapur Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f4f46fcea7b79..f9ca36c256772 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2986,7 +2986,14 @@ int cxgb4_alloc_stid(struct tid_info *t, int family, void *data) if (stid >= 0) { t->stid_tab[stid].data = data; stid += t->stid_base; - t->stids_in_use++; + /* IPv6 requires max of 520 bits or 16 cells in TCAM + * This is equivalent to 4 TIDs. With CLIP enabled it + * needs 2 TIDs. + */ + if (family == PF_INET) + t->stids_in_use++; + else + t->stids_in_use += 4; } spin_unlock_bh(&t->stid_lock); return stid; @@ -3039,7 +3046,10 @@ void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family) else bitmap_release_region(t->stid_bmap, stid, 2); t->stid_tab[stid].data = NULL; - t->stids_in_use--; + if (family == PF_INET) + t->stids_in_use--; + else + t->stids_in_use -= 4; spin_unlock_bh(&t->stid_lock); } EXPORT_SYMBOL(cxgb4_free_stid); From dcf7b6f5bdeaa13d5e465d8795d2e7d6d1e27b65 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Dec 2013 16:38:23 +0530 Subject: [PATCH 057/164] cxgb4: Add API to correctly calculate tuple fields Adds API cxgb4_select_ntuple so as to enable Upper Level Drivers to correctly calculate the tuple fields. Adds constant definitions for TP_VLAN_PRI_MAP for the Compressed Filter Tuple field widths and structures and uses them. Also, the CPL Parameters field for T5 is 40 bits so we need to prototype cxgb4_select_ntuple() to calculate and return u64 values. Based on original work by Casey Leedom Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 21 ++++ .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 20 +--- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 35 ++++++ drivers/net/ethernet/chelsio/cxgb4/l2t.h | 3 +- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 103 ++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 69 ++++++++++++ 6 files changed, 235 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 6c9308850453b..56e0415f8cdff 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -228,6 +228,25 @@ struct tp_params { uint32_t dack_re; /* DACK timer resolution */ unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ + + u32 vlan_pri_map; /* cached TP_VLAN_PRI_MAP */ + u32 ingress_config; /* cached TP_INGRESS_CONFIG */ + + /* TP_VLAN_PRI_MAP Compressed Filter Tuple field offsets. This is a + * subset of the set of fields which may be present in the Compressed + * Filter Tuple portion of filters and TCP TCB connections. The + * fields which are present are controlled by the TP_VLAN_PRI_MAP. + * Since a variable number of fields may or may not be present, their + * shifted field positions within the Compressed Filter Tuple may + * vary, or not even be present if the field isn't selected in + * TP_VLAN_PRI_MAP. Since some of these fields are needed in various + * places we store their offsets here, or a -1 if the field isn't + * present. + */ + int vlan_shift; + int vnic_shift; + int port_shift; + int protocol_shift; }; struct vpd_params { @@ -926,6 +945,8 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, const u8 *fw_data, unsigned int fw_size, struct fw_hdr *card_fw, enum dev_state state, int *reset); int t4_prep_adapter(struct adapter *adapter); +int t4_init_tp_params(struct adapter *adap); +int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); void t4_fatal_err(struct adapter *adapter); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f9ca36c256772..fff02ed1295e5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3755,7 +3755,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET( t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >> (adap->fn * 4)); - lli.filt_mode = adap->filter_mode; + lli.filt_mode = adap->params.tp.vlan_pri_map; /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ for (i = 0; i < NCHAN; i++) lli.tx_modq[i] = i; @@ -4229,13 +4229,13 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, f->fs.val.lip[i] = val[i]; f->fs.mask.lip[i] = ~0; } - if (adap->filter_mode & F_PORT) { + if (adap->params.tp.vlan_pri_map & F_PORT) { f->fs.val.iport = port; f->fs.mask.iport = mask; } } - if (adap->filter_mode & F_PROTOCOL) { + if (adap->params.tp.vlan_pri_map & F_PROTOCOL) { f->fs.val.proto = IPPROTO_TCP; f->fs.mask.proto = ~0; } @@ -5121,7 +5121,7 @@ static int adap_init0(struct adapter *adap) enum dev_state state; u32 params[7], val[7]; struct fw_caps_config_cmd caps_cmd; - int reset = 1, j; + int reset = 1; /* * Contact FW, advertising Master capability (and potentially forcing @@ -5463,21 +5463,11 @@ static int adap_init0(struct adapter *adap) /* * These are finalized by FW initialization, load their values now. */ - v = t4_read_reg(adap, TP_TIMER_RESOLUTION); - adap->params.tp.tre = TIMERRESOLUTION_GET(v); - adap->params.tp.dack_re = DELAYEDACKRESOLUTION_GET(v); t4_read_mtu_tbl(adap, adap->params.mtus, NULL); t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd, adap->params.b_wnd); - /* MODQ_REQ_MAP defaults to setting queues 0-3 to chan 0-3 */ - for (j = 0; j < NCHAN; j++) - adap->params.tp.tx_modq[j] = j; - - t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, - &adap->filter_mode, 1, - TP_VLAN_PRI_MAP); - + t4_init_tp_params(adap); adap->flags |= FW_OK; return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 29878098101eb..cb05be905defd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -45,6 +45,7 @@ #include "l2t.h" #include "t4_msg.h" #include "t4fw_api.h" +#include "t4_regs.h" #define VLAN_NONE 0xfff @@ -411,6 +412,40 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, } EXPORT_SYMBOL(cxgb4_l2t_get); +u64 cxgb4_select_ntuple(struct net_device *dev, + const struct l2t_entry *l2t) +{ + struct adapter *adap = netdev2adap(dev); + struct tp_params *tp = &adap->params.tp; + u64 ntuple = 0; + + /* Initialize each of the fields which we care about which are present + * in the Compressed Filter Tuple. + */ + if (tp->vlan_shift >= 0 && l2t->vlan != VLAN_NONE) + ntuple |= (F_FT_VLAN_VLD | l2t->vlan) << tp->vlan_shift; + + if (tp->port_shift >= 0) + ntuple |= (u64)l2t->lport << tp->port_shift; + + if (tp->protocol_shift >= 0) + ntuple |= (u64)IPPROTO_TCP << tp->protocol_shift; + + if (tp->vnic_shift >= 0) { + u32 viid = cxgb4_port_viid(dev); + u32 vf = FW_VIID_VIN_GET(viid); + u32 pf = FW_VIID_PFN_GET(viid); + u32 vld = FW_VIID_VIVLD_GET(viid); + + ntuple |= (u64)(V_FT_VNID_ID_VF(vf) | + V_FT_VNID_ID_PF(pf) | + V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift; + } + + return ntuple; +} +EXPORT_SYMBOL(cxgb4_select_ntuple); + /* * Called when address resolution fails for an L2T entry to handle packets * on the arpq head. If a packet specifies a failure handler it is invoked, diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h index 108c0f1fce1c4..85eb5c71358d8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h @@ -98,7 +98,8 @@ int cxgb4_l2t_send(struct net_device *dev, struct sk_buff *skb, struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, const struct net_device *physdev, unsigned int priority); - +u64 cxgb4_select_ntuple(struct net_device *dev, + const struct l2t_entry *l2t); void t4_l2t_update(struct adapter *adap, struct neighbour *neigh); struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d); int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 74a6fce5a15a6..e1413eacdbd20 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3808,6 +3808,109 @@ int t4_prep_adapter(struct adapter *adapter) return 0; } +/** + * t4_init_tp_params - initialize adap->params.tp + * @adap: the adapter + * + * Initialize various fields of the adapter's TP Parameters structure. + */ +int t4_init_tp_params(struct adapter *adap) +{ + int chan; + u32 v; + + v = t4_read_reg(adap, TP_TIMER_RESOLUTION); + adap->params.tp.tre = TIMERRESOLUTION_GET(v); + adap->params.tp.dack_re = DELAYEDACKRESOLUTION_GET(v); + + /* MODQ_REQ_MAP defaults to setting queues 0-3 to chan 0-3 */ + for (chan = 0; chan < NCHAN; chan++) + adap->params.tp.tx_modq[chan] = chan; + + /* Cache the adapter's Compressed Filter Mode and global Incress + * Configuration. + */ + t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &adap->params.tp.vlan_pri_map, 1, + TP_VLAN_PRI_MAP); + t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &adap->params.tp.ingress_config, 1, + TP_INGRESS_CONFIG); + + /* Now that we have TP_VLAN_PRI_MAP cached, we can calculate the field + * shift positions of several elements of the Compressed Filter Tuple + * for this adapter which we need frequently ... + */ + adap->params.tp.vlan_shift = t4_filter_field_shift(adap, F_VLAN); + adap->params.tp.vnic_shift = t4_filter_field_shift(adap, F_VNIC_ID); + adap->params.tp.port_shift = t4_filter_field_shift(adap, F_PORT); + adap->params.tp.protocol_shift = t4_filter_field_shift(adap, + F_PROTOCOL); + + /* If TP_INGRESS_CONFIG.VNID == 0, then TP_VLAN_PRI_MAP.VNIC_ID + * represents the presense of an Outer VLAN instead of a VNIC ID. + */ + if ((adap->params.tp.ingress_config & F_VNIC) == 0) + adap->params.tp.vnic_shift = -1; + + return 0; +} + +/** + * t4_filter_field_shift - calculate filter field shift + * @adap: the adapter + * @filter_sel: the desired field (from TP_VLAN_PRI_MAP bits) + * + * Return the shift position of a filter field within the Compressed + * Filter Tuple. The filter field is specified via its selection bit + * within TP_VLAN_PRI_MAL (filter mode). E.g. F_VLAN. + */ +int t4_filter_field_shift(const struct adapter *adap, int filter_sel) +{ + unsigned int filter_mode = adap->params.tp.vlan_pri_map; + unsigned int sel; + int field_shift; + + if ((filter_mode & filter_sel) == 0) + return -1; + + for (sel = 1, field_shift = 0; sel < filter_sel; sel <<= 1) { + switch (filter_mode & sel) { + case F_FCOE: + field_shift += W_FT_FCOE; + break; + case F_PORT: + field_shift += W_FT_PORT; + break; + case F_VNIC_ID: + field_shift += W_FT_VNIC_ID; + break; + case F_VLAN: + field_shift += W_FT_VLAN; + break; + case F_TOS: + field_shift += W_FT_TOS; + break; + case F_PROTOCOL: + field_shift += W_FT_PROTOCOL; + break; + case F_ETHERTYPE: + field_shift += W_FT_ETHERTYPE; + break; + case F_MACMATCH: + field_shift += W_FT_MACMATCH; + break; + case F_MPSHITTYPE: + field_shift += W_FT_MPSHITTYPE; + break; + case F_FRAGMENTATION: + field_shift += W_FT_FRAGMENTATION; + break; + } + } + return field_shift; +} + int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) { u8 addr[6]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index d3dd218376b43..4082522d81408 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1171,14 +1171,50 @@ #define A_TP_TX_SCHED_PCMD 0x25 +#define S_VNIC 11 +#define V_VNIC(x) ((x) << S_VNIC) +#define F_VNIC V_VNIC(1U) + +#define S_FRAGMENTATION 9 +#define V_FRAGMENTATION(x) ((x) << S_FRAGMENTATION) +#define F_FRAGMENTATION V_FRAGMENTATION(1U) + +#define S_MPSHITTYPE 8 +#define V_MPSHITTYPE(x) ((x) << S_MPSHITTYPE) +#define F_MPSHITTYPE V_MPSHITTYPE(1U) + +#define S_MACMATCH 7 +#define V_MACMATCH(x) ((x) << S_MACMATCH) +#define F_MACMATCH V_MACMATCH(1U) + +#define S_ETHERTYPE 6 +#define V_ETHERTYPE(x) ((x) << S_ETHERTYPE) +#define F_ETHERTYPE V_ETHERTYPE(1U) + #define S_PROTOCOL 5 #define V_PROTOCOL(x) ((x) << S_PROTOCOL) #define F_PROTOCOL V_PROTOCOL(1U) +#define S_TOS 4 +#define V_TOS(x) ((x) << S_TOS) +#define F_TOS V_TOS(1U) + +#define S_VLAN 3 +#define V_VLAN(x) ((x) << S_VLAN) +#define F_VLAN V_VLAN(1U) + +#define S_VNIC_ID 2 +#define V_VNIC_ID(x) ((x) << S_VNIC_ID) +#define F_VNIC_ID V_VNIC_ID(1U) + #define S_PORT 1 #define V_PORT(x) ((x) << S_PORT) #define F_PORT V_PORT(1U) +#define S_FCOE 0 +#define V_FCOE(x) ((x) << S_FCOE) +#define F_FCOE V_FCOE(1U) + #define NUM_MPS_CLS_SRAM_L_INSTANCES 336 #define NUM_MPS_T5_CLS_SRAM_L_INSTANCES 512 @@ -1217,4 +1253,37 @@ #define V_CHIPID(x) ((x) << S_CHIPID) #define G_CHIPID(x) (((x) >> S_CHIPID) & M_CHIPID) +/* TP_VLAN_PRI_MAP controls which subset of fields will be present in the + * Compressed Filter Tuple for LE filters. Each bit set in TP_VLAN_PRI_MAP + * selects for a particular field being present. These fields, when present + * in the Compressed Filter Tuple, have the following widths in bits. + */ +#define W_FT_FCOE 1 +#define W_FT_PORT 3 +#define W_FT_VNIC_ID 17 +#define W_FT_VLAN 17 +#define W_FT_TOS 8 +#define W_FT_PROTOCOL 8 +#define W_FT_ETHERTYPE 16 +#define W_FT_MACMATCH 9 +#define W_FT_MPSHITTYPE 3 +#define W_FT_FRAGMENTATION 1 + +/* Some of the Compressed Filter Tuple fields have internal structure. These + * bit shifts/masks describe those structures. All shifts are relative to the + * base position of the fields within the Compressed Filter Tuple + */ +#define S_FT_VLAN_VLD 16 +#define V_FT_VLAN_VLD(x) ((x) << S_FT_VLAN_VLD) +#define F_FT_VLAN_VLD V_FT_VLAN_VLD(1U) + +#define S_FT_VNID_ID_VF 0 +#define V_FT_VNID_ID_VF(x) ((x) << S_FT_VNID_ID_VF) + +#define S_FT_VNID_ID_PF 7 +#define V_FT_VNID_ID_PF(x) ((x) << S_FT_VNID_ID_PF) + +#define S_FT_VNID_ID_VLD 16 +#define V_FT_VNID_ID_VLD(x) ((x) << S_FT_VNID_ID_VLD) + #endif /* __T4_REGS_H */ From a4ea025fc24532bae8a038d038f8e0f15b8a7d98 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Dec 2013 16:38:24 +0530 Subject: [PATCH 058/164] RDMA/cxgb4: Calculate the filter server TID properly Based on original work by Santosh Rastapur Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 12fef76c791c5..02c751543b70b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3323,9 +3323,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) /* * Calculate the server tid from filter hit index from cpl_rx_pkt. */ - stid = (__force int) cpu_to_be32((__force u32) rss->hash_val) - - dev->rdev.lldi.tids->sftid_base - + dev->rdev.lldi.tids->nstids; + stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); if (!lep) { From 8c04469057c3307d92d2c7076edcf9eb8f752bca Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Dec 2013 16:38:25 +0530 Subject: [PATCH 059/164] RDMA/cxgb4: Server filters are supported only for IPv4 Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 02c751543b70b..bfd4ed78dcc1c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2938,7 +2938,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) /* * Allocate a server TID. */ - if (dev->rdev.lldi.enable_fw_ofld_conn) + if (dev->rdev.lldi.enable_fw_ofld_conn && + ep->com.local_addr.ss_family == AF_INET) ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, cm_id->local_addr.ss_family, ep); else From 41b4f86c1368758a02129e0629a9cc7c2811fa32 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 18 Dec 2013 16:38:26 +0530 Subject: [PATCH 060/164] RDMA/cxgb4: Use cxgb4_select_ntuple to correctly calculate ntuple fields Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 71 ++++++++------------------------ 1 file changed, 17 insertions(+), 54 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index bfd4ed78dcc1c..45126879ad28a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -524,50 +524,6 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -#define VLAN_NONE 0xfff -#define FILTER_SEL_VLAN_NONE 0xffff -#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */ -#define FILTER_SEL_WIDTH_VIN_P_FC \ - (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/ -#define FILTER_SEL_WIDTH_TAG_P_FC \ - (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */ -#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) - -static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, - struct l2t_entry *l2t) -{ - unsigned int ntuple = 0; - u32 viid; - - switch (dev->rdev.lldi.filt_mode) { - - /* default filter mode */ - case HW_TPL_FR_MT_PR_IV_P_FC: - if (l2t->vlan == VLAN_NONE) - ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; - else { - ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC; - ntuple |= 1 << FILTER_SEL_WIDTH_TAG_P_FC; - } - ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << - FILTER_SEL_WIDTH_VLD_TAG_P_FC; - break; - case HW_TPL_FR_MT_PR_OV_P_FC: { - viid = cxgb4_port_viid(l2t->neigh->dev); - - ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC; - ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC; - ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC; - ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << - FILTER_SEL_WIDTH_VLD_TAG_P_FC; - break; - } - default: - break; - } - return ntuple; -} - static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -641,8 +597,9 @@ static int send_connect(struct c4iw_ep *ep) req->local_ip = la->sin_addr.s_addr; req->peer_ip = ra->sin_addr.s_addr; req->opt0 = cpu_to_be64(opt0); - req->params = cpu_to_be32(select_ntuple(ep->com.dev, - ep->dst, ep->l2t)); + req->params = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); req->opt2 = cpu_to_be32(opt2); } else { req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen); @@ -662,9 +619,9 @@ static int send_connect(struct c4iw_ep *ep) req6->peer_ip_lo = *((__be64 *) (ra6->sin6_addr.s6_addr + 8)); req6->opt0 = cpu_to_be64(opt0); - req6->params = cpu_to_be32( - select_ntuple(ep->com.dev, ep->dst, - ep->l2t)); + req6->params = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); req6->opt2 = cpu_to_be32(opt2); } } else { @@ -681,8 +638,9 @@ static int send_connect(struct c4iw_ep *ep) t5_req->peer_ip = ra->sin_addr.s_addr; t5_req->opt0 = cpu_to_be64(opt0); t5_req->params = cpu_to_be64(V_FILTER_TUPLE( - select_ntuple(ep->com.dev, - ep->dst, ep->l2t))); + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t))); t5_req->opt2 = cpu_to_be32(opt2); } else { t5_req6 = (struct cpl_t5_act_open_req6 *) @@ -703,7 +661,9 @@ static int send_connect(struct c4iw_ep *ep) (ra6->sin6_addr.s6_addr + 8)); t5_req6->opt0 = cpu_to_be64(opt0); t5_req6->params = (__force __be64)cpu_to_be32( - select_ntuple(ep->com.dev, ep->dst, ep->l2t)); + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); t5_req6->opt2 = cpu_to_be32(opt2); } } @@ -1630,7 +1590,8 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) memset(req, 0, sizeof(*req)); req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); - req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, + req->le.filter = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], ep->l2t)); sin = (struct sockaddr_in *)&ep->com.local_addr; req->le.lport = sin->sin_port; @@ -3396,7 +3357,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) window = (__force u16) htons((__force u16)tcph->window); /* Calcuate filter portion for LE region. */ - filter = (__force unsigned int) cpu_to_be32(select_ntuple(dev, dst, e)); + filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple( + dev->rdev.lldi.ports[0], + e)); /* * Synthesize the cpl_pass_accept_req. We have everything except the From db850559a303dc9459d7dd7339bd19a66907a15a Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Wed, 18 Dec 2013 21:33:50 +0530 Subject: [PATCH 061/164] drivers: net : cpsw: pass proper device name while requesting irq During checking the interrupts with "cat /proc/interrupts", it is showing device name as (null), this change was done with commit id aa1a15e2d where request_irq is changed to devm_request_irq also changing the irq name from platform device name to net device name, but the net device is not registered at this point with the network frame work, so devm_request_irq is called with device name as NULL, by which it is showed as "(null)" in "cat /proc/interrupts". So this patch changes back irq name to platform device name itself in devm_request_irq so that the device name shows as below. Previous to this patch root@am335x-evm:~# cat /proc/interrupts CPU0 28: 2265 INTC 12 edma 30: 80 INTC 14 edma_error 56: 0 INTC 40 (null) 57: 1794 INTC 41 (null) 58: 7 INTC 42 (null) 59: 0 INTC 43 (null) With this patch root@am335x-evm:~# cat /proc/interrupts CPU0 28: 213 INTC 12 edma 30: 9 INTC 14 edma_error 56: 0 INTC 40 4a100000.ethernet 57: 16097 INTC 41 4a100000.ethernet 58: 11964 INTC 42 4a100000.ethernet 59: 0 INTC 43 4a100000.ethernet Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 614f284fe1fdc..5330fd298705e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2108,7 +2108,7 @@ static int cpsw_probe(struct platform_device *pdev) while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k))) { for (i = res->start; i <= res->end; i++) { if (devm_request_irq(&pdev->dev, i, cpsw_interrupt, 0, - dev_name(priv->dev), priv)) { + dev_name(&pdev->dev), priv)) { dev_err(priv->dev, "error attaching irq\n"); goto clean_ale_ret; } From 61e7f09d0f437c9614029445754099383ec2eec4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 19 Dec 2013 02:13:36 +0100 Subject: [PATCH 062/164] ipv4: consistent reporting of pmtu data in case of corking We report different pmtu values back on the first write and on further writes on an corked socket. Also don't include the dst.header_len (respectively exthdrlen) as this should already be dealt with by the interface mtu of the outgoing (virtual) interface and policy of that interface should dictate if fragmentation should happen. Instead reduce the pmtu data by IP options as we do for IPv6. Make the same changes for ip_append_data, where we did not care about options or dst.header_len at all. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 912402752f2ff..df184616493f7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -828,7 +828,7 @@ static int __ip_append_data(struct sock *sk, if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, - mtu-exthdrlen); + mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } @@ -1151,7 +1151,8 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, mtu : 0xFFFF; if (cork->length + size > maxnonfragsize - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu); + ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, + mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } From bae651dbd7ade3c5d6518f89599ae680a2fe2b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 20 Dec 2013 17:48:54 +0100 Subject: [PATCH 063/164] drm/radeon: fix UVD 256MB check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the kernel might reject our decoding requests. Signed-off-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_uvd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 373d088bac66d..b9c0529b4a2e1 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -473,7 +473,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, return -EINVAL; } - if ((start >> 28) != (end >> 28)) { + if ((start >> 28) != ((end - 1) >> 28)) { DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", start, end); return -EINVAL; From d00adcc8ae9e22eca9d8af5f66c59ad9a74c90ec Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 23 Dec 2013 09:31:58 -0500 Subject: [PATCH 064/164] drm/radeon: 0x9649 is SUMO2 not SUMO Fixes rendering corruption due to incorrect gfx configuration. bug: https://bugs.freedesktop.org/show_bug.cgi?id=63599 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- include/drm/drm_pciids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 87578c109e486..49376aec2fbb8 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -600,7 +600,7 @@ {0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ {0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ - {0x1002, 0x9649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ + {0x1002, 0x9649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ {0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x964b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x964c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ From 9fadb352ed73edd7801a280b552d33a6040c8721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 22 Dec 2013 02:18:00 +0100 Subject: [PATCH 065/164] drm/radeon: fix render backend setup for SI and CIK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the render backends of the first shader engine were enabled. The others were erroneously disabled. Enabling the other render backends improves performance a lot. Unigine Sanctuary on Bonaire: Before: 15 fps After: 90 fps Judging from the fan noise, the GPU was also underclocked when the other render backends were disabled, resulting in horrible performance. The fan is a lot noisy under load now. Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 10 +++++----- drivers/gpu/drm/radeon/si.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b43a3a3c90671..138a776469431 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3057,7 +3057,7 @@ static u32 cik_create_bitmask(u32 bit_width) * Returns the disabled RB bitmask. */ static u32 cik_get_rb_disabled(struct radeon_device *rdev, - u32 max_rb_num, u32 se_num, + u32 max_rb_num_per_se, u32 sh_per_se) { u32 data, mask; @@ -3071,7 +3071,7 @@ static u32 cik_get_rb_disabled(struct radeon_device *rdev, data >>= BACKEND_DISABLE_SHIFT; - mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se); + mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se); return data & mask; } @@ -3088,7 +3088,7 @@ static u32 cik_get_rb_disabled(struct radeon_device *rdev, */ static void cik_setup_rb(struct radeon_device *rdev, u32 se_num, u32 sh_per_se, - u32 max_rb_num) + u32 max_rb_num_per_se) { int i, j; u32 data, mask; @@ -3098,7 +3098,7 @@ static void cik_setup_rb(struct radeon_device *rdev, for (i = 0; i < se_num; i++) { for (j = 0; j < sh_per_se; j++) { cik_select_se_sh(rdev, i, j); - data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); + data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se); if (rdev->family == CHIP_HAWAII) disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH); else @@ -3108,7 +3108,7 @@ static void cik_setup_rb(struct radeon_device *rdev, cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); mask = 1; - for (i = 0; i < max_rb_num; i++) { + for (i = 0; i < max_rb_num_per_se * se_num; i++) { if (!(disabled_rbs & mask)) enabled_rbs |= mask; mask <<= 1; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index a36736dab5e06..3eed9a1e44eaa 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2811,7 +2811,7 @@ static void si_setup_spi(struct radeon_device *rdev, } static u32 si_get_rb_disabled(struct radeon_device *rdev, - u32 max_rb_num, u32 se_num, + u32 max_rb_num_per_se, u32 sh_per_se) { u32 data, mask; @@ -2825,14 +2825,14 @@ static u32 si_get_rb_disabled(struct radeon_device *rdev, data >>= BACKEND_DISABLE_SHIFT; - mask = si_create_bitmask(max_rb_num / se_num / sh_per_se); + mask = si_create_bitmask(max_rb_num_per_se / sh_per_se); return data & mask; } static void si_setup_rb(struct radeon_device *rdev, u32 se_num, u32 sh_per_se, - u32 max_rb_num) + u32 max_rb_num_per_se) { int i, j; u32 data, mask; @@ -2842,14 +2842,14 @@ static void si_setup_rb(struct radeon_device *rdev, for (i = 0; i < se_num; i++) { for (j = 0; j < sh_per_se; j++) { si_select_se_sh(rdev, i, j); - data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); + data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se); disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH); } } si_select_se_sh(rdev, 0xffffffff, 0xffffffff); mask = 1; - for (i = 0; i < max_rb_num; i++) { + for (i = 0; i < max_rb_num_per_se * se_num; i++) { if (!(disabled_rbs & mask)) enabled_rbs |= mask; mask <<= 1; From 439a1cfffe2c1a06e5a6394ccd5d18a8e89b15d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 22 Dec 2013 02:18:01 +0100 Subject: [PATCH 066/164] drm/radeon: expose render backend mask to the userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow userspace to correctly program the PA_SC_RASTER_CONFIG register, so it can be considered a fix. Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 2 ++ drivers/gpu/drm/radeon/radeon.h | 4 ++-- drivers/gpu/drm/radeon/radeon_kms.c | 9 +++++++++ drivers/gpu/drm/radeon/si.c | 2 ++ include/uapi/drm/radeon_drm.h | 2 ++ 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 138a776469431..e950fabd7f5e4 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3114,6 +3114,8 @@ static void cik_setup_rb(struct radeon_device *rdev, mask <<= 1; } + rdev->config.cik.backend_enable_mask = enabled_rbs; + for (i = 0; i < se_num; i++) { cik_select_se_sh(rdev, i, 0xffffffff); data = 0; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b1f990d0eaa10..45e1f447bc794 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1940,7 +1940,7 @@ struct si_asic { unsigned sc_earlyz_tile_fifo_size; unsigned num_tile_pipes; - unsigned num_backends_per_se; + unsigned backend_enable_mask; unsigned backend_disable_mask_per_asic; unsigned backend_map; unsigned num_texture_channel_caches; @@ -1970,7 +1970,7 @@ struct cik_asic { unsigned sc_earlyz_tile_fifo_size; unsigned num_tile_pipes; - unsigned num_backends_per_se; + unsigned backend_enable_mask; unsigned backend_disable_mask_per_asic; unsigned backend_map; unsigned num_texture_channel_caches; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 55d0b474bd371..21d593c0ecaf4 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -461,6 +461,15 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case RADEON_INFO_SI_CP_DMA_COMPUTE: *value = 1; break; + case RADEON_INFO_SI_BACKEND_ENABLED_MASK: + if (rdev->family >= CHIP_BONAIRE) { + *value = rdev->config.cik.backend_enable_mask; + } else if (rdev->family >= CHIP_TAHITI) { + *value = rdev->config.si.backend_enable_mask; + } else { + DRM_DEBUG_KMS("BACKEND_ENABLED_MASK is si+ only!\n"); + } + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 3eed9a1e44eaa..85e1edfaa3bed 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2855,6 +2855,8 @@ static void si_setup_rb(struct radeon_device *rdev, mask <<= 1; } + rdev->config.si.backend_enable_mask = enabled_rbs; + for (i = 0; i < se_num; i++) { si_select_se_sh(rdev, i, 0xffffffff); data = 0; diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 2f3f7ea8c77b8..fe421e8a431bc 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -983,6 +983,8 @@ struct drm_radeon_cs { #define RADEON_INFO_SI_CP_DMA_COMPUTE 0x17 /* CIK macrotile mode array */ #define RADEON_INFO_CIK_MACROTILE_MODE_ARRAY 0x18 +/* query the number of render backends */ +#define RADEON_INFO_SI_BACKEND_ENABLED_MASK 0x19 struct drm_radeon_info { From 35a905282b20e556cd09f348f9c2bc8a22ea26d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 23 Dec 2013 17:11:35 +0100 Subject: [PATCH 067/164] drm/radeon: set correct pipe config for Hawaii in DCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios_crtc.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index b1970596a782a..ec97bad689f67 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1180,19 +1180,12 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, fb_format |= EVERGREEN_GRPH_ARRAY_MODE(EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1); if (rdev->family >= CHIP_BONAIRE) { - u32 num_pipe_configs = rdev->config.cik.max_tile_pipes; - u32 num_rb = rdev->config.cik.max_backends_per_se; - if (num_pipe_configs > 8) - num_pipe_configs = 8; - if (num_pipe_configs == 8) - fb_format |= CIK_GRPH_PIPE_CONFIG(CIK_ADDR_SURF_P8_32x32_16x16); - else if (num_pipe_configs == 4) { - if (num_rb == 4) - fb_format |= CIK_GRPH_PIPE_CONFIG(CIK_ADDR_SURF_P4_16x16); - else if (num_rb < 4) - fb_format |= CIK_GRPH_PIPE_CONFIG(CIK_ADDR_SURF_P4_8x16); - } else if (num_pipe_configs == 2) - fb_format |= CIK_GRPH_PIPE_CONFIG(CIK_ADDR_SURF_P2); + /* Read the pipe config from the 2D TILED SCANOUT mode. + * It should be the same for the other modes too, but not all + * modes set the pipe config field. */ + u32 pipe_config = (rdev->config.cik.tile_mode_array[10] >> 6) & 0x1f; + + fb_format |= CIK_GRPH_PIPE_CONFIG(pipe_config); } else if ((rdev->family == CHIP_TAHITI) || (rdev->family == CHIP_PITCAIRN)) fb_format |= SI_GRPH_PIPE_CONFIG(SI_ADDR_SURF_P8_32x32_8x16); From e3ea94a60f12025a3a547964c31245e71ac8fa77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 23 Dec 2013 17:11:36 +0100 Subject: [PATCH 068/164] drm/radeon: set correct number of banks for CIK chips in DCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't have the NUM_BANKS parameter, so we have to calculate it from the other parameters. NUM_BANKS is not constant on CIK. This fixes 2D tiling for the display engine on CIK. Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios_crtc.c | 64 +++++++++++++++++--------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index ec97bad689f67..0b9621c9aeea3 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1143,31 +1143,53 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, } if (tiling_flags & RADEON_TILING_MACRO) { - if (rdev->family >= CHIP_BONAIRE) - tmp = rdev->config.cik.tile_config; - else if (rdev->family >= CHIP_TAHITI) - tmp = rdev->config.si.tile_config; - else if (rdev->family >= CHIP_CAYMAN) - tmp = rdev->config.cayman.tile_config; - else - tmp = rdev->config.evergreen.tile_config; + evergreen_tiling_fields(tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); - switch ((tmp & 0xf0) >> 4) { - case 0: /* 4 banks */ - fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_4_BANK); - break; - case 1: /* 8 banks */ - default: - fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_8_BANK); - break; - case 2: /* 16 banks */ - fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_16_BANK); - break; + /* Set NUM_BANKS. */ + if (rdev->family >= CHIP_BONAIRE) { + unsigned tileb, index, num_banks, tile_split_bytes; + + /* Calculate the macrotile mode index. */ + tile_split_bytes = 64 << tile_split; + tileb = 8 * 8 * target_fb->bits_per_pixel / 8; + tileb = min(tile_split_bytes, tileb); + + for (index = 0; tileb > 64; index++) { + tileb >>= 1; + } + + if (index >= 16) { + DRM_ERROR("Wrong screen bpp (%u) or tile split (%u)\n", + target_fb->bits_per_pixel, tile_split); + return -EINVAL; + } + + num_banks = (rdev->config.cik.macrotile_mode_array[index] >> 6) & 0x3; + fb_format |= EVERGREEN_GRPH_NUM_BANKS(num_banks); + } else { + /* SI and older. */ + if (rdev->family >= CHIP_TAHITI) + tmp = rdev->config.si.tile_config; + else if (rdev->family >= CHIP_CAYMAN) + tmp = rdev->config.cayman.tile_config; + else + tmp = rdev->config.evergreen.tile_config; + + switch ((tmp & 0xf0) >> 4) { + case 0: /* 4 banks */ + fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_4_BANK); + break; + case 1: /* 8 banks */ + default: + fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_8_BANK); + break; + case 2: /* 16 banks */ + fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_16_BANK); + break; + } } fb_format |= EVERGREEN_GRPH_ARRAY_MODE(EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1); - - evergreen_tiling_fields(tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); fb_format |= EVERGREEN_GRPH_TILE_SPLIT(tile_split); fb_format |= EVERGREEN_GRPH_BANK_WIDTH(bankw); fb_format |= EVERGREEN_GRPH_BANK_HEIGHT(bankh); From 9482d0d37b46d2bd968d357bbd912cae49caf163 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 23 Dec 2013 11:31:44 -0500 Subject: [PATCH 069/164] drm/radeon: Bump version for CIK DCE tiling fix Note when CIK DCE tiling was fixed. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 1958b36ad0e5c..db39ea36bf22f 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -77,9 +77,10 @@ * 2.33.0 - Add SI tiling mode array query * 2.34.0 - Add CIK tiling mode array query * 2.35.0 - Add CIK macrotile mode array query + * 2.36.0 - Fix CIK DCE tiling setup */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 35 +#define KMS_DRIVER_MINOR 36 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); From 797f87f83b60685ff8a13fa0572d2f10393c50d3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Dec 2013 12:17:00 +0100 Subject: [PATCH 070/164] macvlan: fix netdev feature propagation from lower device There are inconsistencies wrt. feature propagation/inheritance between macvlan and the underlying interface. When a feature is turned off on the real device before a macvlan is created on top, these will remain enabled on the macvlan device, whereas turning off the feature on the lower device after macvlan creation the kernel will propagate the changes to the macvlan. The second issue is that, when propagating changes from underlying device to the macvlan interface, macvlan can erronously lose its NETIF_F_LLTX flag, as features are anded with the underlying device. However, LLTX should be kept since it has no dependencies on physical hardware (LLTX is set on macvlan creation regardless of the lower device properties, see 8ffab51b3dfc54876f145f15b351c41f3f703195 (macvlan: lockless tx path). The LLTX flag is now forced regardless of user settings in absence of layer2 hw acceleration (a6cc0cfa72e0b6d9f2c8fd858aa, net: Add layer 2 hardware acceleration operations for macvlan devices). Use netdev_increment_features to rebuild the feature set on capability changes on either the lower device or on the macvlan interface. As pointed out by Ben Hutchings, use netdev_update_features on NETDEV_FEAT_CHANGE event (it calls macvlan_fix_features/netdev_features_change if needed). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index acf93798dc675..60406b01f9eb3 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -690,8 +690,19 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev, netdev_features_t features) { struct macvlan_dev *vlan = netdev_priv(dev); + netdev_features_t mask; - return features & (vlan->set_features | ~MACVLAN_FEATURES); + features |= NETIF_F_ALL_FOR_ALL; + features &= (vlan->set_features | ~MACVLAN_FEATURES); + mask = features; + + features = netdev_increment_features(vlan->lowerdev->features, + features, + mask); + if (!vlan->fwd_priv) + features |= NETIF_F_LLTX; + + return features; } static const struct ethtool_ops macvlan_ethtool_ops = { @@ -1019,9 +1030,8 @@ static int macvlan_device_event(struct notifier_block *unused, break; case NETDEV_FEAT_CHANGE: list_for_each_entry(vlan, &port->vlans, list) { - vlan->dev->features = dev->features & MACVLAN_FEATURES; vlan->dev->gso_max_size = dev->gso_max_size; - netdev_features_change(vlan->dev); + netdev_update_features(vlan->dev); } break; case NETDEV_UNREGISTER: From db12cf27435356017e7ab375ef5e82a1cc749384 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Dec 2013 17:09:41 +0100 Subject: [PATCH 071/164] netfilter: WARN about wrong usage of sequence number adjustments Since commit 41d73ec053d2 (netfilter: nf_conntrack: make sequence number adjustments usuable without NAT), the sequence number extension is dynamically allocated. Instead of dying, give a WARN splash, in case of wrong usage of the seqadj code, e.g. when forgetting to allocate via nfct_seqadj_ext_add(). Wrong usage have been seen in the IPVS code path. Signed-off-by: Jesper Dangaard Brouer Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/nf_conntrack_seqadj.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 17c1bcb182c6b..b2d38da678220 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -36,6 +36,11 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, if (off == 0) return 0; + if (unlikely(!seqadj)) { + WARN(1, "Wrong seqadj usage, missing nfct_seqadj_ext_add()\n"); + return 0; + } + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); spin_lock_bh(&ct->lock); From b25adce1606427fd88da08f5203714cada7f6a98 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Dec 2013 17:09:47 +0100 Subject: [PATCH 072/164] ipvs: correct usage/allocation of seqadj ext in ipvs The IPVS FTP helper ip_vs_ftp could trigger an OOPS in nf_ct_seqadj_set, after commit 41d73ec053d2 (netfilter: nf_conntrack: make sequence number adjustments usuable without NAT). This is because, the seqadj ext is now allocated dynamically, and the IPVS code didn't handle this situation. Fix this in the IPVS nfct code by invoking the alloc function nfct_seqadj_ext_add(). Fixes: 41d73ec053d2 (netfilter: nf_conntrack: make sequence number adjustments usuable without NAT) Suggested-by: Julian Anastasov Signed-off-by: Jesper Dangaard Brouer Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_nfct.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index c8beafd401aa2..5a355a46d1dc4 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -97,6 +98,11 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return; + /* Applications may adjust TCP seqs */ + if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && + !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) + return; + /* * The connection is not yet in the hashtable, so we update it. * CIP->VIP will remain the same, so leave the tuple in From c2349758acf1874e4c2b93fe41d072336f1a31d0 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 18 Dec 2013 23:49:42 -0500 Subject: [PATCH 073/164] rds: prevent dereference of a NULL device Binding might result in a NULL device, which is dereferenced causing this BUG: [ 1317.260548] BUG: unable to handle kernel NULL pointer dereference at 000000000000097 4 [ 1317.261847] IP: [] rds_ib_laddr_check+0x82/0x110 [ 1317.263315] PGD 418bcb067 PUD 3ceb21067 PMD 0 [ 1317.263502] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 1317.264179] Dumping ftrace buffer: [ 1317.264774] (ftrace buffer empty) [ 1317.265220] Modules linked in: [ 1317.265824] CPU: 4 PID: 836 Comm: trinity-child46 Tainted: G W 3.13.0-rc4- next-20131218-sasha-00013-g2cebb9b-dirty #4159 [ 1317.267415] task: ffff8803ddf33000 ti: ffff8803cd31a000 task.ti: ffff8803cd31a000 [ 1317.268399] RIP: 0010:[] [] rds_ib_laddr_check+ 0x82/0x110 [ 1317.269670] RSP: 0000:ffff8803cd31bdf8 EFLAGS: 00010246 [ 1317.270230] RAX: 0000000000000000 RBX: ffff88020b0dd388 RCX: 0000000000000000 [ 1317.270230] RDX: ffffffff8439822e RSI: 00000000000c000a RDI: 0000000000000286 [ 1317.270230] RBP: ffff8803cd31be38 R08: 0000000000000000 R09: 0000000000000000 [ 1317.270230] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 [ 1317.270230] R13: 0000000054086700 R14: 0000000000a25de0 R15: 0000000000000031 [ 1317.270230] FS: 00007ff40251d700(0000) GS:ffff88022e200000(0000) knlGS:000000000000 0000 [ 1317.270230] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1317.270230] CR2: 0000000000000974 CR3: 00000003cd478000 CR4: 00000000000006e0 [ 1317.270230] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1317.270230] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602 [ 1317.270230] Stack: [ 1317.270230] 0000000054086700 5408670000a25de0 5408670000000002 0000000000000000 [ 1317.270230] ffffffff84223542 00000000ea54c767 0000000000000000 ffffffff86d26160 [ 1317.270230] ffff8803cd31be68 ffffffff84223556 ffff8803cd31beb8 ffff8800c6765280 [ 1317.270230] Call Trace: [ 1317.270230] [] ? rds_trans_get_preferred+0x42/0xa0 [ 1317.270230] [] rds_trans_get_preferred+0x56/0xa0 [ 1317.270230] [] rds_bind+0x73/0xf0 [ 1317.270230] [] SYSC_bind+0x92/0xf0 [ 1317.270230] [] ? context_tracking_user_exit+0xb8/0x1d0 [ 1317.270230] [] ? trace_hardirqs_on+0xd/0x10 [ 1317.270230] [] ? syscall_trace_enter+0x32/0x290 [ 1317.270230] [] SyS_bind+0xe/0x10 [ 1317.270230] [] tracesys+0xdd/0xe2 [ 1317.270230] Code: 00 8b 45 cc 48 8d 75 d0 48 c7 45 d8 00 00 00 00 66 c7 45 d0 02 00 89 45 d4 48 89 df e8 78 49 76 ff 41 89 c4 85 c0 75 0c 48 8b 03 <80> b8 74 09 00 00 01 7 4 06 41 bc 9d ff ff ff f6 05 2a b6 c2 02 [ 1317.270230] RIP [] rds_ib_laddr_check+0x82/0x110 [ 1317.270230] RSP [ 1317.270230] CR2: 0000000000000974 Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/rds/ib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index b4c8b0022feee..ba2dffeff6087 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support iWARP devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", From 1a29321ed045e3aad23c5f7b63036e465ee3093a Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 23 Dec 2013 08:02:11 -0500 Subject: [PATCH 074/164] net_sched: act: Dont increment refcnt on replace This is a bug fix. The existing code tries to kill many birds with one stone: Handling binding of actions to filters, new actions and replacing of action attributes. A simple test case to illustrate: XXXX moja@fe1:~$ sudo tc actions add action drop index 12 moja@fe1:~$ actions get action gact index 12 action order 1: gact action drop random type none pass val 0 index 12 ref 1 bind 0 moja@fe1:~$ sudo tc actions replace action ok index 12 moja@fe1:~$ actions get action gact index 12 action order 1: gact action drop random type none pass val 0 index 12 ref 2 bind 0 XXXX The above shows the refcounf being wrongly incremented on replace. There are more complex scenarios with binding of actions to filters that i am leaving out that didnt work as well... Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 10 +++++----- net/sched/act_gact.c | 7 ++++--- net/sched/act_ipt.c | 8 +++++--- net/sched/act_nat.c | 10 +++++----- net/sched/act_pedit.c | 8 +++++--- net/sched/act_police.c | 4 +++- net/sched/act_simple.c | 9 ++++++--- net/sched/act_skbedit.c | 7 ++++--- 8 files changed, 37 insertions(+), 26 deletions(-) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 5c5edf56adbd4..11fe1a416433f 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -77,16 +77,16 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, &csum_idx_gen, &csum_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); - p = to_tcf_csum(pc); ret = ACT_P_CREATED; } else { - p = to_tcf_csum(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &csum_hash_info); + if (bind)/* dont override defaults */ + return 0; + tcf_hash_release(pc, bind, &csum_hash_info); + if (!ovr) return -EEXIST; - } } + p = to_tcf_csum(pc); spin_lock_bh(&p->tcf_lock); p->tcf_action = parm->action; p->update_flags = parm->update_flags; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 5645a4d32abdd..eb9ba60ebab43 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -102,10 +102,11 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, return PTR_ERR(pc); ret = ACT_P_CREATED; } else { - if (!ovr) { - tcf_hash_release(pc, bind, &gact_hash_info); + if (bind)/* dont override defaults */ + return 0; + tcf_hash_release(pc, bind, &gact_hash_info); + if (!ovr) return -EEXIST; - } } gact = to_gact(pc); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 882a89762f77c..dcbfe8ce04a6a 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -141,10 +141,12 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, return PTR_ERR(pc); ret = ACT_P_CREATED; } else { - if (!ovr) { - tcf_ipt_release(to_ipt(pc), bind); + if (bind)/* dont override defaults */ + return 0; + tcf_ipt_release(to_ipt(pc), bind); + + if (!ovr) return -EEXIST; - } } ipt = to_ipt(pc); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 6a15ace002411..76869538d0287 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -70,15 +70,15 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, &nat_idx_gen, &nat_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); - p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { - p = to_tcf_nat(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &nat_hash_info); + if (bind) + return 0; + tcf_hash_release(pc, bind, &nat_hash_info); + if (!ovr) return -EEXIST; - } } + p = to_tcf_nat(pc); spin_lock_bh(&p->tcf_lock); p->old_addr = parm->old_addr; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 03b67674169c5..7aa2dcd989f84 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -84,10 +84,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { p = to_pedit(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &pedit_hash_info); + tcf_hash_release(pc, bind, &pedit_hash_info); + if (bind) + return 0; + if (!ovr) return -EEXIST; - } + if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 16a62c36928a7..ef246d87e68bb 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -177,10 +177,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; + return 0; } if (ovr) goto override; - return ret; + /* not replacing */ + return -EEXIST; } } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 31157d3e729c8..f7b45ab853887 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -142,10 +142,13 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_defact(pc); - if (!ovr) { - tcf_simp_release(d, bind); + + if (bind) + return 0; + tcf_simp_release(d, bind); + if (!ovr) return -EEXIST; - } + reset_policy(d, defdata, parm); } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 35ea643b43255..8fe9d25c3008e 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -120,10 +120,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &skbedit_hash_info); + if (bind) + return 0; + tcf_hash_release(pc, bind, &skbedit_hash_info); + if (!ovr) return -EEXIST; - } } spin_lock_bh(&d->tcf_lock); From 375679104ab3ccfd18dcbd7ba503734fb9a2c63a Mon Sep 17 00:00:00 2001 From: Nithin Sujir Date: Thu, 19 Dec 2013 17:44:11 -0800 Subject: [PATCH 075/164] tg3: Expand 4g_overflow_test workaround to skb fragments of any size. The current driver assumes that an skb fragment can only be upto jumbo size. Presumably this was a fast-path optimization. This assumption is no longer true as fragments can be upto 32k. v2: Remove unnecessary parantheses per Eric Dumazet. Cc: stable@vger.kernel.org Signed-off-by: Nithin Nayak Sujir Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index f3dd93b4aeaac..15a66e4b1f57a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7622,7 +7622,7 @@ static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len) { u32 base = (u32) mapping & 0xffffffff; - return (base > 0xffffdcc0) && (base + len + 8 < base); + return base + len + 8 < base; } /* Test for TSO DMA buffers that cross into regions which are within MSS bytes From 37ec274e9713eafc2ba6c4471420f06cb8f68ecf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Dec 2013 18:10:40 -0800 Subject: [PATCH 076/164] arc_emac: fix potential use after free Signed-off-by: Eric Dumazet skb_tx_timestamp(skb) should be called _before_ TX completion has a chance to trigger, otherwise it is too late and we access freed memory. Fixes: e4f2379db6c6 ("ethernet/arc/arc_emac - Add new driver") From: Eric Dumazet Cc: Alexey Brodkin Cc: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index b2ffad1304d22..248baf6273fb7 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -565,6 +565,8 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) /* Make sure pointer to data buffer is set */ wmb(); + skb_tx_timestamp(skb); + *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len); /* Increment index to point to the next BD */ @@ -579,8 +581,6 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) arc_reg_set(priv, R_STATUS, TXPL_MASK); - skb_tx_timestamp(skb); - return NETDEV_TX_OK; } From 73409f3b0ff0ae7bc1f647936b23e6d5d5dcbe28 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 Dec 2013 13:04:33 -0500 Subject: [PATCH 077/164] net: Add some clarification to skb_tx_timestamp() comment. We've seen so many instances of people invoking skb_tx_timestamp() after the device already has been given the packet, that it's worth being a little bit more verbose and explicit in this comment. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6aae838905200..6f69b3f914fbf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2531,6 +2531,10 @@ static inline void sw_tx_timestamp(struct sk_buff *skb) * Ethernet MAC Drivers should call this function in their hard_xmit() * function immediately before giving the sk_buff to the MAC hardware. * + * Specifically, one should make absolutely sure that this function is + * called before TX completion of this packet can trigger. Otherwise + * the packet could potentially already be freed. + * * @skb: A socket buffer. */ static inline void skb_tx_timestamp(struct sk_buff *skb) From 4710b2ba873692194c636811ceda398f95e02db2 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 20 Dec 2013 15:10:44 +1100 Subject: [PATCH 078/164] netxen: Correct off-by-one errors in bounds checks netxen_process_lro() contains two bounds checks. One for the ring number against the number of rings, and one for the Rx buffer ID against the array of receive buffers. Both of these have off-by-one errors, using > instead of >=. The correct versions are used in netxen_process_rcv(), they're just wrong in netxen_process_lro(). Signed-off-by: David Gibson Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index 7692dfd4f262d..cc68657f05368 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -1604,13 +1604,13 @@ netxen_process_lro(struct netxen_adapter *adapter, u32 seq_number; u8 vhdr_len = 0; - if (unlikely(ring > adapter->max_rds_rings)) + if (unlikely(ring >= adapter->max_rds_rings)) return NULL; rds_ring = &recv_ctx->rds_rings[ring]; index = netxen_get_lro_sts_refhandle(sts_data0); - if (unlikely(index > rds_ring->num_desc)) + if (unlikely(index >= rds_ring->num_desc)) return NULL; buffer = &rds_ring->rx_buf_arr[index]; From 6a9eadccff2926e392173a989042f14c867cffbf Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 20 Dec 2013 17:20:12 +0800 Subject: [PATCH 079/164] ipv6: release dst properly in ipip6_tunnel_xmit if a dst is not attached to anywhere, it should be released before exit ipip6_tunnel_xmit, otherwise cause dst memory leakage. Fixes: 61c1db7fae21 ("ipv6: sit: add GSO/TSO support") Signed-off-by: Li RongQing Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a710fdec42d38..c87482252577c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -966,8 +966,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); - if (IS_ERR(skb)) + if (IS_ERR(skb)) { + ip_rt_put(rt); goto out; + } err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); From ebcc943c11f48617a7536a132c64d2637075e407 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 9 Dec 2013 09:18:09 -0600 Subject: [PATCH 080/164] Add missing end of line termination to some cifs messages Signed-off-by: Steve French Signed-off-by: Gregor Beck Reviewed-by: Jeff Layton --- fs/cifs/cifssmb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 124aa0230c1b8..d707edb6b8526 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4010,7 +4010,7 @@ CIFSSMBQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); + cifs_dbg(FYI, "Send error in QFileInfo = %d", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -4179,7 +4179,7 @@ CIFSSMBUnixQFileInfo(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); + cifs_dbg(FYI, "Send error in UnixQFileInfo = %d", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -4263,7 +4263,7 @@ CIFSSMBUnixQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); + cifs_dbg(FYI, "Send error in UnixQPathInfo = %d", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); From 750b8de6c4277d7034061e1da50663aa1b0479e4 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Mon, 25 Nov 2013 17:09:48 +0000 Subject: [PATCH 081/164] cifs: We do not drop reference to tlink in CIFSCheckMFSymlink() When we obtain tcon from cifs_sb, we use cifs_sb_tlink() to first obtain tlink which also grabs a reference to it. We do not drop this reference to tlink once we are done with the call. The patch fixes this issue by instead passing tcon as a parameter and avoids having to obtain a reference to the tlink. A lookup for the tcon is already made in the calling functions and this way we avoid having to re-run the lookup. This is also consistent with the argument list for other similar calls for M-F symlinks. We should also return an ENOSYS when we do not find a protocol specific function to lookup the MF Symlink data. Signed-off-by: Sachin Prabhu Reviewed-by: Jeff Layton CC: Stable Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 7 ++++--- fs/cifs/inode.c | 6 ++++-- fs/cifs/link.c | 26 +++++++++++--------------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index aa3397620342d..2c29db6a247e4 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -477,9 +477,10 @@ extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, const int netfid, __u64 *pExtAttrBits, __u64 *pMask); extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); -extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, - const unsigned char *path, - struct cifs_sb_info *cifs_sb, unsigned int xid); +extern int CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + struct cifs_fattr *fattr, + const unsigned char *path); extern int mdfour(unsigned char *, unsigned char *, int); extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, const struct nls_table *codepage); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 36f9ebb93ceba..49719b8228e58 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -383,7 +383,8 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* check for Minshall+French symlinks */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { - int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); + int tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr, + full_path); if (tmprc) cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); } @@ -799,7 +800,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, /* check for Minshall+French symlinks */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { - tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); + tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr, + full_path); if (tmprc) cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index cc0234710ddbb..92aee08483a52 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -354,34 +354,30 @@ open_query_close_cifs_symlink(const unsigned char *path, char *pbuf, int -CIFSCheckMFSymlink(struct cifs_fattr *fattr, - const unsigned char *path, - struct cifs_sb_info *cifs_sb, unsigned int xid) +CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, + const unsigned char *path) { - int rc = 0; + int rc; u8 *buf = NULL; unsigned int link_len = 0; unsigned int bytes_read = 0; - struct cifs_tcon *ptcon; if (!CIFSCouldBeMFSymlink(fattr)) /* it's not a symlink */ return 0; buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); - if (!buf) { - rc = -ENOMEM; - goto out; - } + if (!buf) + return -ENOMEM; - ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); - if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink)) - rc = ptcon->ses->server->ops->query_mf_symlink(path, buf, - &bytes_read, cifs_sb, xid); + if (tcon->ses->server->ops->query_mf_symlink) + rc = tcon->ses->server->ops->query_mf_symlink(path, buf, + &bytes_read, cifs_sb, xid); else - goto out; + rc = -ENOSYS; - if (rc != 0) + if (rc) goto out; if (bytes_read == 0) /* not a symlink */ From f1e3268126a35b9d3cb8bf67487fcc6cd13991d8 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Wed, 11 Dec 2013 16:29:53 -0600 Subject: [PATCH 082/164] cifs: set FILE_CREATED Set FILE_CREATED on O_CREAT|O_EXCL. cifs code didn't change during commit 116cc0225381415b96551f725455d067f63a76a0 Kernel bugzilla 66251 Signed-off-by: Shirish Pargaonkar Acked-by: Jeff Layton CC: Stable Signed-off-by: Steve French --- fs/cifs/dir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 11ff5f116b20e..a514e0a65f69b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -193,7 +193,7 @@ check_name(struct dentry *direntry) static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, struct tcon_link *tlink, unsigned oflags, umode_t mode, - __u32 *oplock, struct cifs_fid *fid, int *created) + __u32 *oplock, struct cifs_fid *fid) { int rc = -ENOENT; int create_options = CREATE_NOT_DIR; @@ -349,7 +349,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, .device = 0, }; - *created |= FILE_CREATED; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { args.uid = current_fsuid(); if (inode->i_mode & S_ISGID) @@ -480,13 +479,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, cifs_add_pending_open(&fid, tlink, &open); rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fid, opened); + &oplock, &fid); if (rc) { cifs_del_pending_open(&open); goto out; } + if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + *opened |= FILE_CREATED; + rc = finish_open(file, direntry, generic_file_open, opened); if (rc) { if (server->ops->close) @@ -529,7 +531,6 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, struct TCP_Server_Info *server; struct cifs_fid fid; __u32 oplock; - int created = FILE_CREATED; cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", inode, direntry->d_name.name, direntry); @@ -546,7 +547,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, server->ops->new_lease_key(&fid); rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fid, &created); + &oplock, &fid); if (!rc && server->ops->close) server->ops->close(xid, tcon, &fid); From e38195bf32d7ccb2ae3f56f36b895daf455ffd94 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Dec 2013 18:32:35 +0100 Subject: [PATCH 083/164] netfilter: nf_tables: fix dumping with large number of sets If not table name is specified, the dumping of the existing sets may be incomplete with a sufficiently large number of sets and tables. This patch fixes missing reset of the cursors after finding the location of the last object that has been included in the previous multi-part message. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f93b7d06f4be9..d9fcd279942fe 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2098,17 +2098,21 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, struct netlink_callback *cb) { const struct nft_set *set; - unsigned int idx = 0, s_idx = cb->args[0]; + unsigned int idx, s_idx = cb->args[0]; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; if (cb->args[1]) return skb->len; list_for_each_entry(table, &ctx->afi->tables, list) { - if (cur_table && cur_table != table) - continue; + if (cur_table) { + if (cur_table != table) + continue; + cur_table = NULL; + } ctx->table = table; + idx = 0; list_for_each_entry(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; From d2012975619251bdfeb7a5159faa7727ea9cddd3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 27 Dec 2013 10:44:23 +0100 Subject: [PATCH 084/164] netfilter: nf_tables: fix oops when updating table with user chains This patch fixes a crash while trying to deactivate a table that contains user chains. You can reproduce it via: % nft add table table1 % nft add chain table1 chain1 % nft-table-upd ip table1 dormant [ 253.021026] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 [ 253.021114] IP: [] nf_register_hook+0x35/0x6f [ 253.021167] PGD 30fa5067 PUD 30fa2067 PMD 0 [ 253.021208] Oops: 0000 [#1] SMP [...] [ 253.023305] Call Trace: [ 253.023331] [] nf_tables_newtable+0x11c/0x258 [nf_tables] [ 253.023385] [] nfnetlink_rcv_msg+0x1f4/0x226 [nfnetlink] [ 253.023438] [] ? nfnetlink_rcv_msg+0x7a/0x226 [nfnetlink] [ 253.023491] [] ? nfnetlink_bind+0x45/0x45 [nfnetlink] [ 253.023542] [] netlink_rcv_skb+0x3c/0x88 [ 253.023586] [] nfnetlink_rcv+0x3af/0x3e4 [nfnetlink] [ 253.023638] [] ? _raw_read_unlock+0x22/0x34 [ 253.023683] [] netlink_unicast+0xe2/0x161 [ 253.023727] [] netlink_sendmsg+0x304/0x332 [ 253.023773] [] __sock_sendmsg_nosec+0x25/0x27 [ 253.023820] [] sock_sendmsg+0x5a/0x7b [ 253.023861] [] ? copy_from_user+0x2a/0x2c [ 253.023905] [] ? move_addr_to_kernel+0x35/0x60 [ 253.023952] [] SYSC_sendto+0x119/0x15c [ 253.023995] [] ? sysret_check+0x1b/0x56 [ 253.024039] [] ? trace_hardirqs_on_caller+0x140/0x1db [ 253.024090] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 253.024141] [] SyS_sendto+0x9/0xb [ 253.026219] [] system_call_fastpath+0x16/0x1b Reported-by: Alex Wei Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d9fcd279942fe..d65c80b0e84d7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -312,6 +312,9 @@ static int nf_tables_table_enable(struct nft_table *table) int err, i = 0; list_for_each_entry(chain, &table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + err = nf_register_hook(&nft_base_chain(chain)->ops); if (err < 0) goto err; @@ -321,6 +324,9 @@ static int nf_tables_table_enable(struct nft_table *table) return 0; err: list_for_each_entry(chain, &table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + if (i-- <= 0) break; @@ -333,8 +339,10 @@ static int nf_tables_table_disable(struct nft_table *table) { struct nft_chain *chain; - list_for_each_entry(chain, &table->chains, list) - nf_unregister_hook(&nft_base_chain(chain)->ops); + list_for_each_entry(chain, &table->chains, list) { + if (chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + } return 0; } From 46b76e0b8b5e21fa5a2387d6f72b193514e7f722 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 2 Dec 2013 20:38:30 +0100 Subject: [PATCH 085/164] batman-adv: fix alignment for batadv_coded_packet The compiler may decide to pad the structure, and then it does not have the expected size of 46 byte. Fix this by moving it in the pragma pack(2) part of the code. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/packet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 207459b62966d..10597a6729a6f 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -315,8 +315,6 @@ struct batadv_bcast_packet { */ }; -#pragma pack() - /** * struct batadv_coded_packet - network coded packet * @header: common batman packet header and ttl of first included packet @@ -349,6 +347,8 @@ struct batadv_coded_packet { __be16 coded_len; }; +#pragma pack() + /** * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload * @header: common batman packet header From a40d9b075c21f06872de3f05cc2eb3d06665e2ff Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 2 Dec 2013 20:38:31 +0100 Subject: [PATCH 086/164] batman-adv: fix header alignment by unrolling batadv_header The size of the batadv_header of 3 is problematic on some architectures which automatically pad all structures to a 32 bit boundary. To not lose performance by packing this struct, better embed it into the various host structures. Reported-by: Russell King Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 36 ++++++------- net/batman-adv/distributed-arp-table.c | 6 +-- net/batman-adv/fragmentation.c | 8 +-- net/batman-adv/icmp_socket.c | 6 +-- net/batman-adv/main.c | 12 ++--- net/batman-adv/network-coding.c | 22 ++++---- net/batman-adv/packet.h | 74 ++++++++++++++++++-------- net/batman-adv/routing.c | 18 +++---- net/batman-adv/send.c | 10 ++-- net/batman-adv/soft-interface.c | 11 ++-- 10 files changed, 118 insertions(+), 85 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a2b480a908723..b9c8a6eedf453 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -307,9 +307,9 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) hard_iface->bat_iv.ogm_buff = ogm_buff; batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; - batadv_ogm_packet->header.packet_type = BATADV_IV_OGM; - batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION; - batadv_ogm_packet->header.ttl = 2; + batadv_ogm_packet->packet_type = BATADV_IV_OGM; + batadv_ogm_packet->version = BATADV_COMPAT_VERSION; + batadv_ogm_packet->ttl = 2; batadv_ogm_packet->flags = BATADV_NO_FLAGS; batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; @@ -346,7 +346,7 @@ batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP; - batadv_ogm_packet->header.ttl = BATADV_TTL; + batadv_ogm_packet->ttl = BATADV_TTL; } /* when do we schedule our own ogm to be sent */ @@ -435,7 +435,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, fwd_str, (packet_num > 0 ? "aggregated " : ""), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl, + batadv_ogm_packet->tq, batadv_ogm_packet->ttl, (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? "on" : "off"), hard_iface->net_dev->name, @@ -491,7 +491,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) /* multihomed peer assumed * non-primary OGMs are only broadcasted on their interface */ - if ((directlink && (batadv_ogm_packet->header.ttl == 1)) || + if ((directlink && (batadv_ogm_packet->ttl == 1)) || (forw_packet->own && (forw_packet->if_incoming != primary_if))) { /* FIXME: what about aggregated packets ? */ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -499,7 +499,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) (forw_packet->own ? "Sending own" : "Forwarding"), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->header.ttl, + batadv_ogm_packet->ttl, forw_packet->if_incoming->net_dev->name, forw_packet->if_incoming->net_dev->dev_addr); @@ -572,7 +572,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, */ if ((!directlink) && (!(batadv_ogm_packet->flags & BATADV_DIRECTLINK)) && - (batadv_ogm_packet->header.ttl != 1) && + (batadv_ogm_packet->ttl != 1) && /* own packets originating non-primary * interfaces leave only that interface @@ -587,7 +587,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, * interface only - we still can aggregate */ if ((directlink) && - (new_bat_ogm_packet->header.ttl == 1) && + (new_bat_ogm_packet->ttl == 1) && (forw_packet->if_incoming == if_incoming) && /* packets from direct neighbors or @@ -778,7 +778,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); uint16_t tvlv_len; - if (batadv_ogm_packet->header.ttl <= 1) { + if (batadv_ogm_packet->ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); return; } @@ -798,7 +798,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, tvlv_len = ntohs(batadv_ogm_packet->tvlv_len); - batadv_ogm_packet->header.ttl--; + batadv_ogm_packet->ttl--; memcpy(batadv_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN); /* apply hop penalty */ @@ -807,7 +807,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: tq: %i, ttl: %i\n", - batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl); + batadv_ogm_packet->tq, batadv_ogm_packet->ttl); /* switch of primaries first hop flag when forwarding */ batadv_ogm_packet->flags &= ~BATADV_PRIMARIES_FIRST_HOP; @@ -972,8 +972,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, spin_unlock_bh(&neigh_node->bat_iv.lq_update_lock); if (dup_status == BATADV_NO_DUP) { - orig_node->last_ttl = batadv_ogm_packet->header.ttl; - neigh_node->last_ttl = batadv_ogm_packet->header.ttl; + orig_node->last_ttl = batadv_ogm_packet->ttl; + neigh_node->last_ttl = batadv_ogm_packet->ttl; } batadv_bonding_candidate_add(bat_priv, orig_node, neigh_node); @@ -1247,7 +1247,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, * packet in an aggregation. Here we expect that the padding * is always zero (or not 0x01) */ - if (batadv_ogm_packet->header.packet_type != BATADV_IV_OGM) + if (batadv_ogm_packet->packet_type != BATADV_IV_OGM) return; /* could be changed by schedule_own_packet() */ @@ -1267,8 +1267,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, batadv_ogm_packet->prev_sender, ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, - batadv_ogm_packet->header.ttl, - batadv_ogm_packet->header.version, has_directlink_flag); + batadv_ogm_packet->ttl, + batadv_ogm_packet->version, has_directlink_flag); rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { @@ -1433,7 +1433,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, * seqno and similar ttl as the non-duplicate */ sameseq = orig_node->last_real_seqno == ntohl(batadv_ogm_packet->seqno); - similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl; + similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->ttl; if (is_bidirect && ((dup_status == BATADV_NO_DUP) || (sameseq && similar_ttl))) batadv_iv_ogm_orig_update(bat_priv, orig_node, ethhdr, diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 6c8c3934bd7b4..b316a4cb6f147 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -349,7 +349,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - switch (unicast_4addr_packet->u.header.packet_type) { + switch (unicast_4addr_packet->u.packet_type) { case BATADV_UNICAST: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* encapsulated within a UNICAST packet\n"); @@ -374,7 +374,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, break; default: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: Unknown (%u)!\n", - unicast_4addr_packet->u.header.packet_type); + unicast_4addr_packet->u.packet_type); } break; case BATADV_BCAST: @@ -387,7 +387,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, default: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* encapsulated within an unknown packet type (0x%x)\n", - unicast_4addr_packet->u.header.packet_type); + unicast_4addr_packet->u.packet_type); } } diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 271d321b3a040..6ddb6145ffb56 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -355,7 +355,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES, skb->len + ETH_HLEN); - packet->header.ttl--; + packet->ttl--; batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = true; @@ -444,9 +444,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb, goto out_err; /* Create one header to be copied to all fragments */ - frag_header.header.packet_type = BATADV_UNICAST_FRAG; - frag_header.header.version = BATADV_COMPAT_VERSION; - frag_header.header.ttl = BATADV_TTL; + frag_header.packet_type = BATADV_UNICAST_FRAG; + frag_header.version = BATADV_COMPAT_VERSION; + frag_header.ttl = BATADV_TTL; frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno)); frag_header.reserved = 0; frag_header.no = 0; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 29ae4efe3543e..130cc3217e2b0 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -194,7 +194,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto free_skb; } - if (icmp_header->header.packet_type != BATADV_ICMP) { + if (icmp_header->packet_type != BATADV_ICMP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n"); len = -EINVAL; @@ -243,9 +243,9 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, icmp_header->uid = socket_client->index; - if (icmp_header->header.version != BATADV_COMPAT_VERSION) { + if (icmp_header->version != BATADV_COMPAT_VERSION) { icmp_header->msg_type = BATADV_PARAMETER_PROBLEM; - icmp_header->header.version = BATADV_COMPAT_VERSION; + icmp_header->version = BATADV_COMPAT_VERSION; batadv_socket_add_packet(socket_client, icmp_header, packet_len); goto free_skb; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index c51a5e568f0a8..d87778b9c5b4e 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -383,17 +383,17 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data; - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { + if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); + batadv_ogm_packet->version); goto err_free; } /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ - idx = batadv_ogm_packet->header.packet_type; + idx = batadv_ogm_packet->packet_type; ret = (*batadv_rx_handler[idx])(skb, hard_iface); if (ret == NET_RX_DROP) @@ -1119,9 +1119,9 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, skb_reserve(skb, ETH_HLEN); tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; - unicast_tvlv_packet->header.packet_type = BATADV_UNICAST_TVLV; - unicast_tvlv_packet->header.version = BATADV_COMPAT_VERSION; - unicast_tvlv_packet->header.ttl = BATADV_TTL; + unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV; + unicast_tvlv_packet->version = BATADV_COMPAT_VERSION; + unicast_tvlv_packet->ttl = BATADV_TTL; unicast_tvlv_packet->reserved = 0; unicast_tvlv_packet->tvlv_len = htons(tvlv_len); unicast_tvlv_packet->align = 0; diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 351e199bc0aff..511d7e1eea38b 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -722,7 +722,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, { if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) return false; - if (orig_node->last_ttl != ogm_packet->header.ttl + 1) + if (orig_node->last_ttl != ogm_packet->ttl + 1) return false; if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) return false; @@ -1082,9 +1082,9 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, coded_packet = (struct batadv_coded_packet *)skb_dest->data; skb_reset_mac_header(skb_dest); - coded_packet->header.packet_type = BATADV_CODED; - coded_packet->header.version = BATADV_COMPAT_VERSION; - coded_packet->header.ttl = packet1->header.ttl; + coded_packet->packet_type = BATADV_CODED; + coded_packet->version = BATADV_COMPAT_VERSION; + coded_packet->ttl = packet1->ttl; /* Info about first unicast packet */ memcpy(coded_packet->first_source, first_source, ETH_ALEN); @@ -1097,7 +1097,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, memcpy(coded_packet->second_source, second_source, ETH_ALEN); memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN); coded_packet->second_crc = packet_id2; - coded_packet->second_ttl = packet2->header.ttl; + coded_packet->second_ttl = packet2->ttl; coded_packet->second_ttvn = packet2->ttvn; coded_packet->coded_len = htons(coding_len); @@ -1452,7 +1452,7 @@ bool batadv_nc_skb_forward(struct sk_buff *skb, /* We only handle unicast packets */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; - if (packet->header.packet_type != BATADV_UNICAST) + if (packet->packet_type != BATADV_UNICAST) goto out; /* Try to find a coding opportunity and send the skb if one is found */ @@ -1505,7 +1505,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, /* Check for supported packet type */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; - if (packet->header.packet_type != BATADV_UNICAST) + if (packet->packet_type != BATADV_UNICAST) goto out; /* Find existing nc_path or create a new */ @@ -1623,7 +1623,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, ttvn = coded_packet_tmp.second_ttvn; } else { orig_dest = coded_packet_tmp.first_orig_dest; - ttl = coded_packet_tmp.header.ttl; + ttl = coded_packet_tmp.ttl; ttvn = coded_packet_tmp.first_ttvn; } @@ -1648,9 +1648,9 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, /* Create decoded unicast packet */ unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.packet_type = BATADV_UNICAST; - unicast_packet->header.version = BATADV_COMPAT_VERSION; - unicast_packet->header.ttl = ttl; + unicast_packet->packet_type = BATADV_UNICAST; + unicast_packet->version = BATADV_COMPAT_VERSION; + unicast_packet->ttl = ttl; memcpy(unicast_packet->dest, orig_dest, ETH_ALEN); unicast_packet->ttvn = ttvn; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 10597a6729a6f..175ce7d721d64 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -164,23 +164,18 @@ struct batadv_bla_claim_dst { __be16 group; /* group id */ }; -struct batadv_header { - uint8_t packet_type; - uint8_t version; /* batman version field */ - uint8_t ttl; - /* the parent struct has to add a byte after the header to make - * everything 4 bytes aligned again - */ -}; - /** * struct batadv_ogm_packet - ogm (routing protocol) packet - * @header: common batman packet header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @flags: contains routing relevant flags - see enum batadv_iv_flags * @tvlv_len: length of tvlv data following the ogm header */ struct batadv_ogm_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t flags; __be32 seqno; uint8_t orig[ETH_ALEN]; @@ -197,14 +192,18 @@ struct batadv_ogm_packet { /** * batadv_icmp_header - common ICMP header - * @header: common batman header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node * @uid: local ICMP socket identifier */ struct batadv_icmp_header { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[ETH_ALEN]; uint8_t orig[ETH_ALEN]; @@ -253,8 +252,18 @@ struct batadv_icmp_packet_rr { */ #pragma pack(2) +/** + * struct batadv_unicast_packet - unicast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @ttvn: translation table version number + * @dest: originator destination of the unicast packet + */ struct batadv_unicast_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t ttvn; /* destination translation table version number */ uint8_t dest[ETH_ALEN]; /* "4 bytes boundary + 2 bytes" long to make the payload after the @@ -280,7 +289,9 @@ struct batadv_unicast_4addr_packet { /** * struct batadv_frag_packet - fragmented packet - * @header: common batman packet header with type, compatversion, and ttl + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @dest: final destination used when routing fragments * @orig: originator of the fragment used when merging the packet * @no: fragment number within this sequence @@ -289,7 +300,9 @@ struct batadv_unicast_4addr_packet { * @total_size: size of the merged packet */ struct batadv_frag_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; #if defined(__BIG_ENDIAN_BITFIELD) uint8_t no:4; uint8_t reserved:4; @@ -305,8 +318,19 @@ struct batadv_frag_packet { __be16 total_size; }; +/** + * struct batadv_bcast_packet - broadcast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @reserved: reserved byte for alignment + * @seqno: sequence identification + * @orig: originator of the broadcast packet + */ struct batadv_bcast_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t reserved; __be32 seqno; uint8_t orig[ETH_ALEN]; @@ -317,7 +341,9 @@ struct batadv_bcast_packet { /** * struct batadv_coded_packet - network coded packet - * @header: common batman packet header and ttl of first included packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @reserved: Align following fields to 2-byte boundaries * @first_source: original source of first included packet * @first_orig_dest: original destinal of first included packet @@ -332,7 +358,9 @@ struct batadv_bcast_packet { * @coded_len: length of network coded part of the payload */ struct batadv_coded_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t first_ttvn; /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */ uint8_t first_source[ETH_ALEN]; @@ -351,7 +379,9 @@ struct batadv_coded_packet { /** * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload - * @header: common batman packet header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @reserved: reserved field (for packet alignment) * @src: address of the source * @dst: address of the destination @@ -359,7 +389,9 @@ struct batadv_coded_packet { * @align: 2 bytes to align the header to a 4 byte boundry */ struct batadv_unicast_tvlv_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t reserved; uint8_t dst[ETH_ALEN]; uint8_t src[ETH_ALEN]; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index d4114d775ad61..5b52d718a6eb8 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -308,7 +308,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, memcpy(icmph->dst, icmph->orig, ETH_ALEN); memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN); icmph->msg_type = BATADV_ECHO_REPLY; - icmph->header.ttl = BATADV_TTL; + icmph->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); if (res != NET_XMIT_DROP) @@ -363,7 +363,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, ETH_ALEN); icmp_packet->icmph.msg_type = BATADV_TTL_EXCEEDED; - icmp_packet->icmph.header.ttl = BATADV_TTL; + icmp_packet->icmph.ttl = BATADV_TTL; if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; @@ -434,7 +434,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, return batadv_recv_my_icmp_packet(bat_priv, skb); /* TTL exceeded */ - if (icmph->header.ttl < 2) + if (icmph->ttl < 2) return batadv_recv_icmp_ttl_exceeded(bat_priv, skb); /* get routing information */ @@ -449,7 +449,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmph = (struct batadv_icmp_header *)skb->data; /* decrement ttl */ - icmph->header.ttl--; + icmph->ttl--; /* route it */ if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) @@ -709,7 +709,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_packet *)skb->data; /* TTL exceeded */ - if (unicast_packet->header.ttl < 2) { + if (unicast_packet->ttl < 2) { pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n", ethhdr->h_source, unicast_packet->dest); goto out; @@ -727,9 +727,9 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.ttl--; + unicast_packet->ttl--; - switch (unicast_packet->header.packet_type) { + switch (unicast_packet->packet_type) { case BATADV_UNICAST_4ADDR: hdr_len = sizeof(struct batadv_unicast_4addr_packet); break; @@ -970,7 +970,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - is4addr = unicast_packet->header.packet_type == BATADV_UNICAST_4ADDR; + is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); @@ -1160,7 +1160,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; - if (bcast_packet->header.ttl < 2) + if (bcast_packet->ttl < 2) goto out; orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index c83be5ebaa285..fba4dcfcfac21 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -161,11 +161,11 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, return false; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.version = BATADV_COMPAT_VERSION; + unicast_packet->version = BATADV_COMPAT_VERSION; /* batman packet type: unicast */ - unicast_packet->header.packet_type = BATADV_UNICAST; + unicast_packet->packet_type = BATADV_UNICAST; /* set unicast ttl */ - unicast_packet->header.ttl = BATADV_TTL; + unicast_packet->ttl = BATADV_TTL; /* copy the destination for faster routing */ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); /* set the destination tt version number */ @@ -221,7 +221,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, goto out; uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - uc_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR; + uc_4addr_packet->u.packet_type = BATADV_UNICAST_4ADDR; memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); uc_4addr_packet->subtype = packet_subtype; uc_4addr_packet->reserved = 0; @@ -436,7 +436,7 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; - bcast_packet->header.ttl--; + bcast_packet->ttl--; skb_reset_mac_header(newskb); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 36f050876f826..b4881f8e28e81 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -264,11 +264,11 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; bcast_packet = (struct batadv_bcast_packet *)skb->data; - bcast_packet->header.version = BATADV_COMPAT_VERSION; - bcast_packet->header.ttl = BATADV_TTL; + bcast_packet->version = BATADV_COMPAT_VERSION; + bcast_packet->ttl = BATADV_TTL; /* batman packet type: broadcast */ - bcast_packet->header.packet_type = BATADV_BCAST; + bcast_packet->packet_type = BATADV_BCAST; bcast_packet->reserved = 0; /* hw address of first interface is the orig mac because only @@ -328,7 +328,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, int hdr_size, struct batadv_orig_node *orig_node) { - struct batadv_header *batadv_header = (struct batadv_header *)skb->data; + struct batadv_bcast_packet *batadv_bcast_packet; struct batadv_priv *bat_priv = netdev_priv(soft_iface); __be16 ethertype = htons(ETH_P_BATMAN); struct vlan_ethhdr *vhdr; @@ -336,7 +336,8 @@ void batadv_interface_rx(struct net_device *soft_iface, unsigned short vid; bool is_bcast; - is_bcast = (batadv_header->packet_type == BATADV_BCAST); + batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; + is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); /* check if enough space is available for pulling, and pull */ if (!pskb_may_pull(skb, hdr_size)) From 27a417e6badac911487c10990e04f5ccbb11b1b2 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 5 Dec 2013 15:33:00 +0100 Subject: [PATCH 087/164] batman-adv: fix size of batadv_icmp_header struct batadv_icmp_header currently has a size of 17, which will be padded to 20 on some architectures. Fix this by unrolling the header into the parent structures. Moreover keep the ICMP parsing functions as generic as they are now by using a stub icmp_header struct during packet parsing. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/main.c | 4 ++-- net/batman-adv/packet.h | 40 +++++++++++++++++++++++++++++++++++----- net/batman-adv/routing.c | 14 +++++++------- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d87778b9c5b4e..1511f64a6ceaa 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -426,8 +426,8 @@ static void batadv_recv_handler_init(void) BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4); BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4); BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, icmph.dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, icmph.dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4); /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 175ce7d721d64..2a857ed006fd5 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -191,7 +191,7 @@ struct batadv_ogm_packet { #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) /** - * batadv_icmp_header - common ICMP header + * batadv_icmp_header - common members among all the ICMP packets * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header * @ttl: time to live for this packet, part of the genereal header @@ -199,6 +199,11 @@ struct batadv_ogm_packet { * @dst: address of the destination node * @orig: address of the source node * @uid: local ICMP socket identifier + * @align: not used - useful for alignment purposes only + * + * This structure is used for ICMP packets parsing only and it is never sent + * over the wire. The alignment field at the end is there to ensure that + * members are padded the same way as they are in real packets. */ struct batadv_icmp_header { uint8_t packet_type; @@ -208,16 +213,29 @@ struct batadv_icmp_header { uint8_t dst[ETH_ALEN]; uint8_t orig[ETH_ALEN]; uint8_t uid; + uint8_t align[3]; }; /** * batadv_icmp_packet - ICMP packet - * @icmph: common ICMP header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier * @reserved: not used - useful for alignment * @seqno: ICMP sequence number */ struct batadv_icmp_packet { - struct batadv_icmp_header icmph; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ + uint8_t dst[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + uint8_t uid; uint8_t reserved; __be16 seqno; }; @@ -226,13 +244,25 @@ struct batadv_icmp_packet { /** * batadv_icmp_packet_rr - ICMP RouteRecord packet - * @icmph: common ICMP header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier * @rr_cur: number of entries the rr array * @seqno: ICMP sequence number * @rr: route record array */ struct batadv_icmp_packet_rr { - struct batadv_icmp_header icmph; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ + uint8_t dst[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + uint8_t uid; uint8_t rr_cur; __be16 seqno; uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 5b52d718a6eb8..46278bfb8fdb9 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -338,9 +338,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; /* send TTL exceeded if packet is an echo request (traceroute) */ - if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { + if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { pr_debug("Warning - can't forward icmp packet from %pM to %pM: ttl exceeded\n", - icmp_packet->icmph.orig, icmp_packet->icmph.dst); + icmp_packet->orig, icmp_packet->dst); goto out; } @@ -349,7 +349,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, goto out; /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig); + orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig); if (!orig_node) goto out; @@ -359,11 +359,11 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; - memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN); - memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->icmph.msg_type = BATADV_TTL_EXCEEDED; - icmp_packet->icmph.ttl = BATADV_TTL; + icmp_packet->msg_type = BATADV_TTL_EXCEEDED; + icmp_packet->ttl = BATADV_TTL; if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; From 2f7a318219186485c175339758ac42a03644200b Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 2 Dec 2013 20:38:33 +0100 Subject: [PATCH 088/164] batman-adv: fix size of batadv_bla_claim_dst Since this is a mac address and always 48 bit, and we can assume that it is always aligned to 2-byte boundaries, add a pack(2) pragma. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/packet.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 2a857ed006fd5..04cf27ca5dee5 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -155,6 +155,7 @@ enum batadv_tvlv_type { BATADV_TVLV_ROAM = 0x05, }; +#pragma pack(2) /* the destination hardware field in the ARP frame is used to * transport the claim type and the group id */ @@ -163,6 +164,7 @@ struct batadv_bla_claim_dst { uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ }; +#pragma pack() /** * struct batadv_ogm_packet - ogm (routing protocol) packet From ca6630464454d74ae1d430e99007a1139f4a2aba Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 15 Dec 2013 13:26:55 +0100 Subject: [PATCH 089/164] batman-adv: fix alignment for batadv_tvlv_tt_change Make struct batadv_tvlv_tt_change a multiple 4 bytes long to avoid padding on any architecture. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/packet.h | 4 ++-- net/batman-adv/translation-table.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 04cf27ca5dee5..2dd8f2422550c 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -484,13 +484,13 @@ struct batadv_tvlv_tt_vlan_data { * struct batadv_tvlv_tt_change - translation table diff data * @flags: status indicators concerning the non-mesh client (see * batadv_tt_client_flags) - * @reserved: reserved field + * @reserved: reserved field - useful for alignment purposes only * @addr: mac address of non-mesh client that triggered this tt change * @vid: VLAN identifier */ struct batadv_tvlv_tt_change { uint8_t flags; - uint8_t reserved; + uint8_t reserved[3]; uint8_t addr[ETH_ALEN]; __be16 vid; }; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4add57d4857f1..ff625fedbc5ee 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -333,7 +333,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - tt_change_node->change.reserved = 0; + memset(tt_change_node->change.reserved, 0, + sizeof(tt_change_node->change.reserved)); memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); tt_change_node->change.vid = htons(common->vid); @@ -2221,7 +2222,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, ETH_ALEN); tt_change->flags = tt_common_entry->flags; tt_change->vid = htons(tt_common_entry->vid); - tt_change->reserved = 0; + memset(tt_change->reserved, 0, + sizeof(tt_change->reserved)); tt_num_entries++; tt_change++; From 55883fd1048e09f5b6e1edaf0caf7e4f6f31f971 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 23 Dec 2013 01:28:05 +0100 Subject: [PATCH 090/164] batman-adv: clean nf state when removing protocol header If an interface enslaved into batman-adv is a bridge (or a virtual interface built on top of a bridge) the nf_bridge member of the skbs reaching the soft-interface is filled with the state about "netfilter bridge" operations. Then, if one of such skbs is locally delivered, the nf_bridge member should be cleaned up to avoid that the old state could mess up with other "netfilter bridge" operations when entering a second bridge. This is needed because batman-adv is an encapsulation protocol. However at the moment skb->nf_bridge is not released at all leading to bogus "netfilter bridge" behaviours. Fix this by cleaning the netfilter state of the skb before it gets delivered to the upper layer in interface_rx(). Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/soft-interface.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b4881f8e28e81..875a702542c2f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -346,6 +346,11 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); + /* clean the netfilter state now that the batman-adv header has been + * removed + */ + nf_reset(skb); + vid = batadv_get_vid(skb, hdr_size); ethhdr = eth_hdr(skb); From 2b1e2cb3594df80446dc33bb8e12230da11f38ff Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 23 Dec 2013 21:43:39 +0100 Subject: [PATCH 091/164] batman-adv: fix vlan header access When batadv_get_vid() is invoked in interface_rx() the batman-adv header has already been removed, therefore the header_len argument has to be 0. Introduced by c018ad3de61a1dc4194879a53e5559e094aa7b1a ("batman-adv: add the VLAN ID attribute to the TT entry") Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/soft-interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 875a702542c2f..a8f99d1486c04 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -351,7 +351,7 @@ void batadv_interface_rx(struct net_device *soft_iface, */ nf_reset(skb); - vid = batadv_get_vid(skb, hdr_size); + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { From 2ee0d3c80fdb7974cfa1c7e25b5048e9fcaf69b6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 28 Dec 2013 00:59:38 +0100 Subject: [PATCH 092/164] netfilter: nf_tables: fix wrong datatype in nft_validate_data_load() This patch fixes dictionary mappings, eg. add rule ip filter input meta dnat set tcp dport map { 22 => 1.1.1.1, 23 => 2.2.2.2 } The kernel was returning -EINVAL in nft_validate_data_load() since the type of the set element data that is passed was the real userspace datatype instead of NFT_DATA_VALUE. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d65c80b0e84d7..71a9f49a768b8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2382,7 +2382,9 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype); + return nft_validate_data_load(ctx, dreg, &elem->data, + set->dtype == NFT_DATA_VERDICT ? + NFT_DATA_VERDICT : NFT_DATA_VALUE); } int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, From 72368d122c7479aa6e14fbbd334717b8a0c157a6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Dec 2013 01:07:11 +0100 Subject: [PATCH 093/164] cpufreq: Clean up after a failing light-weight initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If cpufreq_policy_restore() returns NULL during system resume, __cpufreq_add_dev() should just fall back to the full initialization instead of returning an error, because that may actually make things work. Moreover, it should not leave stale fallback data behind after it has failed to restore a previously existing policy. This change is based on Viresh Kumar's work. Fixes: 5302c3fb2e62 ("cpufreq: Perform light-weight init/teardown during suspend/resume") Reported-by: Bjørn Mork Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: 3.12+ # 3.12+ --- drivers/cpufreq/cpufreq.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 16d7b4ac94be2..f13a663d1da50 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1016,15 +1016,17 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, read_unlock_irqrestore(&cpufreq_driver_lock, flags); #endif - if (frozen) - /* Restore the saved policy when doing light-weight init */ - policy = cpufreq_policy_restore(cpu); - else + /* + * Restore the saved policy when doing light-weight init and fall back + * to the full init if that fails. + */ + policy = frozen ? cpufreq_policy_restore(cpu) : NULL; + if (!policy) { + frozen = false; policy = cpufreq_policy_alloc(); - - if (!policy) - goto nomem_out; - + if (!policy) + goto nomem_out; + } /* * In the resume path, since we restore a saved policy, the assignment @@ -1118,8 +1120,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, if (cpufreq_driver->exit) cpufreq_driver->exit(policy); err_set_policy_cpu: - if (frozen) + if (frozen) { + /* Do not leave stale fallback data behind. */ + per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL; cpufreq_policy_put_kobj(policy); + } cpufreq_policy_free(policy); nomem_out: From 08fd8c1cf0a99abf34e09a8b99b74872e0d73a23 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 24 Dec 2013 07:11:01 +0530 Subject: [PATCH 094/164] cpufreq: preserve user_policy across suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent __cpufreq_add_dev() from overwriting the existing values of user_policy.{min|max|policy|governor} with defaults during resume from system suspend. Fixes: 5302c3fb2e62 ("cpufreq: Perform light-weight init/teardown during suspend/resume") Reported-by: Bjørn Mork Signed-off-by: Viresh Kumar Cc: 3.12+ # 3.12+ [rjw: Changelog] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f13a663d1da50..8d19f7c06010c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -839,9 +839,6 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) /* set default policy */ ret = cpufreq_set_policy(policy, &new_policy); - policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; - if (ret) { pr_debug("setting policy failed\n"); if (cpufreq_driver->exit) @@ -1071,8 +1068,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, */ cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); - policy->user_policy.min = policy->min; - policy->user_policy.max = policy->max; + if (!frozen) { + policy->user_policy.min = policy->min; + policy->user_policy.max = policy->max; + } blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); @@ -1103,6 +1102,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, cpufreq_init_policy(policy); + if (!frozen) { + policy->user_policy.policy = policy->policy; + policy->user_policy.governor = policy->governor; + } + kobject_uevent(&policy->kobj, KOBJ_ADD); up_read(&cpufreq_rwsem); From f084280cd34e375cd9736d14b023cfc7df3374bb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Dec 2013 23:37:15 +0100 Subject: [PATCH 095/164] PCI / ACPI: Install wakeup notify handlers for all PCI devs with ACPI It turns out that some BIOSes don't report wakeup GPEs through _PRW, but use them for signaling wakeup anyway, which causes GPE storms to occur on some systems after resume from system suspend. This issue has been uncovered by commit d2e5f0c16ad6 (ACPI / PCI: Rework the setup and cleanup of device wakeup) during the 3.9 development cycle. Work around the problem by installing wakeup notify handlers for all PCI devices with ACPI support (i.e. having ACPI companions) regardless of whether or not the BIOS reports ACPI wakeup support for them. The presence of the wakeup notify handlers alone is not harmful in any way if there are no events for them to handle (they are simply never executed then), but on some systems they are needed to take care of spurious events. Fixes: d2e5f0c16ad6 (ACPI / PCI: Rework the setup and cleanup of device wakeup) References: https://bugzilla.kernel.org/show_bug.cgi?id=63021 Reported-and-tested-by: Agustin Barto Cc: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-acpi.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 577074efbe62f..f7ebdba14bde9 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -330,29 +330,32 @@ static int acpi_pci_find_device(struct device *dev, acpi_handle *handle) static void pci_acpi_setup(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - acpi_handle handle = ACPI_HANDLE(dev); - struct acpi_device *adev; + struct acpi_device *adev = ACPI_COMPANION(dev); - if (acpi_bus_get_device(handle, &adev) || !adev->wakeup.flags.valid) + if (!adev) + return; + + pci_acpi_add_pm_notifier(adev, pci_dev); + if (!adev->wakeup.flags.valid) return; device_set_wakeup_capable(dev, true); acpi_pci_sleep_wake(pci_dev, false); - - pci_acpi_add_pm_notifier(adev, pci_dev); if (adev->wakeup.flags.run_wake) device_set_run_wake(dev, true); } static void pci_acpi_cleanup(struct device *dev) { - acpi_handle handle = ACPI_HANDLE(dev); - struct acpi_device *adev; + struct acpi_device *adev = ACPI_COMPANION(dev); + + if (!adev) + return; - if (!acpi_bus_get_device(handle, &adev) && adev->wakeup.flags.valid) { + pci_acpi_remove_pm_notifier(adev); + if (adev->wakeup.flags.valid) { device_set_wakeup_capable(dev, false); device_set_run_wake(dev, false); - pci_acpi_remove_pm_notifier(adev); } } From 90ff5d688e61f49f23545ffab6228bd7e87e6dc7 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 16 Dec 2013 15:12:43 +1100 Subject: [PATCH 096/164] powerpc: Fix bad stack check in exception entry In EXCEPTION_PROLOG_COMMON() we check to see if the stack pointer (r1) is valid when coming from the kernel. If it's not valid, we die but with a nice oops message. Currently we allocate a stack frame (subtract INT_FRAME_SIZE) before we check to see if the stack pointer is negative. Unfortunately, this won't detect a bad stack where r1 is less than INT_FRAME_SIZE. This patch fixes the check to compare the modified r1 with -INT_FRAME_SIZE. With this, bad kernel stack pointers (including NULL pointers) are correctly detected again. Kudos to Paulus for finding this. Signed-off-by: Michael Neuling cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 894662a5d4d5c..243ce69ad685f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -284,7 +284,7 @@ do_kvm_##n: \ subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ beq- 1f; \ ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ -1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ +1: cmpdi cr1,r1,-INT_FRAME_SIZE; /* check if r1 is in userspace */ \ blt+ cr1,3f; /* abort if it is */ \ li r1,(n); /* will be reloaded later */ \ sth r1,PACA_TRAP_SAVE(r13); \ From e8a00ad5e238421ded856ea39f692b94c2d324eb Mon Sep 17 00:00:00 2001 From: Rajesh B Prathipati Date: Mon, 16 Dec 2013 18:58:22 +1100 Subject: [PATCH 097/164] powerpc: Make unaligned accesses endian-safe for powerpc The generic put_unaligned/get_unaligned macros were made endian-safe by calling the appropriate endian dependent macros based on the endian type of the powerpc processor. Signed-off-by: Rajesh B Prathipati Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/unaligned.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/unaligned.h b/arch/powerpc/include/asm/unaligned.h index 5f1b1e3c21374..8296381ae4329 100644 --- a/arch/powerpc/include/asm/unaligned.h +++ b/arch/powerpc/include/asm/unaligned.h @@ -4,13 +4,18 @@ #ifdef __KERNEL__ /* - * The PowerPC can do unaligned accesses itself in big endian mode. + * The PowerPC can do unaligned accesses itself based on its endian mode. */ #include #include +#ifdef __LITTLE_ENDIAN__ +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le +#else #define get_unaligned __get_unaligned_be #define put_unaligned __put_unaligned_be +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UNALIGNED_H */ From 20151169f1de4b170368fdb574024027620d0d49 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 18 Dec 2013 09:29:57 +1100 Subject: [PATCH 098/164] powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian The powerpc 64-bit __copy_tofrom_user() function uses shifts to handle unaligned invocations. However, these shifts were designed for big-endian systems: On little-endian systems, they must shift in the opposite direction. This commit relies on the C preprocessor to insert the correct shifts into the assembly code. [ This is a rare but nasty LE issue. Most of the time we use the POWER7 optimised __copy_tofrom_user_power7 loop, but when it hits an exception we fall back to the base __copy_tofrom_user loop. - Anton ] Signed-off-by: Paul E. McKenney Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/copyuser_64.S | 53 ++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index d73a590149001..596a285c07554 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -9,6 +9,14 @@ #include #include +#ifdef __BIG_ENDIAN__ +#define sLd sld /* Shift towards low-numbered address. */ +#define sHd srd /* Shift towards high-numbered address. */ +#else +#define sLd srd /* Shift towards low-numbered address. */ +#define sHd sld /* Shift towards high-numbered address. */ +#endif + .align 7 _GLOBAL(__copy_tofrom_user) BEGIN_FTR_SECTION @@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ 25: ld r0,8(r4) - sld r6,r9,r10 + sLd r6,r9,r10 26: ldu r9,16(r4) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 or r7,r7,r6 blt cr6,79f 27: ld r0,8(r4) @@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ 29: ldu r9,8(r4) - sld r8,r0,r10 + sLd r8,r0,r10 addi r3,r3,-8 blt cr6,5f 30: ld r0,8(r4) - srd r12,r9,r11 - sld r6,r9,r10 + sHd r12,r9,r11 + sLd r6,r9,r10 31: ldu r9,16(r4) or r12,r8,r12 - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 addi r3,r3,16 beq cr6,78f 1: or r7,r7,r6 32: ld r0,8(r4) 76: std r12,8(r3) -2: srd r12,r9,r11 - sld r6,r9,r10 +2: sHd r12,r9,r11 + sLd r6,r9,r10 33: ldu r9,16(r4) or r12,r8,r12 77: stdu r7,16(r3) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 bdnz 1b 78: std r12,8(r3) or r7,r7,r6 79: std r7,16(r3) -5: srd r12,r9,r11 +5: sHd r12,r9,r11 or r12,r8,r12 80: std r12,24(r3) bne 6f @@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blr 6: cmpwi cr1,r5,8 addi r3,r3,32 - sld r9,r9,r10 + sLd r9,r9,r10 ble cr1,7f 34: ld r0,8(r4) - srd r7,r0,r11 + sHd r7,r0,r11 or r9,r7,r9 7: bf cr7*4+1,1f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,32 +#endif 94: stw r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,32 +#endif addi r3,r3,4 1: bf cr7*4+2,2f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,16 +#endif 95: sth r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,16 +#endif addi r3,r3,2 2: bf cr7*4+3,3f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,8 +#endif 96: stb r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,8 +#endif 3: li r3,0 blr From 20acebdfaec3e15d8a27ab25bf4a2f91b2afa757 Mon Sep 17 00:00:00 2001 From: Brian W Hart Date: Thu, 19 Dec 2013 17:14:07 -0600 Subject: [PATCH 099/164] powernv/eeh: Fix possible buffer overrun in ioda_eeh_phb_diag() PHB diagnostic buffer may be smaller than PAGE_SIZE, especially when PAGE_SIZE > 4KB. Signed-off-by: Brian W Hart Acked-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/eeh-ioda.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 02245cee78183..8184ef5ccb1a6 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -820,14 +820,15 @@ static void ioda_eeh_phb_diag(struct pci_controller *hose) struct OpalIoPhbErrorCommon *common; long rc; - common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; - rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE); + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + PNV_PCI_DIAG_BUF_SIZE); if (rc != OPAL_SUCCESS) { pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", __func__, hose->global_number, rc); return; } + common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; switch (common->ioType) { case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: ioda_eeh_p7ioc_phb_diag(hose, common); From ca1de5deb782e1636ed5b898e215a8840ae39230 Mon Sep 17 00:00:00 2001 From: Brian W Hart Date: Fri, 20 Dec 2013 13:06:01 -0600 Subject: [PATCH 100/164] powernv/eeh: Add buffer for P7IOC hub error data Prevent ioda_eeh_hub_diag() from clobbering itself when called by supplying a per-PHB buffer for P7IOC hub diagnostic data. Take care to inform OPAL of the correct size for the buffer. [Small style change to the use of sizeof -- BenH] Signed-off-by: Brian W Hart Acked-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/eeh-ioda.c | 15 ++------------- arch/powerpc/platforms/powernv/pci.h | 4 +++- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 8184ef5ccb1a6..d7ddcee7feb8b 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -36,7 +36,6 @@ #include "powernv.h" #include "pci.h" -static char *hub_diag = NULL; static int ioda_eeh_nb_init = 0; static int ioda_eeh_event(struct notifier_block *nb, @@ -140,15 +139,6 @@ static int ioda_eeh_post_init(struct pci_controller *hose) ioda_eeh_nb_init = 1; } - /* We needn't HUB diag-data on PHB3 */ - if (phb->type == PNV_PHB_IODA1 && !hub_diag) { - hub_diag = (char *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - if (!hub_diag) { - pr_err("%s: Out of memory !\n", __func__); - return -ENOMEM; - } - } - #ifdef CONFIG_DEBUG_FS if (phb->dbgfs) { debugfs_create_file("err_injct_outbound", 0600, @@ -633,11 +623,10 @@ static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) static void ioda_eeh_hub_diag(struct pci_controller *hose) { struct pnv_phb *phb = hose->private_data; - struct OpalIoP7IOCErrorData *data; + struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; long rc; - data = (struct OpalIoP7IOCErrorData *)ioda_eeh_hub_diag; - rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE); + rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); if (rc != OPAL_SUCCESS) { pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", __func__, phb->hub_id, rc); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 911c24ef033e0..1ed8d5f40f5ad 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -172,11 +172,13 @@ struct pnv_phb { } ioda; }; - /* PHB status structure */ + /* PHB and hub status structure */ union { unsigned char blob[PNV_PCI_DIAG_BUF_SIZE]; struct OpalIoP7IOCPhbErrorData p7ioc; + struct OpalIoP7IOCErrorData hub_diag; } diag; + }; extern struct pci_ops pnv_pci_ops; From 286e4f90a72c0b0621dde0294af6ed4b0baddabb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 23 Dec 2013 12:19:51 +1100 Subject: [PATCH 101/164] powerpc: Align p_end p_end is an 8 byte value embedded in the text section. This means it is only 4 byte aligned when it should be 8 byte aligned. Fix this by adding an explicit alignment. This fixes an issue where POWER7 little endian builds with CONFIG_RELOCATABLE=y fail to boot. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 2ae41aba40530..fad2abdcbdfed 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -470,6 +470,7 @@ _STATIC(__after_prom_start) mtctr r8 bctr +.balign 8 p_end: .llong _end - _stext 4: /* Now copy the rest of the kernel up to _end */ From 7d4151b5098fb0bf7f6f8d1156e1ab9d83260580 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sat, 28 Dec 2013 13:01:47 -0800 Subject: [PATCH 102/164] powerpc: Fix alignment of secondary cpu spin vars Commit 5c0484e25ec0 ('powerpc: Endian safe trampoline') resulted in losing proper alignment of the spinlock variables used when booting secondary CPUs, causing some quite odd issues with failing to boot on PA Semi-based systems. This showed itself on ppc64_defconfig, but not on pasemi_defconfig, so it had gone unnoticed when I initially tested the LE patch set. Fix is to add explicit alignment instead of relying on good luck. :) [ It appears that there is a different issue with PA Semi systems however this fix is definitely correct so applying anyway -- BenH ] Fixes: 5c0484e25ec0 ('powerpc: Endian safe trampoline') Reported-by: Christian Zigotzky Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=67811 Signed-off-by: Olof Johansson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index fad2abdcbdfed..4f0946de2d5c9 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -80,6 +80,7 @@ END_FTR_SECTION(0, 1) * of the function that the cpu should jump to to continue * initialization. */ + .balign 8 .globl __secondary_hold_spinloop __secondary_hold_spinloop: .llong 0x0 From 84602761ca4495dd409be936dfa93ed20c946684 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 27 Dec 2013 10:18:28 +0800 Subject: [PATCH 103/164] tipc: fix deadlock during socket release A deadlock might occur if name table is withdrawn in socket release routine, and while packets are still being received from bearer. CPU0 CPU1 T0: recv_msg() release() T1: tipc_recv_msg() tipc_withdraw() T2: [grab node lock] [grab port lock] T3: tipc_link_wakeup_ports() tipc_nametbl_withdraw() T4: [grab port lock]* named_cluster_distribute() T5: wakeupdispatch() tipc_link_send() T6: [grab node lock]* The opposite order of holding port lock and node lock on above two different paths may result in a deadlock. If socket lock instead of port lock is used to protect port instance in tipc_withdraw(), the reverse order of holding port lock and node lock will be eliminated, as a result, the deadlock is killed as well. Reported-by: Lars Everbrand Reviewed-by: Erik Hugne Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/port.c | 45 +++++++++++++++------------------------------ net/tipc/port.h | 6 +++--- net/tipc/socket.c | 46 +++++++++++++++++++++++++++++++--------------- 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/net/tipc/port.c b/net/tipc/port.c index c081a7632302c..d43f3182b1d4c 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -251,18 +251,15 @@ struct tipc_port *tipc_createport(struct sock *sk, return p_ptr; } -int tipc_deleteport(u32 ref) +int tipc_deleteport(struct tipc_port *p_ptr) { - struct tipc_port *p_ptr; struct sk_buff *buf = NULL; - tipc_withdraw(ref, 0, NULL); - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; + tipc_withdraw(p_ptr, 0, NULL); - tipc_ref_discard(ref); - tipc_port_unlock(p_ptr); + spin_lock_bh(p_ptr->lock); + tipc_ref_discard(p_ptr->ref); + spin_unlock_bh(p_ptr->lock); k_cancel_timer(&p_ptr->timer); if (p_ptr->connected) { @@ -704,47 +701,36 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) } -int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) +int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, + struct tipc_name_seq const *seq) { - struct tipc_port *p_ptr; struct publication *publ; u32 key; - int res = -EINVAL; - p_ptr = tipc_port_lock(ref); - if (!p_ptr) + if (p_ptr->connected) return -EINVAL; + key = p_ptr->ref + p_ptr->pub_count + 1; + if (key == p_ptr->ref) + return -EADDRINUSE; - if (p_ptr->connected) - goto exit; - key = ref + p_ptr->pub_count + 1; - if (key == ref) { - res = -EADDRINUSE; - goto exit; - } publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, scope, p_ptr->ref, key); if (publ) { list_add(&publ->pport_list, &p_ptr->publications); p_ptr->pub_count++; p_ptr->published = 1; - res = 0; + return 0; } -exit: - tipc_port_unlock(p_ptr); - return res; + return -EINVAL; } -int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) +int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, + struct tipc_name_seq const *seq) { - struct tipc_port *p_ptr; struct publication *publ; struct publication *tpubl; int res = -EINVAL; - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; if (!seq) { list_for_each_entry_safe(publ, tpubl, &p_ptr->publications, pport_list) { @@ -771,7 +757,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) } if (list_empty(&p_ptr->publications)) p_ptr->published = 0; - tipc_port_unlock(p_ptr); return res; } diff --git a/net/tipc/port.h b/net/tipc/port.h index 9122535973430..34f12bd4074e4 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -116,7 +116,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err); void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_deleteport(u32 portref); +int tipc_deleteport(struct tipc_port *p_ptr); int tipc_portimportance(u32 portref, unsigned int *importance); int tipc_set_portimportance(u32 portref, unsigned int importance); @@ -127,9 +127,9 @@ int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); -int tipc_publish(u32 portref, unsigned int scope, +int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); -int tipc_withdraw(u32 portref, unsigned int scope, +int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); int tipc_connect(u32 portref, struct tipc_portid const *port); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3b61851bb9276..e741416d1d24d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -354,7 +354,7 @@ static int release(struct socket *sock) * Delete TIPC port; this ensures no more messages are queued * (also disconnects an active connection & sends a 'FIN-' to peer) */ - res = tipc_deleteport(tport->ref); + res = tipc_deleteport(tport); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -386,30 +386,46 @@ static int release(struct socket *sock) */ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) { + struct sock *sk = sock->sk; struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - u32 portref = tipc_sk_port(sock->sk)->ref; + struct tipc_port *tport = tipc_sk_port(sock->sk); + int res = -EINVAL; - if (unlikely(!uaddr_len)) - return tipc_withdraw(portref, 0, NULL); + lock_sock(sk); + if (unlikely(!uaddr_len)) { + res = tipc_withdraw(tport, 0, NULL); + goto exit; + } - if (uaddr_len < sizeof(struct sockaddr_tipc)) - return -EINVAL; - if (addr->family != AF_TIPC) - return -EAFNOSUPPORT; + if (uaddr_len < sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + goto exit; + } + if (addr->family != AF_TIPC) { + res = -EAFNOSUPPORT; + goto exit; + } if (addr->addrtype == TIPC_ADDR_NAME) addr->addr.nameseq.upper = addr->addr.nameseq.lower; - else if (addr->addrtype != TIPC_ADDR_NAMESEQ) - return -EAFNOSUPPORT; + else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { + res = -EAFNOSUPPORT; + goto exit; + } if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && (addr->addr.nameseq.type != TIPC_TOP_SRV) && - (addr->addr.nameseq.type != TIPC_CFG_SRV)) - return -EACCES; + (addr->addr.nameseq.type != TIPC_CFG_SRV)) { + res = -EACCES; + goto exit; + } - return (addr->scope > 0) ? - tipc_publish(portref, addr->scope, &addr->addr.nameseq) : - tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq); + res = (addr->scope > 0) ? + tipc_publish(tport, addr->scope, &addr->addr.nameseq) : + tipc_withdraw(tport, -addr->scope, &addr->addr.nameseq); +exit: + release_sock(sk); + return res; } /** From 7a399e3a2e05bc580a78ea72371b3896827f72e1 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 24 Dec 2013 12:16:01 -0200 Subject: [PATCH 104/164] fec: Do not assume that PHY reset is active low We should not assume that the PHY reset is always active low. Retrieve this information from the device tree instead, so that the PHY reset can work on both cases. Reported-by: Philipp Zabel Signed-off-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 50bb71c663e20..45b8b22b9987f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2049,6 +2049,8 @@ static void fec_reset_phy(struct platform_device *pdev) int err, phy_reset; int msec = 1; struct device_node *np = pdev->dev.of_node; + enum of_gpio_flags flags; + bool port; if (!np) return; @@ -2058,18 +2060,22 @@ static void fec_reset_phy(struct platform_device *pdev) if (msec > 1000) msec = 1; - phy_reset = of_get_named_gpio(np, "phy-reset-gpios", 0); + phy_reset = of_get_named_gpio_flags(np, "phy-reset-gpios", 0, &flags); if (!gpio_is_valid(phy_reset)) return; - err = devm_gpio_request_one(&pdev->dev, phy_reset, - GPIOF_OUT_INIT_LOW, "phy-reset"); + if (flags & OF_GPIO_ACTIVE_LOW) + port = GPIOF_OUT_INIT_LOW; + else + port = GPIOF_OUT_INIT_HIGH; + + err = devm_gpio_request_one(&pdev->dev, phy_reset, port, "phy-reset"); if (err) { dev_err(&pdev->dev, "failed to get phy-reset-gpios: %d\n", err); return; } msleep(msec); - gpio_set_value(phy_reset, 1); + gpio_set_value(phy_reset, !port); } #else /* CONFIG_OF */ static void fec_reset_phy(struct platform_device *pdev) From ac3d5ac277352fe6e27809286768e9f1f8aa388d Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Mon, 23 Dec 2013 09:27:17 +0000 Subject: [PATCH 105/164] xen-netback: fix guest-receive-side array sizes The sizes chosen for the metadata and grant_copy_op arrays on the guest receive size are wrong; - The meta array is needlessly twice the ring size, when we only ever consume a single array element per RX ring slot - The grant_copy_op array is way too small. It's sized based on a bogus assumption: that at most two copy ops will be used per ring slot. This may have been true at some point in the past but it's clear from looking at start_new_rx_buffer() that a new ring slot is only consumed if a frag would overflow the current slot (plus some other conditions) so the actual limit is MAX_SKB_FRAGS grant_copy_ops per ring slot. This patch fixes those two sizing issues and, because grant_copy_ops grows so much, it pulls it out into a separate chunk of vmalloc()ed memory. Signed-off-by: Paul Durrant Acked-by: Wei Liu Cc: Ian Campbell Cc: David Vrabel Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 19 +++++++++++++------ drivers/net/xen-netback/interface.c | 10 ++++++++++ drivers/net/xen-netback/netback.c | 2 +- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 08ae01b41c832..c47794b9d42f9 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -101,6 +101,13 @@ struct xenvif_rx_meta { #define MAX_PENDING_REQS 256 +/* It's possible for an skb to have a maximal number of frags + * but still be less than MAX_BUFFER_OFFSET in size. Thus the + * worst-case number of copy operations is MAX_SKB_FRAGS per + * ring slot. + */ +#define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE) + struct xenvif { /* Unique identifier for this interface. */ domid_t domid; @@ -143,13 +150,13 @@ struct xenvif { */ RING_IDX rx_req_cons_peek; - /* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each - * head/fragment page uses 2 copy operations because it - * straddles two buffers in the frontend. - */ - struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE]; - struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE]; + /* This array is allocated seperately as it is large */ + struct gnttab_copy *grant_copy_op; + /* We create one meta structure per ring request we consume, so + * the maximum number is the same as the ring size. + */ + struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE]; u8 fe_dev_addr[6]; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 870f1fa583702..34ca4e58a43df 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -307,6 +307,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, SET_NETDEV_DEV(dev, parent); vif = netdev_priv(dev); + + vif->grant_copy_op = vmalloc(sizeof(struct gnttab_copy) * + MAX_GRANT_COPY_OPS); + if (vif->grant_copy_op == NULL) { + pr_warn("Could not allocate grant copy space for %s\n", name); + free_netdev(dev); + return ERR_PTR(-ENOMEM); + } + vif->domid = domid; vif->handle = handle; vif->can_sg = 1; @@ -487,6 +496,7 @@ void xenvif_free(struct xenvif *vif) unregister_netdev(vif->dev); + vfree(vif->grant_copy_op); free_netdev(vif->dev); module_put(THIS_MODULE); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 7b4fd93be76dd..78425554a5375 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -608,7 +608,7 @@ void xenvif_rx_action(struct xenvif *vif) if (!npo.copy_prod) return; - BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op)); + BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS); gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); while ((skb = __skb_dequeue(&rxq)) != NULL) { From f81152e35001e91997ec74a7b4e040e6ab0acccf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Dec 2013 00:32:31 +0100 Subject: [PATCH 106/164] net: rose: restore old recvmsg behavior recvmsg handler in net/rose/af_rose.c performs size-check ->msg_namelen. After commit f3d3342602f8bcbf37d7c46641cb9bca7618eb1c (net: rework recvmsg handler msg_name and msg_namelen logic), we now always take the else branch due to namelen being initialized to 0. Digging in netdev-vger-cvs git repo shows that msg_namelen was initialized with a fixed-size since at least 1995, so the else branch was never taken. Compile tested only. Signed-off-by: Florian Westphal Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/rose/af_rose.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 33af77246bfeb..62ced6516c586 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_name) { struct sockaddr_rose *srose; + struct full_sockaddr_rose *full_srose = msg->msg_name; memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); srose = msg->msg_name; @@ -1260,18 +1261,9 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; srose->srose_ndigis = rose->dest_ndigis; - if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) { - struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name; - for (n = 0 ; n < rose->dest_ndigis ; n++) - full_srose->srose_digis[n] = rose->dest_digis[n]; - msg->msg_namelen = sizeof(struct full_sockaddr_rose); - } else { - if (rose->dest_ndigis >= 1) { - srose->srose_ndigis = 1; - srose->srose_digi = rose->dest_digis[0]; - } - msg->msg_namelen = sizeof(struct sockaddr_rose); - } + for (n = 0 ; n < rose->dest_ndigis ; n++) + full_srose->srose_digis[n] = rose->dest_digis[n]; + msg->msg_namelen = sizeof(struct full_sockaddr_rose); } skb_free_datagram(sk, skb); From 33c133cc7598e60976a069344910d63e56cc4401 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 20 Dec 2013 22:09:04 +0300 Subject: [PATCH 107/164] phy: IRQ cannot be shared With the way PHY IRQ handler is implemented (all real handling being pushed to the workqueue and returning IRQ_HANDLED all the time PHY is active), we cannot really claim that PHY IRQ can be shared when calling request_irq(). Signed-off-by: Sergei Shtylyov Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 36c6994436b7c..98434b84f0415 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -565,10 +565,8 @@ int phy_start_interrupts(struct phy_device *phydev) int err = 0; atomic_set(&phydev->irq_disable, 0); - if (request_irq(phydev->irq, phy_interrupt, - IRQF_SHARED, - "phy_interrupt", - phydev) < 0) { + if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt", + phydev) < 0) { pr_warn("%s: Can't get IRQ %d (PHY)\n", phydev->bus->name, phydev->irq); phydev->irq = PHY_POLL; From 7cd013992335b1c5156059248ee765fb3b14d154 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Fri, 20 Dec 2013 11:19:34 -0600 Subject: [PATCH 108/164] stmmac: Fix incorrect spinlock release and PTP cap detection. This patch corrects a problem in stmmac_ptp.c, functions stmmac_adjust_time and stmmac_adjust_freq where the incorrect spinlocks were released. This patch also addresses a problem in stmmac_main, function stmmac_init_ptp where the capability detection for advanced timestamping was masked by message masking. This patch was touch tested using linuxptp, and runs without the previously observed instabilities. More extensive testing is ongoing. Vince Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 +++++++++---------- .../net/ethernet/stmicro/stmmac/stmmac_ptp.c | 4 ++-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8a7a23a84ac5c..797b56a0efc4a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -622,17 +622,15 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) return -EOPNOTSUPP; - if (netif_msg_hw(priv)) { - if (priv->dma_cap.time_stamp) { - pr_debug("IEEE 1588-2002 Time Stamp supported\n"); - priv->adv_ts = 0; - } - if (priv->dma_cap.atime_stamp && priv->extend_desc) { - pr_debug - ("IEEE 1588-2008 Advanced Time Stamp supported\n"); - priv->adv_ts = 1; - } - } + priv->adv_ts = 0; + if (priv->dma_cap.atime_stamp && priv->extend_desc) + priv->adv_ts = 1; + + if (netif_msg_hw(priv) && priv->dma_cap.time_stamp) + pr_debug("IEEE 1588-2002 Time Stamp supported\n"); + + if (netif_msg_hw(priv) && priv->adv_ts) + pr_debug("IEEE 1588-2008 Advanced Time Stamp supported\n"); priv->hw->ptp = &stmmac_ptp; priv->hwts_tx_en = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index b8b0eeed0f92b..7680581ebe12f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -56,7 +56,7 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) priv->hw->ptp->config_addend(priv->ioaddr, addend); - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } @@ -91,7 +91,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) priv->hw->ptp->adjust_systime(priv->ioaddr, sec, nsec, neg_adj); - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } From fcf2f402937a6696f6fa2a1aa882c5075e5fac34 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 18 Dec 2013 16:46:02 +0100 Subject: [PATCH 109/164] s390/pci: obtain function handle in hotplug notifier When using the CLP interface to enable or disable a pci device a valid function handle needs to be delivered. So far our assumption was that we always have an up-to-date version of the function handle (since it doesn't change when the device is in use). This assumption is incorrect if the pci device is enabled or disabled outside of our control. When we are notified about such a change we already receive the new function handle. Just use it. Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 800f064b0da7c..069607209a307 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -75,6 +75,7 @@ void zpci_event_availability(void *data) if (!zdev || zdev->state == ZPCI_FN_STATE_CONFIGURED) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; + zdev->fh = ccdf->fh; ret = zpci_enable_device(zdev); if (ret) break; @@ -101,6 +102,7 @@ void zpci_event_availability(void *data) if (pdev) pci_stop_and_remove_bus_device(pdev); + zdev->fh = ccdf->fh; zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; break; From 5781532ebecd9e0b7b5fc25d78030bdec635f4ac Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 13 Dec 2013 21:49:19 +0100 Subject: [PATCH 110/164] ARM/cpuidle: remove __init tag from Calxeda cpuidle probe function Commit 60a66e370007e8535b7a561353b07b37deaf35ba changed the Calxeda cpuidle driver to a platform driver, copying the __init tag from the _init() to the newly used _probe() function. However, "probe should not be __init." (Rob said ;-) Remove the __init tag to fix a section mismatch in the Calxeda cpuidle driver. Signed-off-by: Andre Przywara Signed-off-by: Daniel Lezcano --- drivers/cpuidle/cpuidle-calxeda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index 36795639df0da..6e51114057d09 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -65,7 +65,7 @@ static struct cpuidle_driver calxeda_idle_driver = { .state_count = 2, }; -static int __init calxeda_cpuidle_probe(struct platform_device *pdev) +static int calxeda_cpuidle_probe(struct platform_device *pdev) { return cpuidle_register(&calxeda_idle_driver, NULL); } From b62dfd29eeaf12f8bc79a50f680901e84b351851 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Nov 2013 17:41:14 +1100 Subject: [PATCH 111/164] iommu: Add empty stub for iommu_group_get_by_id() Almost every function in include/linux/iommu.h has an empty stub but the iommu_group_get_by_id() did not get one by mistake. This adds an empty stub for iommu_group_get_by_id() for IOMMU_API disabled config. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a444c790fa723..01ed7345f5083 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -247,6 +247,11 @@ static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) return NULL; } +static inline struct iommu_group *iommu_group_get_by_id(int id) +{ + return NULL; +} + static inline void iommu_domain_free(struct iommu_domain *domain) { } From 21004dcd3ba25163d620eddc703185cd50c89a93 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Wed, 18 Dec 2013 12:01:46 +0530 Subject: [PATCH 112/164] iommu/vt-d: Mark functions as static in dmar.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark the functions check_zero_address() and dmar_get_fault_reason() as static in dmar.c because they are not used outside this file. This eliminates the following warnings in dmar.c: drivers/iommu/dmar.c:491:12: warning: no previous prototype for ‘check_zero_address’ [-Wmissing-prototypes] drivers/iommu/dmar.c:1116:13: warning: no previous prototype for ‘dmar_get_fault_reason’ [-Wmissing-prototypes] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 8b452c9676d96..fb35d1bd19e10 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -488,7 +488,7 @@ static void warn_invalid_dmar(u64 addr, const char *message) dmi_get_system_info(DMI_PRODUCT_VERSION)); } -int __init check_zero_address(void) +static int __init check_zero_address(void) { struct acpi_table_dmar *dmar; struct acpi_dmar_header *entry_header; @@ -1113,7 +1113,7 @@ static const char *irq_remap_fault_reasons[] = #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) -const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) +static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) { if (fault_reason >= 0x20 && (fault_reason - 0x20 < ARRAY_SIZE(irq_remap_fault_reasons))) { From 6a7885c49b5b79c6e2e73db6fd9db9051b4a28f9 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Wed, 18 Dec 2013 12:04:27 +0530 Subject: [PATCH 113/164] iommu/vt-d: Mark functions as static in intel_irq_remapping.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark functions int get_irte() and ir_dev_scope_init() as static in intel_irq_remapping.c because they are not used outside this file. This eliminates the following warnings in intel_irq_remapping.c: drivers/iommu/intel_irq_remapping.c:49:5: warning: no previous prototype for ‘get_irte’ [-Wmissing-prototypes] drivers/iommu/intel_irq_remapping.c:810:12: warning: no previous prototype for ‘ir_dev_scope_init’ [-Wmissing-prototypes] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index bab10b1002fbf..c988b8d85df88 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -46,7 +46,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq) return cfg ? &cfg->irq_2_iommu : NULL; } -int get_irte(int irq, struct irte *entry) +static int get_irte(int irq, struct irte *entry) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); unsigned long flags; @@ -807,7 +807,7 @@ int __init parse_ioapics_under_ir(void) return 1; } -int __init ir_dev_scope_init(void) +static int __init ir_dev_scope_init(void) { if (!irq_remapping_enabled) return 0; From d2d1e8fe430c5d26781b14695b05ba46ae8f525b Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Wed, 18 Dec 2013 12:06:45 +0530 Subject: [PATCH 114/164] iommu/vt-d: Mark function eoi_ioapic_pin_remapped() as static in irq_remapping.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark function eoi_ioapic_pin_remapped() as static in irq_remapping.c because it is not used outside this file. This eliminates the following warning in irq_remapping.c:drivers/iommu/irq_remapping.c:153:6: warning: no previous prototype for ‘eoi_ioapic_pin_remapped’ [-Wmissing-prototypes] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Signed-off-by: Joerg Roedel --- drivers/iommu/irq_remapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 39f81aeefcd69..3b05d1b939c54 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -150,7 +150,7 @@ static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, return do_setup_msix_irqs(dev, nvec); } -void eoi_ioapic_pin_remapped(int apic, int pin, int vector) +static void eoi_ioapic_pin_remapped(int apic, int pin, int vector) { /* * Intr-remapping uses pin number as the virtual vector From 13fcca8f25f4e9ce7f55da9cd353bb743236e212 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sun, 29 Dec 2013 19:37:43 -0600 Subject: [PATCH 115/164] Revert "of/address: Handle #address-cells > 2 specially" This reverts commit e38c0a1fbc5803cbacdaac0557c70ac8ca5152e7. Nikita Yushchenko reports: While trying to make freescale p2020ds and mpc8572ds boards working with mainline kernel, I faced that commit e38c0a1f (Handle Both these boards have uli1575 chip. Corresponding part in device tree is something like uli1575@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; #address-cells = <3>; ranges = <0x2000000 0x0 0x80000000 0x2000000 0x0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x0 0x1000000 0x0 0x0 0x0 0x10000>; isa@1e { ... I.e. it has #address-cells = <3> With commit e38c0a1f reverted, devices under uli1575 are registered correctly, e.g. for rtc OF: ** translation for device /pcie@ffe09000/pcie@0/uli1575@0/isa@1e/rtc@70 ** OF: bus is isa (na=2, ns=1) on /pcie@ffe09000/pcie@0/uli1575@0/isa@1e OF: translating address: 00000001 00000070 OF: parent bus is default (na=3, ns=2) on /pcie@ffe09000/pcie@0/uli1575@0 OF: walking ranges... OF: ISA map, cp=0, s=1000, da=70 OF: parent translation for: 01000000 00000000 00000000 OF: with offset: 70 OF: one level translation: 00000000 00000000 00000070 OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000/pcie@0 OF: walking ranges... OF: default map, cp=a0000000, s=20000000, da=70 OF: default map, cp=0, s=10000, da=70 OF: parent translation for: 01000000 00000000 00000000 OF: with offset: 70 OF: one level translation: 01000000 00000000 00000070 OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000 OF: walking ranges... OF: PCI map, cp=0, s=10000, da=70 OF: parent translation for: 01000000 00000000 00000000 OF: with offset: 70 OF: one level translation: 01000000 00000000 00000070 OF: parent bus is default (na=2, ns=2) on / OF: walking ranges... OF: PCI map, cp=0, s=10000, da=70 OF: parent translation for: 00000000 ffc10000 OF: with offset: 70 OF: one level translation: 00000000 ffc10070 OF: reached root node With commit e38c0a1f in place, address translation fails: OF: ** translation for device /pcie@ffe09000/pcie@0/uli1575@0/isa@1e/rtc@70 ** OF: bus is isa (na=2, ns=1) on /pcie@ffe09000/pcie@0/uli1575@0/isa@1e OF: translating address: 00000001 00000070 OF: parent bus is default (na=3, ns=2) on /pcie@ffe09000/pcie@0/uli1575@0 OF: walking ranges... OF: ISA map, cp=0, s=1000, da=70 OF: parent translation for: 01000000 00000000 00000000 OF: with offset: 70 OF: one level translation: 00000000 00000000 00000070 OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000/pcie@0 OF: walking ranges... OF: default map, cp=a0000000, s=20000000, da=70 OF: default map, cp=0, s=10000, da=70 OF: not found ! Thierry Reding confirmed this commit was not needed after all: "We ended up merging a different address representation for Tegra PCIe and I've confirmed that reverting this commit doesn't cause any obvious regressions. I think all other drivers in drivers/pci/host ended up copying what we did on Tegra, so I wouldn't expect any other breakage either." There doesn't appear to be a simple way to support both behaviours, so reverting this as nothing should be depending on the new behaviour. Cc: stable@vger.kernel.org # v3.7+ Signed-off-by: Rob Herring --- drivers/of/address.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 4b9317bdb81ce..d3dd41c840f1c 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -69,14 +69,6 @@ static u64 of_bus_default_map(__be32 *addr, const __be32 *range, (unsigned long long)cp, (unsigned long long)s, (unsigned long long)da); - /* - * If the number of address cells is larger than 2 we assume the - * mapping doesn't specify a physical address. Rather, the address - * specifies an identifier that must match exactly. - */ - if (na > 2 && memcmp(range, addr, na * 4) != 0) - return OF_BAD_ADDR; - if (da < cp || da >= (cp + s)) return OF_BAD_ADDR; return da - cp; From 5d9270869b6cd3cb098ad9393c0cb62ab184e35e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Dec 2013 21:06:01 +0100 Subject: [PATCH 116/164] of/Kconfig: Spelling s/one/once/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- drivers/of/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index de6f8990246fe..c6973f101a3e4 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -20,7 +20,7 @@ config OF_SELFTEST depends on OF_IRQ help This option builds in test cases for the device tree infrastructure - that are executed one at boot time, and the results dumped to the + that are executed once at boot time, and the results dumped to the console. If unsure, say N here, but this option is safe to enable. From 2f53a713c4b6c1d2f32d87c532d47ad17861053a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 17 Dec 2013 18:32:53 +0100 Subject: [PATCH 117/164] of/irq: Fix device_node refcount in of_irq_parse_raw() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2361613206e6, "of/irq: Refactor interrupt-map parsing" changed the refcount on the device_node causing an error in of_node_put(): ERROR: Bad of_node_put() on /pci@800000020000000 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.13.0-rc3-dirty #2 Call Trace: [c00000003e403500] [c0000000000144fc] .show_stack+0x7c/0x1f0 (unreliable) [c00000003e4035d0] [c00000000070f250] .dump_stack+0x88/0xb4 [c00000003e403650] [c0000000005e8768] .of_node_release+0xd8/0xf0 [c00000003e4036e0] [c0000000005eeafc] .of_irq_parse_one+0x10c/0x280 [c00000003e4037a0] [c0000000005efd4c] .of_irq_parse_pci+0x3c/0x1d0 [c00000003e403840] [c000000000038240] .pcibios_setup_device+0xa0/0x2e0 [c00000003e403910] [c0000000000398f0] .pcibios_setup_bus_devices+0x60/0xd0 [c00000003e403990] [c00000000003b3a4] .__of_scan_bus+0x1a4/0x2b0 [c00000003e403a80] [c00000000003a62c] .pcibios_scan_phb+0x30c/0x410 [c00000003e403b60] [c0000000009fe430] .pcibios_init+0x7c/0xd4 This patch adjusts the refcount in the walk of the interrupt tree. When a match is found, there is no need to increase the refcount on 'out_irq->np' as 'newpar' is already holding a ref. The refcount balance between 'ipar' and 'newpar' is maintained in the skiplevel: goto label. This patch also removes the usage of the device_node variable 'old' which seems useless after the latest changes. Signed-off-by: Cédric Le Goater Signed-off-by: Rob Herring --- drivers/of/irq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 786b0b47fae46..27212402c5324 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -165,7 +165,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) if (of_get_property(ipar, "interrupt-controller", NULL) != NULL) { pr_debug(" -> got it !\n"); - of_node_put(old); return 0; } @@ -250,8 +249,7 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) * Successfully parsed an interrrupt-map translation; copy new * interrupt specifier into the out_irq structure */ - of_node_put(out_irq->np); - out_irq->np = of_node_get(newpar); + out_irq->np = newpar; match_array = imap - newaddrsize - newintsize; for (i = 0; i < newintsize; i++) @@ -268,7 +266,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) } fail: of_node_put(ipar); - of_node_put(out_irq->np); of_node_put(newpar); return -EINVAL; From 5d3ad8a63844f496f5c3a1f8bc3d25dc374c8360 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 3 Dec 2013 10:20:16 -0600 Subject: [PATCH 118/164] MAINTAINERS: Update Rob Herring's email address My Calxeda email address is going away. Signed-off-by: Rob Herring Signed-off-by: Rob Herring --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d5e4ff328cc71..21c038f216e31 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -783,7 +783,7 @@ F: arch/arm/boot/dts/sama*.dts F: arch/arm/boot/dts/sama*.dtsi ARM/CALXEDA HIGHBANK ARCHITECTURE -M: Rob Herring +M: Rob Herring L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-highbank/ @@ -6256,7 +6256,7 @@ F: drivers/i2c/busses/i2c-ocores.c OPEN FIRMWARE AND FLATTENED DEVICE TREE M: Grant Likely -M: Rob Herring +M: Rob Herring L: devicetree@vger.kernel.org W: http://fdt.secretlab.ca T: git git://git.secretlab.ca/git/linux-2.6.git @@ -6268,7 +6268,7 @@ K: of_get_property K: of_match_table OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS -M: Rob Herring +M: Rob Herring M: Pawel Moll M: Mark Rutland M: Ian Campbell From e66d2ae7c67bd9ac982a3d1890564de7f7eabf4b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 29 Dec 2013 02:29:30 +0100 Subject: [PATCH 119/164] KVM: x86: Fix APIC map calculation after re-enabling Update arch.apic_base before triggering recalculate_apic_map. Otherwise the recalculation will work against the previous state of the APIC and will fail to build the correct map when an APIC is hardware-enabled again. This fixes a regression of 1e08ec4a13. Cc: stable@vger.kernel.org Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index dec48bfaddb8f..1673940cf9c35 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1350,6 +1350,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) return; } + if (!kvm_vcpu_is_bsp(apic->vcpu)) + value &= ~MSR_IA32_APICBASE_BSP; + vcpu->arch.apic_base = value; + /* update jump label if enable bit changes */ if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) { if (value & MSR_IA32_APICBASE_ENABLE) @@ -1359,10 +1363,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) recalculate_apic_map(vcpu->kvm); } - if (!kvm_vcpu_is_bsp(apic->vcpu)) - value &= ~MSR_IA32_APICBASE_BSP; - - vcpu->arch.apic_base = value; if ((old_value ^ value) & X2APIC_ENABLE) { if (value & X2APIC_ENABLE) { u32 id = kvm_apic_id(apic); From 98a947abdd54e5de909bebadfced1696ccad30cf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 31 Dec 2013 13:37:46 +0100 Subject: [PATCH 120/164] intel_pstate: Fail initialization if P-state information is missing If pstate.current_pstate is 0 after the initial intel_pstate_get_cpu_pstates(), this means that we were unable to obtain any useful P-state information and there is no reason to continue, so free memory and return an error in that case. This fixes the following divide error occuring in a nested KVM guest: Intel P-state driver initializing. Intel pstate controlling: cpu 0 cpufreq: __cpufreq_add_dev: ->get() failed divide error: 0000 [#1] SMP Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.13.0-0.rc4.git5.1.fc21.x86_64 #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff88001ea20000 ti: ffff88001e9bc000 task.ti: ffff88001e9bc000 RIP: 0010:[] [] intel_pstate_timer_func+0x11d/0x2b0 RSP: 0000:ffff88001ee03e18 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88001a454348 RCX: 0000000000006100 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88001ee03e38 R08: 0000000000000000 R09: 0000000000000000 R10: ffff88001ea20000 R11: 0000000000000000 R12: 00000c0a1ea20000 R13: 1ea200001ea20000 R14: ffffffff815c5400 R15: ffff88001a454348 FS: 0000000000000000(0000) GS:ffff88001ee00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 0000000001c0c000 CR4: 00000000000006f0 Stack: fffffffb1a454390 ffffffff821a4500 ffff88001a454390 0000000000000100 ffff88001ee03ea8 ffffffff81083e9a ffffffff81083e15 ffffffff82d5ed40 ffffffff8258cc60 0000000000000000 ffffffff81ac39de 0000000000000000 Call Trace: [] call_timer_fn+0x8a/0x310 [] ? call_timer_fn+0x5/0x310 [] ? pid_param_set+0x130/0x130 [] run_timer_softirq+0x234/0x380 [] __do_softirq+0x104/0x430 [] irq_exit+0xcd/0xe0 [] smp_apic_timer_interrupt+0x45/0x60 [] apic_timer_interrupt+0x72/0x80 [] ? vprintk_emit+0x1dd/0x5e0 [] printk+0x67/0x69 [] __cpufreq_add_dev.isra.13+0x883/0x8d0 [] cpufreq_add_dev+0x10/0x20 [] subsys_interface_register+0xb1/0xf0 [] cpufreq_register_driver+0x9f/0x210 [] intel_pstate_init+0x27d/0x3be [] ? mutex_unlock+0xe/0x10 [] ? cpufreq_gov_dbs_init+0x12/0x12 [] do_one_initcall+0xfa/0x1b0 [] ? parse_args+0x225/0x3f0 [] kernel_init_freeable+0x1fc/0x287 [] ? do_early_param+0x88/0x88 [] ? rest_init+0x150/0x150 [] kernel_init+0xe/0x130 [] ret_from_fork+0x7c/0xb0 [] ? rest_init+0x150/0x150 Code: c1 e0 05 48 63 bc 03 10 01 00 00 48 63 83 d0 00 00 00 48 63 d6 48 c1 e2 08 c1 e1 08 4c 63 c2 48 c1 e0 08 48 98 48 c1 e0 08 48 99 <49> f7 f8 48 98 48 0f af f8 48 c1 ff 08 29 f9 89 ca c1 fa 1f 89 RIP [] intel_pstate_timer_func+0x11d/0x2b0 RSP ---[ end trace f166110ed22cc37a ]--- Kernel panic - not syncing: Fatal exception in interrupt Reported-and-tested-by: Kashyap Chamarthy Cc: Josh Boyer Signed-off-by: Rafael J. Wysocki Cc: All applicable --- drivers/cpufreq/intel_pstate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 5f1cbae369611..f9d561e198ab5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -614,6 +614,11 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu = all_cpu_data[cpunum]; intel_pstate_get_cpu_pstates(cpu); + if (!cpu->pstate.current_pstate) { + all_cpu_data[cpunum] = NULL; + kfree(cpu); + return -ENODATA; + } cpu->cpu = cpunum; From f244d8b623dae7a7bc695b0336f67729b95a9736 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 31 Dec 2013 13:39:42 +0100 Subject: [PATCH 121/164] ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian Reported-and-tested-by: Reported-and-tested-by: Joaquín Aramendía Cc: Alex Deucher Cc: Dave Airlie Cc: Takashi Iwai Signed-off-by: Rafael J. Wysocki Cc: 3.12+ # 3.12+ --- drivers/acpi/bus.c | 10 ++++++++ drivers/gpu/drm/nouveau/nouveau_acpi.c | 16 ++++++++++-- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 16 ++++++++++-- drivers/pci/hotplug/acpiphp_glue.c | 26 +++++++++++++++++--- include/acpi/acpi_bus.h | 4 ++- 5 files changed, 64 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index bba9b72e25f82..0710004055c80 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -156,6 +156,16 @@ int acpi_bus_get_private_data(acpi_handle handle, void **data) } EXPORT_SYMBOL(acpi_bus_get_private_data); +void acpi_bus_no_hotplug(acpi_handle handle) +{ + struct acpi_device *adev = NULL; + + acpi_bus_get_device(handle, &adev); + if (adev) + adev->flags.no_hotplug = true; +} +EXPORT_SYMBOL_GPL(acpi_bus_no_hotplug); + static void acpi_print_osc_error(acpi_handle handle, struct acpi_osc_context *context, char *error) { diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 95c740454049a..ba0183fb84f3b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -51,6 +51,7 @@ static struct nouveau_dsm_priv { bool dsm_detected; bool optimus_detected; acpi_handle dhandle; + acpi_handle other_handle; acpi_handle rom_handle; } nouveau_dsm_priv; @@ -260,9 +261,10 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev) if (!dhandle) return false; - if (!acpi_has_method(dhandle, "_DSM")) + if (!acpi_has_method(dhandle, "_DSM")) { + nouveau_dsm_priv.other_handle = dhandle; return false; - + } if (nouveau_test_dsm(dhandle, nouveau_dsm, NOUVEAU_DSM_POWER)) retval |= NOUVEAU_DSM_HAS_MUX; @@ -338,6 +340,16 @@ static bool nouveau_dsm_detect(void) printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n", acpi_method_name); nouveau_dsm_priv.dsm_detected = true; + /* + * On some systems hotplug events are generated for the device + * being switched off when _DSM is executed. They cause ACPI + * hotplug to trigger and attempt to remove the device from + * the system, which causes it to break down. Prevent that from + * happening by setting the no_hotplug flag for the involved + * ACPI device objects. + */ + acpi_bus_no_hotplug(nouveau_dsm_priv.dhandle); + acpi_bus_no_hotplug(nouveau_dsm_priv.other_handle); ret = true; } diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 9d302eaeea158..485848f889f55 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -33,6 +33,7 @@ static struct radeon_atpx_priv { bool atpx_detected; /* handle for device - and atpx */ acpi_handle dhandle; + acpi_handle other_handle; struct radeon_atpx atpx; } radeon_atpx_priv; @@ -451,9 +452,10 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) return false; status = acpi_get_handle(dhandle, "ATPX", &atpx_handle); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { + radeon_atpx_priv.other_handle = dhandle; return false; - + } radeon_atpx_priv.dhandle = dhandle; radeon_atpx_priv.atpx.handle = atpx_handle; return true; @@ -530,6 +532,16 @@ static bool radeon_atpx_detect(void) printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n", acpi_method_name); radeon_atpx_priv.atpx_detected = true; + /* + * On some systems hotplug events are generated for the device + * being switched off when ATPX is executed. They cause ACPI + * hotplug to trigger and attempt to remove the device from + * the system, which causes it to break down. Prevent that from + * happening by setting the no_hotplug flag for the involved + * ACPI device objects. + */ + acpi_bus_no_hotplug(radeon_atpx_priv.dhandle); + acpi_bus_no_hotplug(radeon_atpx_priv.other_handle); return true; } return false; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 438a4d09958ba..e86439283a5d1 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -645,6 +645,24 @@ static void disable_slot(struct acpiphp_slot *slot) slot->flags &= (~SLOT_ENABLED); } +static bool acpiphp_no_hotplug(acpi_handle handle) +{ + struct acpi_device *adev = NULL; + + acpi_bus_get_device(handle, &adev); + return adev && adev->flags.no_hotplug; +} + +static bool slot_no_hotplug(struct acpiphp_slot *slot) +{ + struct acpiphp_func *func; + + list_for_each_entry(func, &slot->funcs, sibling) + if (acpiphp_no_hotplug(func_to_handle(func))) + return true; + + return false; +} /** * get_slot_status - get ACPI slot status @@ -703,7 +721,8 @@ static void trim_stale_devices(struct pci_dev *dev) unsigned long long sta; status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); - alive = ACPI_SUCCESS(status) && sta == ACPI_STA_ALL; + alive = (ACPI_SUCCESS(status) && sta == ACPI_STA_ALL) + || acpiphp_no_hotplug(handle); } if (!alive) { u32 v; @@ -743,8 +762,9 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) struct pci_dev *dev, *tmp; mutex_lock(&slot->crit_sect); - /* wake up all functions */ - if (get_slot_status(slot) == ACPI_STA_ALL) { + if (slot_no_hotplug(slot)) { + ; /* do nothing */ + } else if (get_slot_status(slot) == ACPI_STA_ALL) { /* remove stale devices if any */ list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 7b2de026a4f3d..ca49ebd231ca8 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -168,7 +168,8 @@ struct acpi_device_flags { u32 ejectable:1; u32 power_manageable:1; u32 match_driver:1; - u32 reserved:27; + u32 no_hotplug:1; + u32 reserved:26; }; /* File System */ @@ -343,6 +344,7 @@ extern struct kobject *acpi_kobj; extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int); void acpi_bus_private_data_handler(acpi_handle, void *); int acpi_bus_get_private_data(acpi_handle, void **); +void acpi_bus_no_hotplug(acpi_handle handle); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); extern int unregister_acpi_notifier(struct notifier_block *); From 28a2a2e1aedbe2d8b2301e6e0e4e63f6e4177aca Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 26 Dec 2013 17:44:29 -0800 Subject: [PATCH 122/164] Input: allocate absinfo data when setting ABS capability We need to make sure we allocate absinfo data when we are setting one of EV_ABS/ABS_XXX capabilities, otherwise we may bomb when we try to emit this event. Rested-by: Paul Cercueil Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/input.c b/drivers/input/input.c index 846ccdd905b19..d2965e4b32243 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1871,6 +1871,10 @@ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int break; case EV_ABS: + input_alloc_absinfo(dev); + if (!dev->absinfo) + return; + __set_bit(code, dev->absbit); break; From efb753b8e0136fe66b507e8e1e42b5a2df7ba44d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Tue, 31 Dec 2013 11:51:16 +0100 Subject: [PATCH 123/164] crypto: ixp4xx - Fix kernel compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/crypto/ixp4xx_crypto.c: In function 'ixp_module_init': drivers/crypto/ixp4xx_crypto.c:1419:2: error: 'dev' undeclared (first use in this function) Now builds. Not tested on real hw. Signed-off-by: Krzysztof Hałasa Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 9dd6e01eac330..f757a0f428bde 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -1410,14 +1410,12 @@ static const struct platform_device_info ixp_dev_info __initdata = { static int __init ixp_module_init(void) { int num = ARRAY_SIZE(ixp4xx_algos); - int i, err ; + int i, err; pdev = platform_device_register_full(&ixp_dev_info); if (IS_ERR(pdev)) return PTR_ERR(pdev); - dev = &pdev->dev; - spin_lock_init(&desc_lock); spin_lock_init(&emerg_lock); From 0b3a2c9968d453d5827e635a6f3d69129f70af66 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 2 Jan 2014 19:52:20 +0900 Subject: [PATCH 124/164] GFS2: Fix unsafe dereference in dump_holder() GLOCK_BUG_ON() might call this function without RCU read lock. Make sure that RCU read lock is held when using task_struct returned from pid_task(). Signed-off-by: Tetsuo Handa Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c8420f7e4db60..6f7a47c052592 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1655,6 +1655,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) struct task_struct *gh_owner = NULL; char flags_buf[32]; + rcu_read_lock(); if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", @@ -1664,6 +1665,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, gh_owner ? gh_owner->comm : "(ended)", (void *)gh->gh_ip); + rcu_read_unlock(); return 0; } From 29bf08f12b2fd72b882da0d85b7385e4a438a297 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 28 Dec 2013 16:31:52 +0100 Subject: [PATCH 125/164] KVM: nVMX: Unconditionally uninit the MMU on nested vmexit Three reasons for doing this: 1. arch.walk_mmu points to arch.mmu anyway in case nested EPT wasn't in use. 2. this aligns VMX with SVM. But 3. is most important: nested_cpu_has_ept(vmcs12) queries the VMCS page, and if one guest VCPU manipulates the page of another VCPU in L2, we may be fooled to skip over the nested_ept_uninit_mmu_context, leaving mmu in nested state. That can crash the host later on if nested_ept_get_cr3 is invoked while L1 already left vmxon and nested.current_vmcs12 became NULL therefore. Cc: stable@kernel.org Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b2fe1c252f35f..da7837e1349da 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8283,8 +8283,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); kvm_set_cr4(vcpu, vmcs12->host_cr4); - if (nested_cpu_has_ept(vmcs12)) - nested_ept_uninit_mmu_context(vcpu); + nested_ept_uninit_mmu_context(vcpu); kvm_set_cr3(vcpu, vmcs12->host_cr3); kvm_mmu_reset_context(vcpu); From c424be1cbbf852e46acc84d73162af3066cd2c86 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 2 Jan 2014 12:58:43 -0800 Subject: [PATCH 126/164] mm: munlock: fix a bug where THP tail page is encountered Since commit ff6a6da60b89 ("mm: accelerate munlock() treatment of THP pages") munlock skips tail pages of a munlocked THP page. However, when the head page already has PageMlocked unset, it will not skip the tail pages. Commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and munlock+putback using pagevec") has added a PageTransHuge() check which contains VM_BUG_ON(PageTail(page)). Sasha Levin found this triggered using trinity, on the first tail page of a THP page without PageMlocked flag. This patch fixes the issue by skipping tail pages also in the case when PageMlocked flag is unset. There is still a possibility of race with THP page split between clearing PageMlocked and determining how many pages to skip. The race might result in former tail pages not being skipped, which is however no longer a bug, as during the skip the PageTail flags are cleared. However this race also affects correctness of NR_MLOCK accounting, which is to be fixed in a separate patch. Signed-off-by: Vlastimil Babka Reported-by: Sasha Levin Cc: Michel Lespinasse Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Mel Gorman Cc: Hugh Dickins Cc: Bob Liu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index d480cd6fc4758..c59c420fd6e16 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -133,7 +133,10 @@ static void __munlock_isolation_failed(struct page *page) /** * munlock_vma_page - munlock a vma page - * @page - page to be unlocked + * @page - page to be unlocked, either a normal page or THP page head + * + * returns the size of the page as a page mask (0 for normal page, + * HPAGE_PMD_NR - 1 for THP head page) * * called from munlock()/munmap() path with page supposedly on the LRU. * When we munlock a page, because the vma where we found the page is being @@ -148,21 +151,30 @@ static void __munlock_isolation_failed(struct page *page) */ unsigned int munlock_vma_page(struct page *page) { - unsigned int page_mask = 0; + unsigned int nr_pages; BUG_ON(!PageLocked(page)); if (TestClearPageMlocked(page)) { - unsigned int nr_pages = hpage_nr_pages(page); + nr_pages = hpage_nr_pages(page); mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); - page_mask = nr_pages - 1; if (!isolate_lru_page(page)) __munlock_isolated_page(page); else __munlock_isolation_failed(page); + } else { + nr_pages = hpage_nr_pages(page); } - return page_mask; + /* + * Regardless of the original PageMlocked flag, we determine nr_pages + * after touching the flag. This leaves a possible race with a THP page + * split, such that a whole THP page was munlocked, but nr_pages == 1. + * Returning a smaller mask due to that is OK, the worst that can + * happen is subsequent useless scanning of the former tail pages. + * The NR_MLOCK accounting can however become broken. + */ + return nr_pages - 1; } /** @@ -440,7 +452,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page = NULL; - unsigned int page_mask, page_increm; + unsigned int page_mask; + unsigned long page_increm; struct pagevec pvec; struct zone *zone; int zoneid; @@ -490,7 +503,9 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, goto next; } } - page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); + /* It's a bug to munlock in the middle of a THP page */ + VM_BUG_ON((start >> PAGE_SHIFT) & page_mask); + page_increm = 1 + page_mask; start += page_increm * PAGE_SIZE; next: cond_resched(); From 3b25df93c6e37e323b86a2a8c1e00c0a2821c6c9 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 2 Jan 2014 12:58:44 -0800 Subject: [PATCH 127/164] mm: munlock: fix deadlock in __munlock_pagevec() Commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and munlock+putback using pagevec" introduced __munlock_pagevec() to speed up munlock by holding lru_lock over multiple isolated pages. Pages that fail to be isolated are put_page()d immediately, also within the lock. This can lead to deadlock when __munlock_pagevec() becomes the holder of the last page pin and put_page() leads to __page_cache_release() which also locks lru_lock. The deadlock has been observed by Sasha Levin using trinity. This patch avoids the deadlock by deferring put_page() operations until lru_lock is released. Another pagevec (which is also used by later phases of the function is reused to gather the pages for put_page() operation. Signed-off-by: Vlastimil Babka Reported-by: Sasha Levin Cc: Michel Lespinasse Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Mel Gorman Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index c59c420fd6e16..192e6eebe4f24 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -298,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked = -nr; + int delta_munlocked; struct pagevec pvec_putback; int pgrescued = 0; + pagevec_init(&pvec_putback, 0); + /* Phase 1: page isolation */ spin_lock_irq(&zone->lru_lock); for (i = 0; i < nr; i++) { @@ -330,18 +332,21 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) /* * We won't be munlocking this page in the next phase * but we still need to release the follow_page_mask() - * pin. + * pin. We cannot do it under lru_lock however. If it's + * the last pin, __page_cache_release would deadlock. */ + pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; - put_page(page); - delta_munlocked++; } } + delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(&zone->lru_lock); + /* Now we can release pins of pages that we are not munlocking */ + pagevec_release(&pvec_putback); + /* Phase 2: page munlock */ - pagevec_init(&pvec_putback, 0); for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; From 4eb919825e6c3c7fb3630d5621f6d11e98a18b3a Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 2 Jan 2014 12:58:46 -0800 Subject: [PATCH 128/164] mm: fix use-after-free in sys_remap_file_pages remap_file_pages calls mmap_region, which may merge the VMA with other existing VMAs, and free "vma". This can lead to a use-after-free bug. Avoid the bug by remembering vm_flags before calling mmap_region, and not trying to dereference vma later. Signed-off-by: Rik van Riel Reported-by: Dmitry Vyukov Cc: PaX Team Cc: Kees Cook Cc: Michel Lespinasse Cc: Cyrill Gorcunov Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/fremap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/fremap.c b/mm/fremap.c index 5bff081477687..bbc4d660221ac 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -208,9 +208,10 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (mapping_cap_account_dirty(mapping)) { unsigned long addr; struct file *file = get_file(vma->vm_file); + /* mmap_region may free vma; grab the info now */ + vm_flags = vma->vm_flags; - addr = mmap_region(file, start, size, - vma->vm_flags, pgoff); + addr = mmap_region(file, start, size, vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; @@ -218,7 +219,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, BUG_ON(addr != start); err = 0; } - goto out; + goto out_freed; } mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); @@ -253,6 +254,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, out: if (vma) vm_flags = vma->vm_flags; +out_freed: if (likely(!has_write_lock)) up_read(&mm->mmap_sem); else From 695c60830764945cf61a2cc623eb1392d137223e Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 2 Jan 2014 12:58:47 -0800 Subject: [PATCH 129/164] memcg: fix memcg_size() calculation The mem_cgroup structure contains nr_node_ids pointers to mem_cgroup_per_node objects, not the objects themselves. Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Glauber Costa Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bf5e894571494..7f1a356153c01 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -338,7 +338,7 @@ struct mem_cgroup { static size_t memcg_size(void) { return sizeof(struct mem_cgroup) + - nr_node_ids * sizeof(struct mem_cgroup_per_node); + nr_node_ids * sizeof(struct mem_cgroup_per_node *); } /* internal only representation about the status of kmem accounting. */ From d0319bd52e373c661f9bc0c04dec85d6f87ac517 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 2 Jan 2014 12:58:49 -0800 Subject: [PATCH 130/164] mm: remove bogus warning in copy_huge_pmd() Sasha Levin reported the following warning being triggered WARNING: CPU: 28 PID: 35287 at mm/huge_memory.c:887 copy_huge_pmd+0x145/ 0x3a0() Call Trace: copy_huge_pmd+0x145/0x3a0 copy_page_range+0x3f2/0x560 dup_mmap+0x2c9/0x3d0 dup_mm+0xad/0x150 copy_process+0xa68/0x12e0 do_fork+0x96/0x270 SyS_clone+0x16/0x20 stub_clone+0x69/0x90 This warning was introduced by "mm: numa: Avoid unnecessary disruption of NUMA hinting during migration" for paranoia reasons but the warning is bogus. I was thinking of parallel races between NUMA hinting faults and forks but this warning would also be triggered by a parallel reclaim splitting a THP during a fork. Remote the bogus warning. Signed-off-by: Mel Gorman Reported-by: Sasha Levin Cc: Alex Thorlton Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7de1bf85f6833..9c0b17295ba03 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -883,9 +883,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out_unlock; } - /* mmap_sem prevents this happening but warn if that changes */ - WARN_ON(pmd_trans_migrating(pmd)); - if (unlikely(pmd_trans_splitting(pmd))) { /* split huge page running from under us */ spin_unlock(src_ptl); From c2fd4e38032239746badb0e21d72c3a00438feac Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 2 Jan 2014 12:58:50 -0800 Subject: [PATCH 131/164] MAINTAINERS: set up proper record for Xilinx Zynq Setup correct zynq entry. - Add missing cadence_ttc_timer maintainership - Add zynq wildcard - Add xilinx wildcard Signed-off-by: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 21c038f216e31..6c2079270791b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1368,6 +1368,9 @@ T: git git://git.xilinx.com/linux-xlnx.git S: Supported F: arch/arm/mach-zynq/ F: drivers/cpuidle/cpuidle-zynq.c +N: zynq +N: xilinx +F: drivers/clocksource/cadence_ttc_timer.c ARM SMMU DRIVER M: Will Deacon From a3e0f9e47d5ef7858a26cc12d90ad5146e802d47 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 2 Jan 2014 12:58:51 -0800 Subject: [PATCH 132/164] mm/memory-failure.c: transfer page count from head page to tail page after split thp Memory failures on thp tail pages cause kernel panic like below: mce: [Hardware Error]: Machine check events logged MCE exception done on CPU 7 BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: [] dequeue_hwpoisoned_huge_page+0x131/0x1e0 PGD bae42067 PUD ba47d067 PMD 0 Oops: 0000 [#1] SMP ... CPU: 7 PID: 128 Comm: kworker/7:2 Tainted: G M O 3.13.0-rc4-131217-1558-00003-g83b7df08e462 #25 ... Call Trace: me_huge_page+0x3e/0x50 memory_failure+0x4bb/0xc20 mce_process_work+0x3e/0x70 process_one_work+0x171/0x420 worker_thread+0x11b/0x3a0 ? manage_workers.isra.25+0x2b0/0x2b0 kthread+0xe4/0x100 ? kthread_create_on_node+0x190/0x190 ret_from_fork+0x7c/0xb0 ? kthread_create_on_node+0x190/0x190 ... RIP dequeue_hwpoisoned_huge_page+0x131/0x1e0 CR2: 0000000000000058 The reasoning of this problem is shown below: - when we have a memory error on a thp tail page, the memory error handler grabs a refcount of the head page to keep the thp under us. - Before unmapping the error page from processes, we split the thp, where page refcounts of both of head/tail pages don't change. - Then we call try_to_unmap() over the error page (which was a tail page before). We didn't pin the error page to handle the memory error, this error page is freed and removed from LRU list. - We never have the error page on LRU list, so the first page state check returns "unknown page," then we move to the second check with the saved page flag. - The saved page flag have PG_tail set, so the second page state check returns "hugepage." - We call me_huge_page() for freed error page, then we hit the above panic. The root cause is that we didn't move refcount from the head page to the tail page after split thp. So this patch suggests to do this. This panic was introduced by commit 524fca1e73 ("HWPOISON: fix misjudgement of page_action() for errors on mlocked pages"). Note that we did have the same refcount problem before this commit, but it was just ignored because we had only first page state check which returned "unknown page." The commit changed the refcount problem from "doesn't work" to "kernel panic." Signed-off-by: Naoya Horiguchi Reviewed-by: Wanpeng Li Cc: Andi Kleen Cc: [3.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index db08af92c6fce..fabe55046c1d7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -938,6 +938,16 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, BUG_ON(!PageHWPoison(p)); return SWAP_FAIL; } + /* + * We pinned the head page for hwpoison handling, + * now we split the thp and we are interested in + * the hwpoisoned raw page, so move the refcount + * to it. + */ + if (hpage != p) { + put_page(hpage); + get_page(p); + } /* THP is split, so ppage should be the real poisoned page. */ ppage = p; } From 3532e5660fa4b610b982fe5c09d3e8ab065c55dc Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 2 Jan 2014 12:58:52 -0800 Subject: [PATCH 133/164] drivers/dma/ioat/dma.c: check DMA mapping error in ioat_dma_self_test() Check DMA mapping return values in function ioat_dma_self_test() to get rid of following warning message. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1203 at lib/dma-debug.c:937 check_unmap+0x4c0/0x9a0() ioatdma 0000:00:04.0: DMA-API: device driver failed to check map error[device address=0x000000085191b000] [size=2000 bytes] [mapped as single] Modules linked in: ioatdma(+) mac_hid wmi acpi_pad lp parport hidd_generic usbhid hid ixgbe isci dca libsas ahci ptp libahci scsi_transport_sas meegaraid_sas pps_core mdio CPU: 0 PID: 1203 Comm: systemd-udevd Not tainted 3.13.0-rc4+ #8 Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRIVTIIN1.86B.0044.L09.1311181644 11/18/2013 Call Trace: dump_stack+0x4d/0x66 warn_slowpath_common+0x7d/0xa0 warn_slowpath_fmt+0x4c/0x50 check_unmap+0x4c0/0x9a0 debug_dma_unmap_page+0x81/0x90 ioat_dma_self_test+0x3d2/0x680 [ioatdma] ioat3_dma_self_test+0x12/0x30 [ioatdma] ioat_probe+0xf4/0x110 [ioatdma] ioat3_dma_probe+0x268/0x410 [ioatdma] ioat_pci_probe+0x122/0x1b0 [ioatdma] local_pci_probe+0x45/0xa0 pci_device_probe+0xd9/0x130 driver_probe_device+0x171/0x490 __driver_attach+0x93/0xa0 bus_for_each_dev+0x6b/0xb0 driver_attach+0x1e/0x20 bus_add_driver+0x1f8/0x2b0 driver_register+0x81/0x110 __pci_register_driver+0x60/0x70 ioat_init_module+0x89/0x1000 [ioatdma] do_one_initcall+0xe2/0x250 load_module+0x2313/0x2a00 SyS_init_module+0xd9/0x130 system_call_fastpath+0x1a/0x1f ---[ end trace 990c591681d27c31 ]--- Mapped at: debug_dma_map_page+0xbe/0x180 ioat_dma_self_test+0x1ab/0x680 [ioatdma] ioat3_dma_self_test+0x12/0x30 [ioatdma] ioat_probe+0xf4/0x110 [ioatdma] ioat3_dma_probe+0x268/0x410 [ioatdma] Signed-off-by: Jiang Liu Cc: Vinod Koul Cc: Dan Williams Cc: Bartlomiej Zolnierkiewicz Cc: Kyungmin Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dma/ioat/dma.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 1a49c777607c5..87529181efccb 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -817,7 +817,15 @@ int ioat_dma_self_test(struct ioatdma_device *device) } dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "mapping src buffer failed\n"); + goto free_resources; + } dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_dest)) { + dev_err(dev, "mapping dest buffer failed\n"); + goto unmap_src; + } flags = DMA_PREP_INTERRUPT; tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, IOAT_TEST_SIZE, flags); @@ -855,8 +863,9 @@ int ioat_dma_self_test(struct ioatdma_device *device) } unmap_dma: - dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); +unmap_src: + dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); free_resources: dma->device_free_chan_resources(dma_chan); out: From ad70b029d2c678386384bd72c7fa2705c449b518 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Thu, 2 Jan 2014 12:58:53 -0800 Subject: [PATCH 134/164] sh: add EXPORT_SYMBOL(min_low_pfn) and EXPORT_SYMBOL(max_low_pfn) to sh_ksyms_32.c Min_low_pfn and max_low_pfn were used in pfn_valid macro if defined CONFIG_FLATMEM. When the functions that use the pfn_valid is used in driver module, max_low_pfn and min_low_pfn is to undefined, and fail to build. ERROR: "min_low_pfn" [drivers/block/aoe/aoe.ko] undefined! ERROR: "max_low_pfn" [drivers/block/aoe/aoe.ko] undefined! make[2]: *** [__modpost] Error 1 make[1]: *** [modules] Error 2 This patch fix this problem. Signed-off-by: Nobuhiro Iwamatsu Cc: Kuninori Morimoto Cc: Paul Mundt Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/kernel/sh_ksyms_32.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c index 2a0a596ebf67d..d77f2f6c7ff07 100644 --- a/arch/sh/kernel/sh_ksyms_32.c +++ b/arch/sh/kernel/sh_ksyms_32.c @@ -20,6 +20,11 @@ EXPORT_SYMBOL(csum_partial_copy_generic); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(empty_zero_page); +#ifdef CONFIG_FLATMEM +/* need in pfn_valid macro */ +EXPORT_SYMBOL(min_low_pfn); +EXPORT_SYMBOL(max_low_pfn); +#endif #define DECLARE_EXPORT(name) \ extern void name(void);EXPORT_SYMBOL(name) From 4ff36ee94d93ddb4b7846177f1118d9aa33408e2 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 2 Jan 2014 12:58:54 -0800 Subject: [PATCH 135/164] epoll: do not take the nested ep->mtx on EPOLL_CTL_DEL The EPOLL_CTL_DEL path of epoll contains a classic, ab-ba deadlock. That is, epoll_ctl(a, EPOLL_CTL_DEL, b, x), will deadlock with epoll_ctl(b, EPOLL_CTL_DEL, a, x). The deadlock was introduced with commmit 67347fe4e632 ("epoll: do not take global 'epmutex' for simple topologies"). The acquistion of the ep->mtx for the destination 'ep' was added such that a concurrent EPOLL_CTL_ADD operation would see the correct state of the ep (Specifically, the check for '!list_empty(&f.file->f_ep_links') However, by simply not acquiring the lock, we do not serialize behind the ep->mtx from the add path, and thus may perform a full path check when if we had waited a little longer it may not have been necessary. However, this is a transient state, and performing the full loop checking in this case is not harmful. The important point is that we wouldn't miss doing the full loop checking when required, since EPOLL_CTL_ADD always locks any 'ep's that its operating upon. The reason we don't need to do lock ordering in the add path, is that we are already are holding the global 'epmutex' whenever we do the double lock. Further, the original posting of this patch, which was tested for the intended performance gains, did not perform this additional locking. Signed-off-by: Jason Baron Cc: Nathan Zimmer Cc: Eric Wong Cc: Nelson Elhage Cc: Al Viro Cc: Davide Libenzi Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8b5e2584c8409..af903128891cf 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1907,10 +1907,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, } } } - if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) { - tep = tf.file->private_data; - mutex_lock_nested(&tep->mtx, 1); - } /* * Try to lookup the file inside our RB tree, Since we grabbed "mtx" From d6e0a2dd12f4067a5bcefb8bbd8ddbeff800afbc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Jan 2014 15:12:14 -0800 Subject: [PATCH 136/164] Linux 3.13-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ab80be7a38bca..ae1a55ad687c7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = One Giant Leap for Frogkind # *DOCUMENTATION* From 78a2e12f51d9ec37c328fa5a429c676eeeba8cd1 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 27 Nov 2013 02:18:26 +0100 Subject: [PATCH 137/164] iommu: shmobile: Enable driver compilation with COMPILE_TEST This helps increasing build testing coverage. The driver doesn't compile on non-ARM platforms due to usage of the ARM DMA IOMMU API, restrict compilation to ARM. Cc: Joerg Roedel Cc: iommu@lists.linux-foundation.org Signed-off-by: Laurent Pinchart Acked-by: Simon Horman Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 3e7fdbb4916b1..79bbc21c1d016 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -207,6 +207,7 @@ config SHMOBILE_IOMMU bool "IOMMU for Renesas IPMMU/IPMMUI" default n depends on ARM + depends on SH_MOBILE || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU select SHMOBILE_IPMMU From cbff5634dcb78ae2c3a687de2b097fd26967604d Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 4 Dec 2013 17:22:53 -0800 Subject: [PATCH 138/164] iommu: add missing include Fix a warning in of_iommu.c: drivers/iommu/of_iommu.c:38:5: warning: no previous prototype for 'of_get_dma_window' [-Wmissing-prototypes] Signed-off-by: Brian Norris Cc: Hiroshi DOYU Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index ee249bc959f84..e550ccb7634e9 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -20,6 +20,7 @@ #include #include #include +#include /** * of_get_dma_window - Parse *dma-window property and returns 0 if found. From b82a2272b37af1f1f86ee6e5966ad941f9db5dc7 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 5 Dec 2013 19:42:41 +0800 Subject: [PATCH 139/164] iommu/amd: Use dev_is_pci() to check whether it is pci device Use PCI standard marco dev_is_pci() instead of directly compare pci_bus_type to check whether it is pci device. Signed-off-by: Yijing Wang Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 72531f008a5e3..faf0da4bb3a2f 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -248,8 +248,8 @@ static bool check_device(struct device *dev) if (!dev || !dev->dma_mask) return false; - /* No device or no PCI device */ - if (dev->bus != &pci_bus_type) + /* No PCI device */ + if (!dev_is_pci(dev)) return false; devid = get_device_id(dev); From dbad086433af81513c84678070522455fefebe2a Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 5 Dec 2013 19:43:42 +0800 Subject: [PATCH 140/164] iommu/vt-d: Use dev_is_pci() to check whether it is pci device Use PCI standard marco dev_is_pci() instead of directly compare pci_bus_type to check whether it is pci device. Signed-off-by: Yijing Wang Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 43b9bfea48fa4..64d8942d6d14b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2758,7 +2758,7 @@ static int iommu_no_mapping(struct device *dev) struct pci_dev *pdev; int found; - if (unlikely(dev->bus != &pci_bus_type)) + if (unlikely(!dev_is_pci(dev))) return 1; pdev = to_pci_dev(dev); From b3eb76d17570bd17d280e5d7deb8aad6b6d83d8a Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 5 Dec 2013 19:42:49 +0800 Subject: [PATCH 141/164] iommu/fsl_pamu: Use dev_is_pci() to check whether it is pci device Use PCI standard marco dev_is_pci() instead of directly compare pci_bus_type to check whether it is pci device. Signed-off-by: Yijing Wang Acked-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index c857c30da9791..93072ba44b1d1 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -691,7 +691,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, * Use LIODN of the PCI controller while attaching a * PCI device. */ - if (dev->bus == &pci_bus_type) { + if (dev_is_pci(dev)) { pdev = to_pci_dev(dev); pci_ctl = pci_bus_to_host(pdev->bus); /* @@ -729,7 +729,7 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain, * Use LIODN of the PCI controller while detaching a * PCI device. */ - if (dev->bus == &pci_bus_type) { + if (dev_is_pci(dev)) { pdev = to_pci_dev(dev); pci_ctl = pci_bus_to_host(pdev->bus); /* @@ -1056,7 +1056,7 @@ static int fsl_pamu_add_device(struct device *dev) * For platform devices we allocate a separate group for * each of the devices. */ - if (dev->bus == &pci_bus_type) { + if (dev_is_pci(dev)) { pdev = to_pci_dev(dev); /* Don't create device groups for virtual PCI bridges */ if (pdev->subordinate) From 5b5c13996ff115599e13b719fc7e4c39746d3b30 Mon Sep 17 00:00:00 2001 From: "Upinder Malhi (umalhi)" Date: Wed, 11 Dec 2013 20:19:56 +0000 Subject: [PATCH 142/164] iommu: Rename domain_has_cap to iommu_domain_has_cap domain_has_cap is a misnomer bc the func name should be the same for CONFIG_IOMMU_API and !CONFIG_IOMMU_API. Signed-off-by: Upinder Malhi Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a444c790fa723..8876ef9821691 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -291,8 +291,8 @@ static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_ad return 0; } -static inline int domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static inline int iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) { return 0; } From dc89f797abdbfc58dfb28af944f80f0299a8fafa Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 17 Dec 2013 18:18:49 +0100 Subject: [PATCH 143/164] iommu/shmobile: Allocate archdata with kzalloc() The archdata attached_list field isn't initialized, leading to random crashes when accessed. Use kzalloc() to allocate the whole structure and make sure all fields get initialized properly. Signed-off-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/shmobile-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index d572863dfccd4..7a3b928fad1c8 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -380,14 +380,13 @@ int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu) kmem_cache_destroy(l1cache); return -ENOMEM; } - archdata = kmalloc(sizeof(*archdata), GFP_KERNEL); + archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); if (!archdata) { kmem_cache_destroy(l1cache); kmem_cache_destroy(l2cache); return -ENOMEM; } spin_lock_init(&archdata->attach_lock); - archdata->attached = NULL; archdata->ipmmu = ipmmu; ipmmu_archdata = archdata; bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops); From e87c621dced122a9a0a51b56944b08421d273c8f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 17 Dec 2013 18:18:50 +0100 Subject: [PATCH 144/164] iommu/shmobile: Turn the flush_lock mutex into a spinlock The lock is taken in atomic context, replace it with a spinlock. Signed-off-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/shmobile-ipmmu.c | 10 +++++----- drivers/iommu/shmobile-ipmmu.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/shmobile-ipmmu.c b/drivers/iommu/shmobile-ipmmu.c index 8321f89596c49..e3bc2e19b6dd8 100644 --- a/drivers/iommu/shmobile-ipmmu.c +++ b/drivers/iommu/shmobile-ipmmu.c @@ -35,12 +35,12 @@ void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu) if (!ipmmu) return; - mutex_lock(&ipmmu->flush_lock); + spin_lock(&ipmmu->flush_lock); if (ipmmu->tlb_enabled) ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN); else ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH); - mutex_unlock(&ipmmu->flush_lock); + spin_unlock(&ipmmu->flush_lock); } void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, @@ -49,7 +49,7 @@ void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, if (!ipmmu) return; - mutex_lock(&ipmmu->flush_lock); + spin_lock(&ipmmu->flush_lock); switch (size) { default: ipmmu->tlb_enabled = 0; @@ -85,7 +85,7 @@ void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, } ipmmu_reg_write(ipmmu, IMTTBR, phys); ipmmu_reg_write(ipmmu, IMASID, asid); - mutex_unlock(&ipmmu->flush_lock); + spin_unlock(&ipmmu->flush_lock); } static int ipmmu_probe(struct platform_device *pdev) @@ -104,7 +104,7 @@ static int ipmmu_probe(struct platform_device *pdev) dev_err(&pdev->dev, "cannot allocate device data\n"); return -ENOMEM; } - mutex_init(&ipmmu->flush_lock); + spin_lock_init(&ipmmu->flush_lock); ipmmu->dev = &pdev->dev; ipmmu->ipmmu_base = devm_ioremap_nocache(&pdev->dev, res->start, resource_size(res)); diff --git a/drivers/iommu/shmobile-ipmmu.h b/drivers/iommu/shmobile-ipmmu.h index 4d53684673e1f..9524743ca1fba 100644 --- a/drivers/iommu/shmobile-ipmmu.h +++ b/drivers/iommu/shmobile-ipmmu.h @@ -14,7 +14,7 @@ struct shmobile_ipmmu { struct device *dev; void __iomem *ipmmu_base; int tlb_enabled; - struct mutex flush_lock; + spinlock_t flush_lock; const char * const *dev_names; unsigned int num_dev_names; }; From 360eb3c5687e2df23e29e97878238765bfe6a756 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:08 +0800 Subject: [PATCH 145/164] iommu/vt-d: use dedicated bitmap to track remapping entry allocation status Currently Intel interrupt remapping drivers uses the "present" flag bit in remapping entry to track whether an entry is allocated or not. It works as follow: 1) allocate a remapping entry and set its "present" flag bit to 1 2) compose other fields for the entry 3) update the remapping entry with the composed value The remapping hardware may access the entry between step 1 and step 3, which then observers an entry with the "present" flag set but random values in all other fields. This patch introduces a dedicated bitmap to track remapping entry allocation status instead of sharing the "present" flag with hardware, thus eliminate the race window. It also simplifies the implementation. Tested-and-reviewed-by: Yijing Wang Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 55 ++++++++++++++--------------- include/linux/intel-iommu.h | 1 + 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index c988b8d85df88..3aa9b5c347e4e 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -72,7 +72,6 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) u16 index, start_index; unsigned int mask = 0; unsigned long flags; - int i; if (!count || !irq_iommu) return -1; @@ -96,32 +95,17 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) } raw_spin_lock_irqsave(&irq_2_ir_lock, flags); - do { - for (i = index; i < index + count; i++) - if (table->base[i].present) - break; - /* empty index found */ - if (i == index + count) - break; - - index = (index + count) % INTR_REMAP_TABLE_ENTRIES; - - if (index == start_index) { - raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); - printk(KERN_ERR "can't allocate an IRTE\n"); - return -1; - } - } while (1); - - for (i = index; i < index + count; i++) - table->base[i].present = 1; - - cfg->remapped = 1; - irq_iommu->iommu = iommu; - irq_iommu->irte_index = index; - irq_iommu->sub_handle = 0; - irq_iommu->irte_mask = mask; - + index = bitmap_find_free_region(table->bitmap, + INTR_REMAP_TABLE_ENTRIES, mask); + if (index < 0) { + pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id); + } else { + cfg->remapped = 1; + irq_iommu->iommu = iommu; + irq_iommu->irte_index = index; + irq_iommu->sub_handle = 0; + irq_iommu->irte_mask = mask; + } raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return index; @@ -254,6 +238,8 @@ static int clear_entries(struct irq_2_iommu *irq_iommu) set_64bit(&entry->low, 0); set_64bit(&entry->high, 0); } + bitmap_release_region(iommu->ir_table->bitmap, index, + irq_iommu->irte_mask); return qi_flush_iec(iommu, index, irq_iommu->irte_mask); } @@ -453,6 +439,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode) { struct ir_table *ir_table; struct page *pages; + unsigned long *bitmap; ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), GFP_ATOMIC); @@ -464,13 +451,23 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode) INTR_REMAP_PAGE_ORDER); if (!pages) { - printk(KERN_ERR "failed to allocate pages of order %d\n", - INTR_REMAP_PAGE_ORDER); + pr_err("IR%d: failed to allocate pages of order %d\n", + iommu->seq_id, INTR_REMAP_PAGE_ORDER); kfree(iommu->ir_table); return -ENOMEM; } + bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES), + sizeof(long), GFP_ATOMIC); + if (bitmap == NULL) { + pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); + __free_pages(pages, INTR_REMAP_PAGE_ORDER); + kfree(ir_table); + return -ENOMEM; + } + ir_table->base = page_address(pages); + ir_table->bitmap = bitmap; iommu_set_irq_remapping(iommu, mode); return 0; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d380c5e680086..de1e5e9364207 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -288,6 +288,7 @@ struct q_inval { struct ir_table { struct irte *base; + unsigned long *bitmap; }; #endif From 4c071c7b851ba47f66cdff523160225288382c7b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 7 Jan 2014 22:55:06 +0100 Subject: [PATCH 146/164] drm/msm: Fix link error with !MSM_IOMMU The DRM driver for MSM depends on symbols from the MSM IOMMU driver. Add this dependency to the Kconfig file. Fixes this comile error: Kernel: arch/arm/boot/zImage is ready ERROR: "msm_iommu_get_ctx" [drivers/gpu/drm/msm/msm.ko] undefined! make[2]: *** [__modpost] Error 1 Cc: Rob Clark Signed-off-by: Joerg Roedel --- drivers/gpu/drm/msm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index f39ab7554fc99..d3de8e1ae9157 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -4,6 +4,7 @@ config DRM_MSM depends on DRM depends on ARCH_MSM depends on ARCH_MSM8960 + depends on MSM_IOMMU select DRM_KMS_HELPER select SHMEM select TMPFS From ada4d4b2a32e9f63d4dcb9f69578473408f4622c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:09 +0800 Subject: [PATCH 147/164] iommu/vt-d: fix PCI device reference leakage on error recovery path Function dmar_parse_dev_scope() should release the PCI device reference count gained in function dmar_parse_one_dev_scope() on error recovery, otherwise it will cause PCI device object leakage. This patch also introduces dmar_free_dev_scope(), which will be used to support DMAR device hotplug. Reviewed-by: Yijing Wang Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 14 ++++++++++++-- include/linux/dmar.h | 5 +++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index fb35d1bd19e10..28d93b68ff026 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -100,7 +100,6 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, if (!pdev) { pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n", segment, scope->bus, path->device, path->function); - *dev = NULL; return 0; } if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \ @@ -151,7 +150,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, ret = dmar_parse_one_dev_scope(scope, &(*devices)[index], segment); if (ret) { - kfree(*devices); + dmar_free_dev_scope(devices, cnt); return ret; } index ++; @@ -162,6 +161,17 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, return 0; } +void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt) +{ + if (*devices && *cnt) { + while (--*cnt >= 0) + pci_dev_put((*devices)[*cnt]); + kfree(*devices); + *devices = NULL; + *cnt = 0; + } +} + /** * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition * structure which uniquely represent one DMA remapping hardware unit diff --git a/include/linux/dmar.h b/include/linux/dmar.h index b029d1aa2d12a..205ee37eed73b 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -62,6 +62,9 @@ extern struct list_head dmar_drhd_units; extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); +extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, + struct pci_dev ***devices, u16 segment); +extern void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt); /* Intel IOMMU detection */ extern int detect_intel_iommu(void); @@ -157,8 +160,6 @@ struct dmar_atsr_unit { int dmar_parse_rmrr_atsr_dev(void); extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); -extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct pci_dev ***devices, u16 segment); extern int intel_iommu_init(void); #else /* !CONFIG_INTEL_IOMMU: */ static inline int intel_iommu_init(void) { return -ENODEV; } From 18d99165d3ebe5e365de57bcc673901d754c7142 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:10 +0800 Subject: [PATCH 148/164] iommu/vt-d: fix a race window in allocating domain ID for virtual machines Function intel_iommu_domain_init() may be concurrently called by upper layer without serialization, so use atomic_t to protect domain id allocation. Signed-off-by: Jiang Liu Cc: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 64d8942d6d14b..e2d2cb3119b36 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3877,7 +3877,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) } /* domain id for virtual machine, it won't be set in context */ -static unsigned long vm_domid; +static atomic_t vm_domid = ATOMIC_INIT(0); static struct dmar_domain *iommu_alloc_vm_domain(void) { @@ -3887,7 +3887,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void) if (!domain) return NULL; - domain->id = vm_domid++; + domain->id = atomic_inc_return(&vm_domid); domain->nid = -1; memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp)); domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; From 852bdb04f81c276969d43b9e15048259d028881f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:11 +0800 Subject: [PATCH 149/164] iommu/vt-d: fix resource leakage on error recovery path in iommu_init_domains() Release allocated resources on error recovery path in function iommu_init_domains(). Also improve printk messages in iommu_init_domains(). Acked-by: Yijing Wang Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e2d2cb3119b36..0cbf1dda07308 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1255,8 +1255,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) unsigned long nlongs; ndomains = cap_ndoms(iommu->cap); - pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id, - ndomains); + pr_debug("IOMMU%d: Number of Domains supported <%ld>\n", + iommu->seq_id, ndomains); nlongs = BITS_TO_LONGS(ndomains); spin_lock_init(&iommu->lock); @@ -1266,13 +1266,17 @@ static int iommu_init_domains(struct intel_iommu *iommu) */ iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); if (!iommu->domain_ids) { - printk(KERN_ERR "Allocating domain id array failed\n"); + pr_err("IOMMU%d: allocating domain id array failed\n", + iommu->seq_id); return -ENOMEM; } iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), GFP_KERNEL); if (!iommu->domains) { - printk(KERN_ERR "Allocating domain array failed\n"); + pr_err("IOMMU%d: allocating domain array failed\n", + iommu->seq_id); + kfree(iommu->domain_ids); + iommu->domain_ids = NULL; return -ENOMEM; } From 5c645b35b77024fb440b2bc8847fa0193119b2a6 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:12 +0800 Subject: [PATCH 150/164] iommu/vt-d, trivial: refine support of 64bit guest address In Intel IOMMU driver, it calculate page table level from adjusted guest address width as 'level = (agaw - 30) / 9', which assumes (agaw -30) could be divided by 9. On the other hand, 64bit is a valid agaw and (64 - 30) can't be divided by 9, so it needs special handling. This patch enhances Intel IOMMU driver to correctly handle 64bit agaw. It's mainly for code readability because there's no hardware supporting 64bit agaw yet. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0cbf1dda07308..7bddb9b32da8d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -63,6 +63,7 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 #define MAX_AGAW_WIDTH 64 +#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) @@ -106,12 +107,12 @@ static inline int agaw_to_level(int agaw) static inline int agaw_to_width(int agaw) { - return 30 + agaw * LEVEL_STRIDE; + return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH); } static inline int width_to_agaw(int width) { - return (width - 30) / LEVEL_STRIDE; + return DIV_ROUND_UP(width - 30, LEVEL_STRIDE); } static inline unsigned int level_to_offset_bits(int level) @@ -141,7 +142,7 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) static inline unsigned long lvl_to_nr_pages(unsigned int lvl) { - return 1 << ((lvl - 1) * LEVEL_STRIDE); + return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); } /* VT-d pages must always be _smaller_ than MM pages. Otherwise things @@ -865,7 +866,6 @@ static int dma_pte_clear_range(struct dmar_domain *domain, int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned int large_page = 1; struct dma_pte *first_pte, *pte; - int order; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -890,8 +890,7 @@ static int dma_pte_clear_range(struct dmar_domain *domain, } while (start_pfn && start_pfn <= last_pfn); - order = (large_page - 1) * 9; - return order; + return min_t(int, (large_page - 1) * 9, MAX_AGAW_PFN_WIDTH); } static void dma_pte_free_level(struct dmar_domain *domain, int level, From 9544c003e85f6ac6b0b617e15266fe2e81caa42a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:13 +0800 Subject: [PATCH 151/164] iommu/vt-d, trivial: print correct domain id of static identity domain Field si_domain->id is set by iommu_attach_domain(), so we should only print domain id for static identity domain after calling iommu_attach_domain(si_domain, iommu), otherwise it's always zero. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7bddb9b32da8d..01922be564ca2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2248,8 +2248,6 @@ static int __init si_domain_init(int hw) if (!si_domain) return -EFAULT; - pr_debug("Identity mapping domain is domain %d\n", si_domain->id); - for_each_active_iommu(iommu, drhd) { ret = iommu_attach_domain(si_domain, iommu); if (ret) { @@ -2264,6 +2262,8 @@ static int __init si_domain_init(int hw) } si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + pr_debug("IOMMU: identity mapping domain is domain %d\n", + si_domain->id); if (hw) return 0; From b977e73a837963ad73d24db4ca7b71040791868c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:14 +0800 Subject: [PATCH 152/164] iommu/vt-d, trivial: check suitable flag in function detect_intel_iommu() Flag irq_remapping_enabled is only set by intel_enable_irq_remapping(), which is called after detect_intel_iommu(). So moving pr_info() from detect_intel_iommu() to intel_enable_irq_remapping(), which also slightly simplifies implementation. Reviewed-by: Yijing Wang Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 8 -------- drivers/iommu/intel_irq_remapping.c | 2 ++ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 28d93b68ff026..e4c3ea0224281 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -556,14 +556,6 @@ int __init detect_intel_iommu(void) if (ret) ret = check_zero_address(); { - struct acpi_table_dmar *dmar; - - dmar = (struct acpi_table_dmar *) dmar_tbl; - - if (ret && irq_remapping_enabled && cpu_has_x2apic && - dmar->flags & 0x1) - pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); - if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; /* Make sure ACS will be enabled */ diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 3aa9b5c347e4e..b9256a4f946dc 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -561,6 +561,8 @@ static int __init intel_enable_irq_remapping(void) } if (x2apic_present) { + pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); + eim = !dmar_x2apic_optout(); if (!eim) printk(KERN_WARNING From b8a2d2881e6682464f6a832eea4c9be298d70c41 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:15 +0800 Subject: [PATCH 153/164] iommu/vt-d, trivial: clean up unused code Remove dead code from VT-d related files. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel Conflicts: drivers/iommu/dmar.c --- drivers/iommu/dmar.c | 2 -- drivers/iommu/intel-iommu.c | 25 ------------------------- 2 files changed, 27 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index e4c3ea0224281..5a4e9afad3af3 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1113,8 +1113,6 @@ static const char *irq_remap_fault_reasons[] = "Blocked an interrupt request due to source-id verification failure", }; -#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) - static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) { if (fault_reason >= 0x20 && (fault_reason - 0x20 < diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 01922be564ca2..7a29a5e6e5a47 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -289,26 +289,6 @@ static inline void dma_clear_pte(struct dma_pte *pte) pte->val = 0; } -static inline void dma_set_pte_readable(struct dma_pte *pte) -{ - pte->val |= DMA_PTE_READ; -} - -static inline void dma_set_pte_writable(struct dma_pte *pte) -{ - pte->val |= DMA_PTE_WRITE; -} - -static inline void dma_set_pte_snp(struct dma_pte *pte) -{ - pte->val |= DMA_PTE_SNP; -} - -static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) -{ - pte->val = (pte->val & ~3) | (prot & 3); -} - static inline u64 dma_pte_addr(struct dma_pte *pte) { #ifdef CONFIG_64BIT @@ -319,11 +299,6 @@ static inline u64 dma_pte_addr(struct dma_pte *pte) #endif } -static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) -{ - pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT; -} - static inline bool dma_pte_present(struct dma_pte *pte) { return (pte->val & 3) != 0; From 694835dc227ad203886457aa447025d09b2f7523 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:16 +0800 Subject: [PATCH 154/164] iommu/vt-d: mark internal functions as static Functions alloc_iommu() and parse_ioapics_under_ir() are only used internally, so mark them as static. [Joerg: Made detect_intel_iommu() non-static again for IA64] Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 5 ++++- drivers/iommu/intel_irq_remapping.c | 4 +++- include/linux/dmar.h | 4 +--- include/linux/intel-iommu.h | 1 - 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 5a4e9afad3af3..e3c03bb7c3743 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -52,6 +52,8 @@ LIST_HEAD(dmar_drhd_units); struct acpi_table_header * __initdata dmar_tbl; static acpi_size dmar_tbl_size; +static int alloc_iommu(struct dmar_drhd_unit *drhd); + static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { /* @@ -649,7 +651,7 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) return err; } -int alloc_iommu(struct dmar_drhd_unit *drhd) +static int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; u32 ver, sts; @@ -1366,4 +1368,5 @@ int __init dmar_ir_support(void) return 0; return dmar->flags & 0x1; } + IOMMU_INIT_POST(detect_intel_iommu); diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index b9256a4f946dc..10d3187e5fa07 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -40,6 +40,8 @@ static int ir_ioapic_num, ir_hpet_num; static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); +static int __init parse_ioapics_under_ir(void); + static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { struct irq_cfg *cfg = irq_get_chip_data(irq); @@ -773,7 +775,7 @@ static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header, * Finds the assocaition between IOAPIC's and its Interrupt-remapping * hardware unit. */ -int __init parse_ioapics_under_ir(void) +static int __init parse_ioapics_under_ir(void) { struct dmar_drhd_unit *drhd; int ir_supported = 0; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 205ee37eed73b..1a60dd630bb43 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -33,6 +33,7 @@ struct acpi_dmar_header; #define DMAR_X2APIC_OPT_OUT 0x2 struct intel_iommu; + #ifdef CONFIG_DMAR_TABLE extern struct acpi_table_header *dmar_tbl; struct dmar_drhd_unit { @@ -69,9 +70,6 @@ extern void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt); /* Intel IOMMU detection */ extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); - -extern int parse_ioapics_under_ir(void); -extern int alloc_iommu(struct dmar_drhd_unit *); #else static inline int detect_intel_iommu(void) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index de1e5e9364207..f2c4114b86652 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -348,7 +348,6 @@ static inline void __iommu_flush_cache( extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); -extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void dmar_disable_qi(struct intel_iommu *iommu); From 2fe2c6025d6a4939ae2fc97d1d761fc4a8d1abd9 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:17 +0800 Subject: [PATCH 155/164] iommu/vt-d, trivial: use defined macro instead of hardcoding Use defined macro instead of hardcoding in function set_ioapic_sid() for readability. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 10d3187e5fa07..fdf57534b76cb 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -324,7 +324,7 @@ static int set_ioapic_sid(struct irte *irte, int apic) return -1; } - set_irte_sid(irte, 1, 0, sid); + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid); return 0; } From 7c9197791a0cbbbb0f74aade3339f8e5890fbd15 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:18 +0800 Subject: [PATCH 156/164] iommu/vt-d, trivial: simplify code with existing macros Simplify vt-d related code with existing macros and introduce a new macro for_each_active_drhd_unit() to enumerate all active DRHD unit. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 7 ++-- drivers/iommu/intel-iommu.c | 55 +++++++---------------------- drivers/iommu/intel_irq_remapping.c | 31 ++++++---------- include/linux/dmar.h | 4 +++ 4 files changed, 29 insertions(+), 68 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index e3c03bb7c3743..ee4cb1906e45d 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1305,15 +1305,14 @@ int dmar_set_interrupt(struct intel_iommu *iommu) int __init enable_drhd_fault_handling(void) { struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; /* * Enable fault control interrupt. */ - for_each_drhd_unit(drhd) { - int ret; - struct intel_iommu *iommu = drhd->iommu; + for_each_iommu(iommu, drhd) { u32 fault_status; - ret = dmar_set_interrupt(iommu); + int ret = dmar_set_interrupt(iommu); if (ret) { pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7a29a5e6e5a47..3731bf68ddce1 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -628,9 +628,7 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) struct dmar_drhd_unit *drhd = NULL; int i; - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; + for_each_active_drhd_unit(drhd) { if (segment != drhd->segment) continue; @@ -2470,11 +2468,7 @@ static int __init init_dmars(void) goto error; } - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - - iommu = drhd->iommu; + for_each_active_iommu(iommu, drhd) { g_iommus[iommu->seq_id] = iommu; ret = iommu_init_domains(iommu); @@ -2498,12 +2492,7 @@ static int __init init_dmars(void) /* * Start from the sane iommu hardware state. */ - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - - iommu = drhd->iommu; - + for_each_active_iommu(iommu, drhd) { /* * If the queued invalidation is already initialized by us * (for example, while enabling interrupt-remapping) then @@ -2523,12 +2512,7 @@ static int __init init_dmars(void) dmar_disable_qi(iommu); } - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - - iommu = drhd->iommu; - + for_each_active_iommu(iommu, drhd) { if (dmar_enable_qi(iommu)) { /* * Queued Invalidate not enabled, use Register Based @@ -2611,17 +2595,16 @@ static int __init init_dmars(void) * global invalidate iotlb * enable translation */ - for_each_drhd_unit(drhd) { + for_each_iommu(iommu, drhd) { if (drhd->ignored) { /* * we always have to disable PMRs or DMA may fail on * this device */ if (force_on) - iommu_disable_protect_mem_regions(drhd->iommu); + iommu_disable_protect_mem_regions(iommu); continue; } - iommu = drhd->iommu; iommu_flush_write_buffer(iommu); @@ -2643,12 +2626,8 @@ static int __init init_dmars(void) return 0; error: - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - iommu = drhd->iommu; + for_each_active_iommu(iommu, drhd) free_iommu(iommu); - } kfree(g_iommus); return ret; } @@ -3296,9 +3275,9 @@ static void __init init_no_remapping_devices(void) } } - for_each_drhd_unit(drhd) { + for_each_active_drhd_unit(drhd) { int i; - if (drhd->ignored || drhd->include_all) + if (drhd->include_all) continue; for (i = 0; i < drhd->devices_cnt; i++) @@ -3647,6 +3626,7 @@ int __init intel_iommu_init(void) { int ret = 0; struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); @@ -3660,16 +3640,9 @@ int __init intel_iommu_init(void) /* * Disable translation if already enabled prior to OS handover. */ - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu; - - if (drhd->ignored) - continue; - - iommu = drhd->iommu; + for_each_active_iommu(iommu, drhd) if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); - } if (dmar_dev_scope_init() < 0) { if (force_on) @@ -3912,11 +3885,7 @@ static void iommu_free_vm_domain(struct dmar_domain *domain) unsigned long i; unsigned long ndomains; - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - iommu = drhd->iommu; - + for_each_active_iommu(iommu, drhd) { ndomains = cap_ndoms(iommu->cap); for_each_set_bit(i, iommu->domain_ids, ndomains) { if (iommu->domains[i] == domain) { diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index fdf57534b76cb..f307a3fb93ce6 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -520,6 +520,7 @@ static int __init dmar_x2apic_optout(void) static int __init intel_irq_remapping_supported(void) { struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; if (disable_irq_remap) return 0; @@ -538,12 +539,9 @@ static int __init intel_irq_remapping_supported(void) if (!dmar_ir_support()) return 0; - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) if (!ecap_ir_support(iommu->ecap)) return 0; - } return 1; } @@ -551,6 +549,7 @@ static int __init intel_irq_remapping_supported(void) static int __init intel_enable_irq_remapping(void) { struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; bool x2apic_present; int setup = 0; int eim = 0; @@ -573,9 +572,7 @@ static int __init intel_enable_irq_remapping(void) "Use 'intremap=no_x2apic_optout' to override BIOS request.\n"); } - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) { /* * If the queued invalidation is already initialized, * shouldn't disable it. @@ -600,9 +597,7 @@ static int __init intel_enable_irq_remapping(void) /* * check for the Interrupt-remapping support */ - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) { if (!ecap_ir_support(iommu->ecap)) continue; @@ -616,10 +611,8 @@ static int __init intel_enable_irq_remapping(void) /* * Enable queued invalidation for all the DRHD's. */ - for_each_drhd_unit(drhd) { - int ret; - struct intel_iommu *iommu = drhd->iommu; - ret = dmar_enable_qi(iommu); + for_each_iommu(iommu, drhd) { + int ret = dmar_enable_qi(iommu); if (ret) { printk(KERN_ERR "DRHD %Lx: failed to enable queued, " @@ -632,9 +625,7 @@ static int __init intel_enable_irq_remapping(void) /* * Setup Interrupt-remapping for all the DRHD's now. */ - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) { if (!ecap_ir_support(iommu->ecap)) continue; @@ -778,19 +769,17 @@ static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header, static int __init parse_ioapics_under_ir(void) { struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; int ir_supported = 0; int ioapic_idx; - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) if (ecap_ir_support(iommu->ecap)) { if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu)) return -1; ir_supported = 1; } - } if (!ir_supported) return 0; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 1a60dd630bb43..eccb0c0c6cf63 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -53,6 +53,10 @@ extern struct list_head dmar_drhd_units; #define for_each_drhd_unit(drhd) \ list_for_each_entry(drhd, &dmar_drhd_units, list) +#define for_each_active_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) \ + if (drhd->ignored) {} else + #define for_each_active_iommu(i, drhd) \ list_for_each_entry(drhd, &dmar_drhd_units, list) \ if (i=drhd->iommu, drhd->ignored) {} else From b5f36d9e614135470da452f820f161c443d3c83c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:19 +0800 Subject: [PATCH 157/164] iommu/vt-d: fix invalid memory access when freeing DMAR irq In function free_dmar_iommu(), it sets IRQ handler data to NULL before calling free_irq(), which will cause invalid memory access because free_irq() will access IRQ handler data when calling function dmar_msi_mask(). So only set IRQ handler data to NULL after calling free_irq(). Sample stack dump: [ 13.094010] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [ 13.103215] IP: [] __lock_acquire+0x4d/0x12a0 [ 13.110104] PGD 0 [ 13.112614] Oops: 0000 [#1] SMP [ 13.116585] Modules linked in: [ 13.120260] CPU: 60 PID: 1 Comm: swapper/0 Tainted: G W 3.13.0-rc1-gerry+ #9 [ 13.129367] Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.99.99.x059.091020121352 09/10/2012 [ 13.142555] task: ffff88042dd38010 ti: ffff88042dd32000 task.ti: ffff88042dd32000 [ 13.151179] RIP: 0010:[] [] __lock_acquire+0x4d/0x12a0 [ 13.160867] RSP: 0000:ffff88042dd33b78 EFLAGS: 00010046 [ 13.166969] RAX: 0000000000000046 RBX: 0000000000000002 RCX: 0000000000000000 [ 13.175122] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000048 [ 13.183274] RBP: ffff88042dd33bd8 R08: 0000000000000002 R09: 0000000000000001 [ 13.191417] R10: 0000000000000000 R11: 0000000000000001 R12: ffff88042dd38010 [ 13.199571] R13: 0000000000000000 R14: 0000000000000048 R15: 0000000000000000 [ 13.207725] FS: 0000000000000000(0000) GS:ffff88103f200000(0000) knlGS:0000000000000000 [ 13.217014] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 13.223596] CR2: 0000000000000048 CR3: 0000000001a0b000 CR4: 00000000000407e0 [ 13.231747] Stack: [ 13.234160] 0000000000000004 0000000000000046 ffff88042dd33b98 ffffffff810a567d [ 13.243059] ffff88042dd33c08 ffffffff810bb14c ffffffff828995a0 0000000000000046 [ 13.251969] 0000000000000000 0000000000000000 0000000000000002 0000000000000000 [ 13.260862] Call Trace: [ 13.263775] [] ? trace_hardirqs_off+0xd/0x10 [ 13.270571] [] ? vprintk_emit+0x23c/0x570 [ 13.277058] [] lock_acquire+0x93/0x120 [ 13.283269] [] ? dmar_msi_mask+0x47/0x70 [ 13.289677] [] _raw_spin_lock_irqsave+0x49/0x90 [ 13.296748] [] ? dmar_msi_mask+0x47/0x70 [ 13.303153] [] dmar_msi_mask+0x47/0x70 [ 13.309354] [] irq_shutdown+0x53/0x60 [ 13.315467] [] __free_irq+0x26d/0x280 [ 13.321580] [] free_irq+0xf0/0x180 [ 13.327395] [] free_dmar_iommu+0x271/0x2b0 [ 13.333996] [] ? trace_hardirqs_on+0xd/0x10 [ 13.340696] [] free_iommu+0x17/0x50 [ 13.346597] [] init_dmars+0x691/0x77a [ 13.352711] [] intel_iommu_init+0x351/0x438 [ 13.359400] [] ? iommu_setup+0x27d/0x27d [ 13.365806] [] pci_iommu_init+0x28/0x52 [ 13.372114] [] do_one_initcall+0x122/0x180 [ 13.378707] [] ? parse_args+0x1e8/0x320 [ 13.385016] [] kernel_init_freeable+0x1e1/0x26c [ 13.392100] [] ? do_early_param+0x88/0x88 [ 13.398596] [] ? rest_init+0xd0/0xd0 [ 13.404614] [] kernel_init+0xe/0x130 [ 13.410626] [] ret_from_fork+0x7c/0xb0 [ 13.416829] [] ? rest_init+0xd0/0xd0 [ 13.422842] Code: ec 99 00 85 c0 8b 05 53 05 a5 00 41 0f 45 d8 85 c0 0f 84 ff 00 00 00 8b 05 99 f9 7e 01 49 89 fe 41 89 f7 85 c0 0f 84 03 01 00 00 <49> 8b 06 be 01 00 00 00 48 3d c0 0e 01 82 0f 44 de 41 83 ff 01 [ 13.450191] RIP [] __lock_acquire+0x4d/0x12a0 [ 13.458598] RSP [ 13.462671] CR2: 0000000000000048 [ 13.466551] ---[ end trace c5bd26a37c81d760 ]--- Reviewed-by: Yijing Wang Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3731bf68ddce1..7a0984d1c8d5d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1291,9 +1291,9 @@ void free_dmar_iommu(struct intel_iommu *iommu) iommu_disable_translation(iommu); if (iommu->irq) { - irq_set_handler_data(iommu->irq, NULL); /* This will mask the irq */ free_irq(iommu->irq, iommu); + irq_set_handler_data(iommu->irq, NULL); destroy_irq(iommu->irq); } From a868e6b7b661c3d3e7e681a16d0b205971987c99 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:20 +0800 Subject: [PATCH 158/164] iommu/vt-d: keep shared resources when failed to initialize iommu devices Data structure drhd->iommu is shared between DMA remapping driver and interrupt remapping driver, so DMA remapping driver shouldn't release drhd->iommu when it failed to initialize IOMMU devices. Otherwise it may cause invalid memory access to the interrupt remapping driver. Sample stack dump: [ 13.315090] BUG: unable to handle kernel paging request at ffffc9000605a088 [ 13.323221] IP: [] qi_submit_sync+0x15c/0x400 [ 13.330107] PGD 82f81e067 PUD c2f81e067 PMD 82e846067 PTE 0 [ 13.336818] Oops: 0002 [#1] SMP [ 13.340757] Modules linked in: [ 13.344422] CPU: 0 PID: 4 Comm: kworker/0:0 Not tainted 3.13.0-rc1-gerry+ #7 [ 13.352474] Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.99.99.x059.091020121352 09/10/2012 [ 13.365659] Workqueue: events work_for_cpu_fn [ 13.370774] task: ffff88042ddf00d0 ti: ffff88042ddee000 task.ti: ffff88042dde e000 [ 13.379389] RIP: 0010:[] [] qi_submit_sy nc+0x15c/0x400 [ 13.389055] RSP: 0000:ffff88042ddef940 EFLAGS: 00010002 [ 13.395151] RAX: 00000000000005e0 RBX: 0000000000000082 RCX: 0000000200000025 [ 13.403308] RDX: ffffc9000605a000 RSI: 0000000000000010 RDI: ffff88042ddb8610 [ 13.411446] RBP: ffff88042ddef9a0 R08: 00000000000005d0 R09: 0000000000000001 [ 13.419599] R10: 0000000000000000 R11: 000000000000005d R12: 000000000000005c [ 13.427742] R13: ffff88102d84d300 R14: 0000000000000174 R15: ffff88042ddb4800 [ 13.435877] FS: 0000000000000000(0000) GS:ffff88043de00000(0000) knlGS:00000 00000000000 [ 13.445168] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 13.451749] CR2: ffffc9000605a088 CR3: 0000000001a0b000 CR4: 00000000000407f0 [ 13.459895] Stack: [ 13.462297] ffff88042ddb85d0 000000000000005d ffff88042ddef9b0 0000000000000 5d0 [ 13.471147] 00000000000005c0 ffff88042ddb8000 000000000000005c 0000000000000 015 [ 13.480001] ffff88042ddb4800 0000000000000282 ffff88042ddefa40 ffff88042ddef ac0 [ 13.488855] Call Trace: [ 13.491771] [] modify_irte+0x9d/0xd0 [ 13.497778] [] intel_setup_ioapic_entry+0x10d/0x290 [ 13.505250] [] ? trace_hardirqs_on_caller+0x16/0x1e0 [ 13.512824] [] ? default_init_apic_ldr+0x60/0x60 [ 13.519998] [] setup_ioapic_remapped_entry+0x20/0x30 [ 13.527566] [] io_apic_setup_irq_pin+0x12a/0x2c0 [ 13.534742] [] ? acpi_pci_irq_find_prt_entry+0x2b9/0x2d8 [ 13.544102] [] io_apic_setup_irq_pin_once+0x85/0xa0 [ 13.551568] [] ? mp_find_ioapic_pin+0x8f/0xf0 [ 13.558434] [] io_apic_set_pci_routing+0x34/0x70 [ 13.565621] [] mp_register_gsi+0xaf/0x1c0 [ 13.572111] [] acpi_register_gsi_ioapic+0xe/0x10 [ 13.579286] [] acpi_register_gsi+0xf/0x20 [ 13.585779] [] acpi_pci_irq_enable+0x171/0x1e3 [ 13.592764] [] pcibios_enable_device+0x31/0x40 [ 13.599744] [] do_pci_enable_device+0x3b/0x60 [ 13.606633] [] pci_enable_device_flags+0xc8/0x120 [ 13.613887] [] pci_enable_device+0x13/0x20 [ 13.620484] [] pcie_port_device_register+0x1e/0x510 [ 13.627947] [] ? trace_hardirqs_on_caller+0x16/0x1e0 [ 13.635510] [] ? trace_hardirqs_on+0xd/0x10 [ 13.642189] [] pcie_portdrv_probe+0x58/0xc0 [ 13.648877] [] local_pci_probe+0x45/0xa0 [ 13.655266] [] work_for_cpu_fn+0x14/0x20 [ 13.661656] [] process_one_work+0x369/0x710 [ 13.668334] [] ? process_one_work+0x2f2/0x710 [ 13.675215] [] ? worker_thread+0x46/0x690 [ 13.681714] [] worker_thread+0x484/0x690 [ 13.688109] [] ? cancel_delayed_work_sync+0x20/0x20 [ 13.695576] [] kthread+0xf0/0x110 [ 13.701300] [] ? local_clock+0x3f/0x50 [ 13.707492] [] ? kthread_create_on_node+0x250/0x250 [ 13.714959] [] ret_from_fork+0x7c/0xb0 [ 13.721152] [] ? kthread_create_on_node+0x250/0x250 Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 56 ++++++++++++++++++++++++----------- drivers/iommu/intel-iommu.c | 13 +++----- include/linux/dma_remapping.h | 4 --- include/linux/intel-iommu.h | 1 - 4 files changed, 43 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index ee4cb1906e45d..b0df78f9cd28b 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -53,6 +53,7 @@ struct acpi_table_header * __initdata dmar_tbl; static acpi_size dmar_tbl_size; static int alloc_iommu(struct dmar_drhd_unit *drhd); +static void free_iommu(struct intel_iommu *iommu); static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { @@ -205,25 +206,28 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) return 0; } +static void dmar_free_drhd(struct dmar_drhd_unit *dmaru) +{ + if (dmaru->devices && dmaru->devices_cnt) + dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt); + if (dmaru->iommu) + free_iommu(dmaru->iommu); + kfree(dmaru); +} + static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) { struct acpi_dmar_hardware_unit *drhd; - int ret = 0; drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; if (dmaru->include_all) return 0; - ret = dmar_parse_dev_scope((void *)(drhd + 1), - ((void *)drhd) + drhd->header.length, - &dmaru->devices_cnt, &dmaru->devices, - drhd->segment); - if (ret) { - list_del(&dmaru->list); - kfree(dmaru); - } - return ret; + return dmar_parse_dev_scope((void *)(drhd + 1), + ((void *)drhd) + drhd->header.length, + &dmaru->devices_cnt, &dmaru->devices, + drhd->segment); } #ifdef CONFIG_ACPI_NUMA @@ -435,7 +439,7 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) int __init dmar_dev_scope_init(void) { static int dmar_dev_scope_initialized; - struct dmar_drhd_unit *drhd, *drhd_n; + struct dmar_drhd_unit *drhd; int ret = -ENODEV; if (dmar_dev_scope_initialized) @@ -444,7 +448,7 @@ int __init dmar_dev_scope_init(void) if (list_empty(&dmar_drhd_units)) goto fail; - list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) { + list_for_each_entry(drhd, &dmar_drhd_units, list) { ret = dmar_parse_dev(drhd); if (ret) goto fail; @@ -725,12 +729,13 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) return err; } -void free_iommu(struct intel_iommu *iommu) +static void free_iommu(struct intel_iommu *iommu) { - if (!iommu) - return; - - free_dmar_iommu(iommu); + if (iommu->irq) { + free_irq(iommu->irq, iommu); + irq_set_handler_data(iommu->irq, NULL); + destroy_irq(iommu->irq); + } if (iommu->reg) unmap_iommu(iommu); @@ -1368,4 +1373,21 @@ int __init dmar_ir_support(void) return dmar->flags & 0x1; } +static int __init dmar_free_unused_resources(void) +{ + struct dmar_drhd_unit *dmaru, *dmaru_n; + + /* DMAR units are in use */ + if (irq_remapping_enabled || intel_iommu_enabled) + return 0; + + list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) { + list_del(&dmaru->list); + dmar_free_drhd(dmaru); + } + + return 0; +} + +late_initcall(dmar_free_unused_resources); IOMMU_INIT_POST(detect_intel_iommu); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7a0984d1c8d5d..fd9e369a8cf78 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1265,7 +1265,7 @@ static int iommu_init_domains(struct intel_iommu *iommu) static void domain_exit(struct dmar_domain *domain); static void vm_domain_exit(struct dmar_domain *domain); -void free_dmar_iommu(struct intel_iommu *iommu) +static void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; int i; @@ -1290,15 +1290,10 @@ void free_dmar_iommu(struct intel_iommu *iommu) if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); - if (iommu->irq) { - /* This will mask the irq */ - free_irq(iommu->irq, iommu); - irq_set_handler_data(iommu->irq, NULL); - destroy_irq(iommu->irq); - } - kfree(iommu->domains); kfree(iommu->domain_ids); + iommu->domains = NULL; + iommu->domain_ids = NULL; g_iommus[iommu->seq_id] = NULL; @@ -2627,7 +2622,7 @@ static int __init init_dmars(void) return 0; error: for_each_active_iommu(iommu, drhd) - free_iommu(iommu); + free_dmar_iommu(iommu); kfree(g_iommus); return ret; } diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 57c9a8ae4f2df..7ac17f57250e4 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -27,7 +27,6 @@ struct root_entry; #ifdef CONFIG_INTEL_IOMMU -extern void free_dmar_iommu(struct intel_iommu *iommu); extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; @@ -41,9 +40,6 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) { return 0; } -static inline void free_dmar_iommu(struct intel_iommu *iommu) -{ -} #define dmar_disabled (1) #define intel_iommu_enabled (0) #endif diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f2c4114b86652..2c4bed593b323 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -348,7 +348,6 @@ static inline void __iommu_flush_cache( extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); -extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void dmar_disable_qi(struct intel_iommu *iommu); extern int dmar_reenable_qi(struct intel_iommu *iommu); From 5ced12af691771a424fc3bcabecd668025517ebd Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:22 +0800 Subject: [PATCH 159/164] iommu/vt-d: fix access after free issue in function free_dmar_iommu() Function free_dmar_iommu() may access domain->iommu_lock by spin_unlock_irqrestore(&domain->iommu_lock, flags); after freeing corresponding domain structure. Sample stack dump: [ 8.912818] ========================= [ 8.917072] [ BUG: held lock freed! ] [ 8.921335] 3.13.0-rc1-gerry+ #12 Not tainted [ 8.926375] ------------------------- [ 8.930629] swapper/0/1 is freeing memory ffff880c23b56040-ffff880c23b5613f, with a lock still held there! [ 8.941675] (&(&domain->iommu_lock)->rlock){......}, at: [] init_dmars+0x72c/0x95b [ 8.952582] 1 lock held by swapper/0/1: [ 8.957031] #0: (&(&domain->iommu_lock)->rlock){......}, at: [] init_dmars+0x72c/0x95b [ 8.968487] [ 8.968487] stack backtrace: [ 8.973602] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.13.0-rc1-gerry+ #12 [ 8.981556] Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.99.99.x059.091020121352 09/10/2012 [ 8.994742] ffff880c23b56040 ffff88042dd33c98 ffffffff815617fd ffff88042dd38b28 [ 9.003566] ffff88042dd33cd0 ffffffff810a977a ffff880c23b56040 0000000000000086 [ 9.012403] ffff88102c4923c0 ffff88042ddb4800 ffffffff81b1e8c0 ffff88042dd33d28 [ 9.021240] Call Trace: [ 9.024138] [] dump_stack+0x4d/0x66 [ 9.030057] [] debug_check_no_locks_freed+0x15a/0x160 [ 9.037723] [] kmem_cache_free+0x62/0x5b0 [ 9.044225] [] domain_exit+0x197/0x1c0 [ 9.050418] [] init_dmars+0x758/0x95b [ 9.056527] [] intel_iommu_init+0x351/0x438 [ 9.063207] [] ? iommu_setup+0x27d/0x27d [ 9.069601] [] pci_iommu_init+0x28/0x52 [ 9.075910] [] do_one_initcall+0x122/0x180 [ 9.082509] [] ? parse_args+0x1e8/0x320 [ 9.088815] [] kernel_init_freeable+0x1e1/0x26c [ 9.095895] [] ? do_early_param+0x88/0x88 [ 9.102396] [] ? rest_init+0xd0/0xd0 [ 9.108410] [] kernel_init+0xe/0x130 [ 9.114423] [] ret_from_fork+0x7c/0xb0 [ 9.120612] [] ? rest_init+0xd0/0xd0 Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fd9e369a8cf78..dec715c7e5256 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1268,7 +1268,7 @@ static void vm_domain_exit(struct dmar_domain *domain); static void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; - int i; + int i, count; unsigned long flags; if ((iommu->domains) && (iommu->domain_ids)) { @@ -1277,13 +1277,14 @@ static void free_dmar_iommu(struct intel_iommu *iommu) clear_bit(i, iommu->domain_ids); spin_lock_irqsave(&domain->iommu_lock, flags); - if (--domain->iommu_count == 0) { + count = --domain->iommu_count; + spin_unlock_irqrestore(&domain->iommu_lock, flags); + if (count == 0) { if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) vm_domain_exit(domain); else domain_exit(domain); } - spin_unlock_irqrestore(&domain->iommu_lock, flags); } } From a84da70b7ba0c5236fccf25115acefc235ed65f9 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:23 +0800 Subject: [PATCH 160/164] iommu/vt-d: release invalidation queue when destroying IOMMU unit Release associated invalidation queue when destroying IOMMU unit to avoid memory leak. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index b0df78f9cd28b..726cfe296d990 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -737,6 +737,12 @@ static void free_iommu(struct intel_iommu *iommu) destroy_irq(iommu->irq); } + if (iommu->qi) { + free_page((unsigned long)iommu->qi->desc); + kfree(iommu->qi->desc_status); + kfree(iommu->qi); + } + if (iommu->reg) unmap_iommu(iommu); From cc05301fd54f3e166aedf24e39f6731c4dec0451 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:24 +0800 Subject: [PATCH 161/164] iommu/vt-d: fix wrong return value of dmar_table_init() If dmar_table_init() fails to detect DMAR table on the first call, it will return wrong result on following calls because it always sets dmar_table_initialized no matter if succeeds or fails to detect DMAR table. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 726cfe296d990..753c7ecf66a80 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -472,24 +472,23 @@ int __init dmar_table_init(void) static int dmar_table_initialized; int ret; - if (dmar_table_initialized) - return 0; - - dmar_table_initialized = 1; - - ret = parse_dmar_table(); - if (ret) { - if (ret != -ENODEV) - pr_info("parse DMAR table failure.\n"); - return ret; - } + if (dmar_table_initialized == 0) { + ret = parse_dmar_table(); + if (ret < 0) { + if (ret != -ENODEV) + pr_info("parse DMAR table failure.\n"); + } else if (list_empty(&dmar_drhd_units)) { + pr_info("No DMAR devices found\n"); + ret = -ENODEV; + } - if (list_empty(&dmar_drhd_units)) { - pr_info("No DMAR devices found\n"); - return -ENODEV; + if (ret < 0) + dmar_table_initialized = ret; + else + dmar_table_initialized = 1; } - return 0; + return dmar_table_initialized < 0 ? dmar_table_initialized : 0; } static void warn_invalid_dmar(u64 addr, const char *message) From b707cb027edf5b7ff1b8637c184b9a58d74e5159 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:26 +0800 Subject: [PATCH 162/164] iommu/vt-d, trivial: clean sparse warnings Clean up most sparse warnings in Intel DMA and interrupt remapping drivers. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 6 +++--- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/irq_remapping.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 753c7ecf66a80..1581565434106 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -572,7 +572,7 @@ int __init detect_intel_iommu(void) x86_init.iommu.iommu_init = intel_iommu_init; #endif } - early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); + early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; return ret ? 1 : -ENODEV; @@ -1064,7 +1064,7 @@ int dmar_enable_qi(struct intel_iommu *iommu) desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); if (!desc_page) { kfree(qi); - iommu->qi = 0; + iommu->qi = NULL; return -ENOMEM; } @@ -1074,7 +1074,7 @@ int dmar_enable_qi(struct intel_iommu *iommu) if (!qi->desc_status) { free_page((unsigned long) qi->desc); kfree(qi); - iommu->qi = 0; + iommu->qi = NULL; return -ENOMEM; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index dec715c7e5256..948c6a0d0f5a3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -382,7 +382,7 @@ struct device_domain_info { static void flush_unmaps_timeout(unsigned long data); -DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); +static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); #define HIGH_WATER_MARK 250 struct deferred_flush_tables { diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 3b05d1b939c54..228632c99adba 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -295,8 +295,8 @@ int setup_ioapic_remapped_entry(int irq, vector, attr); } -int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) +static int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) { if (!config_enabled(CONFIG_SMP) || !remap_ops || !remap_ops->set_affinity) From 9bdc531ec63bf894c5e3b7b5a766ce342eb2f52e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 6 Jan 2014 14:18:27 +0800 Subject: [PATCH 163/164] iommu/vt-d: free all resources if failed to initialize DMARs Enhance intel_iommu_init() to free all resources if failed to initialize DMAR hardware. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 81 ++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 948c6a0d0f5a3..5ac7efc70ca90 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2624,6 +2624,7 @@ static int __init init_dmars(void) error: for_each_active_iommu(iommu, drhd) free_dmar_iommu(iommu); + kfree(deferred_flush); kfree(g_iommus); return ret; } @@ -3467,18 +3468,12 @@ static int __init rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) { struct acpi_dmar_reserved_memory *rmrr; - int ret; rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; - ret = dmar_parse_dev_scope((void *)(rmrr + 1), - ((void *)rmrr) + rmrr->header.length, - &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - - if (ret || (rmrru->devices_cnt == 0)) { - list_del(&rmrru->list); - kfree(rmrru); - } - return ret; + return dmar_parse_dev_scope((void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + &rmrru->devices_cnt, &rmrru->devices, + rmrr->segment); } static LIST_HEAD(dmar_atsr_units); @@ -3503,23 +3498,39 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) { - int rc; struct acpi_dmar_atsr *atsr; if (atsru->include_all) return 0; atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); - rc = dmar_parse_dev_scope((void *)(atsr + 1), - (void *)atsr + atsr->header.length, - &atsru->devices_cnt, &atsru->devices, - atsr->segment); - if (rc || !atsru->devices_cnt) { - list_del(&atsru->list); - kfree(atsru); + return dmar_parse_dev_scope((void *)(atsr + 1), + (void *)atsr + atsr->header.length, + &atsru->devices_cnt, &atsru->devices, + atsr->segment); +} + +static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru) +{ + dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); + kfree(atsru); +} + +static void intel_iommu_free_dmars(void) +{ + struct dmar_rmrr_unit *rmrru, *rmrr_n; + struct dmar_atsr_unit *atsru, *atsr_n; + + list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { + list_del(&rmrru->list); + dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); + kfree(rmrru); } - return rc; + list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { + list_del(&atsru->list); + intel_iommu_free_atsr(atsru); + } } int dmar_find_matched_atsr_unit(struct pci_dev *dev) @@ -3563,17 +3574,17 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) int __init dmar_parse_rmrr_atsr_dev(void) { - struct dmar_rmrr_unit *rmrr, *rmrr_n; - struct dmar_atsr_unit *atsr, *atsr_n; + struct dmar_rmrr_unit *rmrr; + struct dmar_atsr_unit *atsr; int ret = 0; - list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { + list_for_each_entry(rmrr, &dmar_rmrr_units, list) { ret = rmrr_parse_dev(rmrr); if (ret) return ret; } - list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { + list_for_each_entry(atsr, &dmar_atsr_units, list) { ret = atsr_parse_dev(atsr); if (ret) return ret; @@ -3620,7 +3631,7 @@ static struct notifier_block device_nb = { int __init intel_iommu_init(void) { - int ret = 0; + int ret = -ENODEV; struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; @@ -3630,7 +3641,7 @@ int __init intel_iommu_init(void) if (dmar_table_init()) { if (force_on) panic("tboot: Failed to initialize DMAR table\n"); - return -ENODEV; + goto out_free_dmar; } /* @@ -3643,16 +3654,16 @@ int __init intel_iommu_init(void) if (dmar_dev_scope_init() < 0) { if (force_on) panic("tboot: Failed to initialize DMAR device scope\n"); - return -ENODEV; + goto out_free_dmar; } if (no_iommu || dmar_disabled) - return -ENODEV; + goto out_free_dmar; if (iommu_init_mempool()) { if (force_on) panic("tboot: Failed to initialize iommu memory\n"); - return -ENODEV; + goto out_free_dmar; } if (list_empty(&dmar_rmrr_units)) @@ -3664,7 +3675,7 @@ int __init intel_iommu_init(void) if (dmar_init_reserved_ranges()) { if (force_on) panic("tboot: Failed to reserve iommu ranges\n"); - return -ENODEV; + goto out_free_mempool; } init_no_remapping_devices(); @@ -3674,9 +3685,7 @@ int __init intel_iommu_init(void) if (force_on) panic("tboot: Failed to initialize DMARs\n"); printk(KERN_ERR "IOMMU: dmar init failed\n"); - put_iova_domain(&reserved_iova_list); - iommu_exit_mempool(); - return ret; + goto out_free_reserved_range; } printk(KERN_INFO "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); @@ -3696,6 +3705,14 @@ int __init intel_iommu_init(void) intel_iommu_enabled = 1; return 0; + +out_free_reserved_range: + put_iova_domain(&reserved_iova_list); +out_free_mempool: + iommu_exit_mempool(); +out_free_dmar: + intel_iommu_free_dmars(); + return ret; } static void iommu_detach_dependent_devices(struct intel_iommu *iommu, From 9f4c7448f46b881119998702530676b3400752a9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 9 Jan 2014 08:32:36 +0300 Subject: [PATCH 164/164] iommu/vt-d: Fix signedness bug in alloc_irte() "index" needs to be signed for the error handling to work. I deleted a little bit of obsolete cruft related to "index" and "start_index" as well. Fixes: 360eb3c5687e ('iommu/vt-d: use dedicated bitmap to track remapping entry allocation status') Signed-off-by: Dan Carpenter Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index f307a3fb93ce6..b30b423c5aca7 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -71,18 +71,13 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) struct ir_table *table = iommu->ir_table; struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); struct irq_cfg *cfg = irq_get_chip_data(irq); - u16 index, start_index; unsigned int mask = 0; unsigned long flags; + int index; if (!count || !irq_iommu) return -1; - /* - * start the IRTE search from index 0. - */ - index = start_index = 0; - if (count > 1) { count = __roundup_pow_of_two(count); mask = ilog2(count);