From 9f591ae60e1be026901398ef99eede91237aa3a1 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 21 Mar 2018 15:08:47 +0100 Subject: [PATCH 001/660] drm/i915/gvt: throw error on unhandled vfio ioctls On unknown/unhandled ioctls the driver should return an error, so userspace knows it tried to use something unsupported. Cc: stable@vger.kernel.org Signed-off-by: Gerd Hoffmann Reviewed-by: Alex Williamson Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 520fe3d0a882..f8540cc67b44 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1254,7 +1254,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } - return 0; + return -ENOTTY; } static ssize_t From ac0fd9cfc837d1d30cb958dc34769e90aad43078 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 22 Mar 2018 12:27:54 -0500 Subject: [PATCH 002/660] drm/i915/gvt: Mark expected switch fall-through in handle_g2v_notification In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 1466154 ("Missing break in switch") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8c5d5d005854..a33c1c3e4a21 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1150,6 +1150,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) switch (notification) { case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; + /* fall through */ case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps); return PTR_ERR_OR_ZERO(mm); From 5da795b061a139b55d7393e79c7af688c58a4267 Mon Sep 17 00:00:00 2001 From: Zhipeng Gong Date: Mon, 26 Mar 2018 15:18:56 +0800 Subject: [PATCH 003/660] drm/i915/gvt: Make MI_USER_INTERRUPT nop in cmd parser GVT-g dispatches request to host i915 and depends on i915 notify ring interrupt mechanism to check completion of request. For now MI_USER_INTERRUPT in guest requests is passed through in GVT-g cmd parser and i915 does not use it, which causes unnecessary interrupt handling in i915. On the other hand, if several requests from guest are combined into one request in and contain MI_USER_INTERRUPT in the middle of combined request. GVT-g still has to wait on the whole request to complete to inject user interrupts to guest. This patch makes all the MI_USER_INTERRUPT nop to save some interrupt handling. Here is test result to run glmark2 on guest for 10 seconds: host master interrupts number is reduced from 16021 to 11162 host user interrupts number is reduced from 7936 to 3536 v2: - revise commit message. (Kevin) Reviewed-by: Kevin Tian Signed-off-by: Zhipeng Gong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index c8454ac43fae..d301a204e981 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1079,6 +1079,7 @@ static int cmd_handler_mi_user_interrupt(struct parser_exec_state *s) { set_bit(cmd_interrupt_events[s->ring_id].mi_user_interrupt, s->workload->pending_events); + patch_value(s, cmd_ptr(s, 0), MI_NOOP); return 0; } From ab13a9218c9883d1f51940b9e720c63ef54a2c07 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 18 Jan 2018 18:40:25 -0800 Subject: [PATCH 004/660] ecryptfs: lookup: Don't check if mount_crypt_stat is NULL mount_crypt_stat is assigned to &ecryptfs_superblock_to_private(ecryptfs_dentry->d_sb)->mount_crypt_stat, and mount_crypt_stat is not the first object in struct ecryptfs_sb_info. mount_crypt_stat is therefore never NULL. At the same time, no crash in ecryptfs_lookup() has been reported, and the lookup functions in other file systems don't check if d_sb is NULL either. Given that, remove the NULL check. Signed-off-by: Guenter Roeck Signed-off-by: Tyler Hicks --- fs/ecryptfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 847904aa63a9..97d17eaeba07 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -395,8 +395,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; - if (mount_crypt_stat - && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) { + if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) { rc = ecryptfs_encrypt_and_encode_filename( &encrypted_and_encoded_name, &len, mount_crypt_stat, name, len); From f62fd7a77717350e850f3c4a5373fe8e64871025 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Mar 2018 09:07:08 +0000 Subject: [PATCH 005/660] ecryptfs: fix spelling mistake: "cadidate" -> "candidate" Trivial fix to spelling mistake in debug message text. Signed-off-by: Colin Ian King Signed-off-by: Tyler Hicks --- fs/ecryptfs/keystore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index c89a58cfc991..e74fe84d0886 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1880,7 +1880,7 @@ find_next_matching_auth_tok: candidate_auth_tok = &auth_tok_list_item->auth_tok; if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, - "Considering cadidate auth tok:\n"); + "Considering candidate auth tok:\n"); ecryptfs_dump_auth_tok(candidate_auth_tok); } rc = ecryptfs_get_auth_tok_sig(&candidate_auth_tok_sig, From 7598e8700e9a507496660219924ca8c5aa3088d6 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 27 Mar 2018 15:35:14 +0800 Subject: [PATCH 006/660] drm/i915/gvt: Missed to cancel dma map for ggtt entries We have canceled dma map for ppgtt entries. Also we need to do it for ggtt entries when them are invalidated. This can fix task hung issue as: [13517.791767] INFO: task gvt_service_thr:1081 blocked for more than 120 seconds. [13517.792584] Not tainted 4.14.15+ #3 [13517.793417] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [13517.794267] gvt_service_thr D 0 1081 2 0x80000000 [13517.795132] Call Trace: [13517.795996] ? __schedule+0x493/0x77b [13517.796859] schedule+0x79/0x82 [13517.797740] schedule_preempt_disabled+0x5/0x6 [13517.798614] __mutex_lock.isra.0+0x2b5/0x445 [13517.799504] ? __switch_to_asm+0x24/0x60 [13517.800381] ? intel_gvt_cleanup+0x10/0x10 [13517.801261] ? intel_gvt_schedule+0x19/0x2b9 [13517.802107] intel_gvt_schedule+0x19/0x2b9 [13517.802954] ? intel_gvt_cleanup+0x10/0x10 [13517.803824] gvt_service_thread+0xe3/0x10d [13517.804704] ? wait_woken+0x68/0x68 [13517.805588] kthread+0x118/0x120 [13517.806478] ? kthread_create_on_node+0x3a/0x3a [13517.807381] ? call_usermodehelper_exec_async+0x113/0x11a [13517.808307] ret_from_fork+0x35/0x40 v3: split out ggtt reset case. v2: also unmap ggtt during reset. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index d29281231507..8c9477c272be 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -530,6 +530,16 @@ static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, false, 0, mm->vgpu); } +static void ggtt_get_host_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); + + pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu); +} + static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_entry *entry, unsigned long index) { @@ -1818,6 +1828,18 @@ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, return ret; } +static void ggtt_invalidate_pte(struct intel_vgpu *vgpu, + struct intel_gvt_gtt_entry *entry) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; + unsigned long pfn; + + pfn = pte_ops->get_pfn(entry); + if (pfn != vgpu->gvt->gtt.scratch_mfn) + intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, + pfn << PAGE_SHIFT); +} + static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { @@ -1844,10 +1866,10 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, bytes); - m = e; if (ops->test_present(&e)) { gfn = ops->get_pfn(&e); + m = e; /* one PTE update may be issued in multiple writes and the * first write may not construct a valid gfn @@ -1868,8 +1890,12 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, ops->set_pfn(&m, gvt->gtt.scratch_mfn); } else ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); - } else + } else { + ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index); + ggtt_invalidate_pte(vgpu, &m); ops->set_pfn(&m, gvt->gtt.scratch_mfn); + ops->clear_present(&m); + } out: ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); From f4c43db356198d8091442f593871b0beb5c139b2 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 27 Mar 2018 15:35:15 +0800 Subject: [PATCH 007/660] drm/i915/gvt: Cancel dma map when resetting ggtt entries Ditto, don't forget ggtt entries during reset. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 22 +++++++++++++++++----- drivers/gpu/drm/i915/gvt/gtt.h | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 8c9477c272be..78e55aafc8bc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2056,7 +2056,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return PTR_ERR(gtt->ggtt_mm); } - intel_vgpu_reset_ggtt(vgpu); + intel_vgpu_reset_ggtt(vgpu, false); return create_scratch_page_tree(vgpu); } @@ -2341,17 +2341,19 @@ void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) /** * intel_vgpu_reset_ggtt - reset the GGTT entry * @vgpu: a vGPU + * @invalidate_old: invalidate old entries * * This function is called at the vGPU create stage * to reset all the GGTT entries. * */ -void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) +void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) { struct intel_gvt *gvt = vgpu->gvt; struct drm_i915_private *dev_priv = gvt->dev_priv; struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; + struct intel_gvt_gtt_entry old_entry; u32 index; u32 num_entries; @@ -2360,13 +2362,23 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; - while (num_entries--) + while (num_entries--) { + if (invalidate_old) { + ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); + ggtt_invalidate_pte(vgpu, &old_entry); + } ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); + } index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; - while (num_entries--) + while (num_entries--) { + if (invalidate_old) { + ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); + ggtt_invalidate_pte(vgpu, &old_entry); + } ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); + } ggtt_invalidate(dev_priv); } @@ -2386,5 +2398,5 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) * removing the shadow pages. */ intel_vgpu_destroy_all_ppgtt_mm(vgpu); - intel_vgpu_reset_ggtt(vgpu); + intel_vgpu_reset_ggtt(vgpu, true); } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index a8b369cd352b..3792f2b7f4ff 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -193,7 +193,7 @@ struct intel_vgpu_gtt { extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); -void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu); +void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old); void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu); extern int intel_gvt_init_gtt(struct intel_gvt *gvt); From a733390f9a79876635013e57da25f91b6e78ffe6 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Wed, 28 Mar 2018 05:30:13 +0800 Subject: [PATCH 008/660] drm/i915/gvt: Delete redundant error message in fb_decode.c Much error message exist in host dmesg when guest boot up with local display enabled. [ 167.680011] gvt: vgpu 1: invalid range gmadr 0x0 size 0x0 [ 167.680013] gvt: vgpu 1: invalid gma address: 0 The second error line duplicate with the first error line, so this patch remove this redundant error message and make the next error message much clearer. Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/fb_decoder.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 6b50fe78dc1b..1c120683e958 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -245,16 +245,13 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, plane->hw_format = fmt; plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate primary plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } @@ -371,16 +368,13 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, alpha_plane, alpha_force); plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate cursor plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } @@ -476,16 +470,13 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->drm_format = drm_format; plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate sprite plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } From 65eff272330c72689fb5e20dd6491826fd87a39c Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Wed, 28 Mar 2018 05:30:14 +0800 Subject: [PATCH 009/660] drm/i915/gvt: Disable primary/sprite/cursor plane at virtual display initialization Much error exist in host dmesg during guest boot up with loca display enabled. gvt: vgpu 1: invalid range gmadr 0x0 size 0x0 This error happens when qemu get dmabuf info in case that the virtual display plane is enabled but its base address is an invalid 0, such case may be true before guest enable its plane. At this moment, its state is copied from host where the plane may be enabled. This patch disable primary/sprite/cursor plane at virtual display initialization, so intel_vgpu_decode_primary/cursor/sprite could return early as plane is disabled, then plane base check is skipped and error message disapper. Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index dd96ffc878ac..6d8180e8d1e2 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -169,6 +169,8 @@ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { static void emulate_monitor_status_change(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); @@ -267,6 +269,14 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) if (IS_BROADWELL(dev_priv)) vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + /* Disable Primary/Sprite/Cursor plane */ + for_each_pipe(dev_priv, pipe) { + vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISPLAY_PLANE_ENABLE; + vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE; + vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~CURSOR_MODE; + vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= CURSOR_MODE_DISABLE; + } + vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } From 10996f802109c83421ca30556cfe36ffc3bebae3 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 28 Mar 2018 13:49:29 +0800 Subject: [PATCH 010/660] drm/i915/gvt: Add drm_format_mod update Add drm_format_mod update, which is omitted. Fixes: e546e281("drm/i915/gvt: Dmabuf support for GVT-g") Cc: stable@vger.kernel.org Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index b555eb26f9ce..6f4f8e941fc2 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -323,6 +323,7 @@ static void update_fb_info(struct vfio_device_gfx_plane_info *gvt_dmabuf, struct intel_vgpu_fb_info *fb_info) { gvt_dmabuf->drm_format = fb_info->drm_format; + gvt_dmabuf->drm_format_mod = fb_info->drm_format_mod; gvt_dmabuf->width = fb_info->width; gvt_dmabuf->height = fb_info->height; gvt_dmabuf->stride = fb_info->stride; From ff2969c479d97c6221a9835ee0ab4c44513badc6 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Thu, 8 Mar 2018 17:24:57 +0200 Subject: [PATCH 011/660] dt-binding: timer: document NPCM7xx timer DT bindings Added device tree binding documentation for Nuvoton NPCM7xx timer. Signed-off-by: Tomer Maimon Acked-by: Rob Herring Reviewed-by: Brendan Higgins Signed-off-by: Daniel Lezcano --- .../bindings/timer/nuvoton,npcm7xx-timer.txt | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt diff --git a/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt b/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt new file mode 100644 index 000000000000..ea22dfe485be --- /dev/null +++ b/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt @@ -0,0 +1,21 @@ +Nuvoton NPCM7xx timer + +Nuvoton NPCM7xx have three timer modules, each timer module provides five 24-bit +timer counters. + +Required properties: +- compatible : "nuvoton,npcm750-timer" for Poleg NPCM750. +- reg : Offset and length of the register set for the device. +- interrupts : Contain the timer interrupt with flags for + falling edge. +- clocks : phandle of timer reference clock (usually a 25 MHz clock). + +Example: + +timer@f0008000 { + compatible = "nuvoton,npcm750-timer"; + interrupts = ; + reg = <0xf0008000 0x50>; + clocks = <&clk NPCM7XX_CLK_TIMER>; +}; + From 1c00289ecd12471ba9733e61aaf1d39883a77b16 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Thu, 8 Mar 2018 17:24:58 +0200 Subject: [PATCH 012/660] clocksource/drivers/npcm: Add NPCM7xx timer driver Add Nuvoton BMC NPCM7xx timer driver. The clocksource Enable 24-bit TIMER0 and TIMER1 counters, while TIMER0 serve as clockevent and TIMER1 serve as clocksource. Signed-off-by: Tomer Maimon Reviewed-by: Brendan Higgins Signed-off-by: Daniel Lezcano --- drivers/clocksource/Kconfig | 8 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-npcm7xx.c | 215 ++++++++++++++++++++++++++++ 3 files changed, 224 insertions(+) create mode 100644 drivers/clocksource/timer-npcm7xx.c diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index b3b4ed9b6874..76194bc20bdf 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -130,6 +130,14 @@ config VT8500_TIMER help Enables support for the VT8500 driver. +config NPCM7XX_TIMER + bool "NPCM7xx timer driver" if COMPILE_TEST + depends on HAS_IOMEM + select CLKSRC_MMIO + help + Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture, + While TIMER0 serves as clockevent and TIMER1 serves as clocksource. + config CADENCE_TTC_TIMER bool "Cadence TTC timer driver" if COMPILE_TEST depends on COMMON_CLK diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index d6dec4489d66..74387877f7cf 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o obj-$(CONFIG_OXNAS_RPS_TIMER) += timer-oxnas-rps.o obj-$(CONFIG_OWL_TIMER) += owl-timer.o obj-$(CONFIG_SPRD_TIMER) += timer-sprd.o +obj-$(CONFIG_NPCM7XX_TIMER) += timer-npcm7xx.o obj-$(CONFIG_ARC_TIMERS) += arc_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o diff --git a/drivers/clocksource/timer-npcm7xx.c b/drivers/clocksource/timer-npcm7xx.c new file mode 100644 index 000000000000..7a9bb5532d99 --- /dev/null +++ b/drivers/clocksource/timer-npcm7xx.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2014-2018 Nuvoton Technologies tomer.maimon@nuvoton.com + * All rights reserved. + * + * Copyright 2017 Google, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "timer-of.h" + +/* Timers registers */ +#define NPCM7XX_REG_TCSR0 0x0 /* Timer 0 Control and Status Register */ +#define NPCM7XX_REG_TICR0 0x8 /* Timer 0 Initial Count Register */ +#define NPCM7XX_REG_TCSR1 0x4 /* Timer 1 Control and Status Register */ +#define NPCM7XX_REG_TICR1 0xc /* Timer 1 Initial Count Register */ +#define NPCM7XX_REG_TDR1 0x14 /* Timer 1 Data Register */ +#define NPCM7XX_REG_TISR 0x18 /* Timer Interrupt Status Register */ + +/* Timers control */ +#define NPCM7XX_Tx_RESETINT 0x1f +#define NPCM7XX_Tx_PERIOD BIT(27) +#define NPCM7XX_Tx_INTEN BIT(29) +#define NPCM7XX_Tx_COUNTEN BIT(30) +#define NPCM7XX_Tx_ONESHOT 0x0 +#define NPCM7XX_Tx_OPER GENMASK(3, 27) +#define NPCM7XX_Tx_MIN_PRESCALE 0x1 +#define NPCM7XX_Tx_TDR_MASK_BITS 24 +#define NPCM7XX_Tx_MAX_CNT 0xFFFFFF +#define NPCM7XX_T0_CLR_INT 0x1 +#define NPCM7XX_Tx_CLR_CSR 0x0 + +/* Timers operating mode */ +#define NPCM7XX_START_PERIODIC_Tx (NPCM7XX_Tx_PERIOD | NPCM7XX_Tx_COUNTEN | \ + NPCM7XX_Tx_INTEN | \ + NPCM7XX_Tx_MIN_PRESCALE) + +#define NPCM7XX_START_ONESHOT_Tx (NPCM7XX_Tx_ONESHOT | NPCM7XX_Tx_COUNTEN | \ + NPCM7XX_Tx_INTEN | \ + NPCM7XX_Tx_MIN_PRESCALE) + +#define NPCM7XX_START_Tx (NPCM7XX_Tx_COUNTEN | NPCM7XX_Tx_PERIOD | \ + NPCM7XX_Tx_MIN_PRESCALE) + +#define NPCM7XX_DEFAULT_CSR (NPCM7XX_Tx_CLR_CSR | NPCM7XX_Tx_MIN_PRESCALE) + +static int npcm7xx_timer_resume(struct clock_event_device *evt) +{ + struct timer_of *to = to_timer_of(evt); + u32 val; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val |= NPCM7XX_Tx_COUNTEN; + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static int npcm7xx_timer_shutdown(struct clock_event_device *evt) +{ + struct timer_of *to = to_timer_of(evt); + u32 val; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val &= ~NPCM7XX_Tx_COUNTEN; + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static int npcm7xx_timer_oneshot(struct clock_event_device *evt) +{ + struct timer_of *to = to_timer_of(evt); + u32 val; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val &= ~NPCM7XX_Tx_OPER; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val |= NPCM7XX_START_ONESHOT_Tx; + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static int npcm7xx_timer_periodic(struct clock_event_device *evt) +{ + struct timer_of *to = to_timer_of(evt); + u32 val; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val &= ~NPCM7XX_Tx_OPER; + + writel(timer_of_period(to), timer_of_base(to) + NPCM7XX_REG_TICR0); + val |= NPCM7XX_START_PERIODIC_Tx; + + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static int npcm7xx_clockevent_set_next_event(unsigned long evt, + struct clock_event_device *clk) +{ + struct timer_of *to = to_timer_of(clk); + u32 val; + + writel(evt, timer_of_base(to) + NPCM7XX_REG_TICR0); + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val |= NPCM7XX_START_Tx; + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static irqreturn_t npcm7xx_timer0_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = (struct clock_event_device *)dev_id; + struct timer_of *to = to_timer_of(evt); + + writel(NPCM7XX_T0_CLR_INT, timer_of_base(to) + NPCM7XX_REG_TISR); + + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static struct timer_of npcm7xx_to = { + .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK, + + .clkevt = { + .name = "npcm7xx-timer0", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = npcm7xx_clockevent_set_next_event, + .set_state_shutdown = npcm7xx_timer_shutdown, + .set_state_periodic = npcm7xx_timer_periodic, + .set_state_oneshot = npcm7xx_timer_oneshot, + .tick_resume = npcm7xx_timer_resume, + .rating = 300, + }, + + .of_irq = { + .handler = npcm7xx_timer0_interrupt, + .flags = IRQF_TIMER | IRQF_IRQPOLL, + }, +}; + +static void __init npcm7xx_clockevents_init(void) +{ + writel(NPCM7XX_DEFAULT_CSR, + timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR0); + + writel(NPCM7XX_Tx_RESETINT, + timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TISR); + + npcm7xx_to.clkevt.cpumask = cpumask_of(0); + clockevents_config_and_register(&npcm7xx_to.clkevt, + timer_of_rate(&npcm7xx_to), + 0x1, NPCM7XX_Tx_MAX_CNT); +} + +static void __init npcm7xx_clocksource_init(void) +{ + u32 val; + + writel(NPCM7XX_DEFAULT_CSR, + timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1); + writel(NPCM7XX_Tx_MAX_CNT, + timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TICR1); + + val = readl(timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1); + val |= NPCM7XX_START_Tx; + writel(val, timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1); + + clocksource_mmio_init(timer_of_base(&npcm7xx_to) + + NPCM7XX_REG_TDR1, + "npcm7xx-timer1", timer_of_rate(&npcm7xx_to), + 200, (unsigned int)NPCM7XX_Tx_TDR_MASK_BITS, + clocksource_mmio_readl_down); +} + +static int __init npcm7xx_timer_init(struct device_node *np) +{ + int ret; + + ret = timer_of_init(np, &npcm7xx_to); + if (ret) + return ret; + + /* Clock input is divided by PRESCALE + 1 before it is fed */ + /* to the counter */ + npcm7xx_to.of_clk.rate = npcm7xx_to.of_clk.rate / + (NPCM7XX_Tx_MIN_PRESCALE + 1); + + npcm7xx_clocksource_init(); + npcm7xx_clockevents_init(); + + pr_info("Enabling NPCM7xx clocksource timer base: %px, IRQ: %d ", + timer_of_base(&npcm7xx_to), timer_of_irq(&npcm7xx_to)); + + return 0; +} + +TIMER_OF_DECLARE(npcm7xx, "nuvoton,npcm750-timer", npcm7xx_timer_init); + From cc01456a0d9a3cbfec85cf23f2ce53323e8fc973 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 28 Mar 2018 11:22:35 +0800 Subject: [PATCH 013/660] dt-bindings: timer: tpm: fix typo of clock name The clock name should be ipg instead of igp. Signed-off-by: Anson Huang Reviewed-by: Rob Herring Signed-off-by: Daniel Lezcano --- Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt b/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt index b4aa7ddb5b13..f82087b220f4 100644 --- a/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt +++ b/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt @@ -15,7 +15,7 @@ Required properties: - interrupts : Should be the clock event device interrupt. - clocks : The clocks provided by the SoC to drive the timer, must contain an entry for each entry in clock-names. -- clock-names : Must include the following entries: "igp" and "per". +- clock-names : Must include the following entries: "ipg" and "per". Example: tpm5: tpm@40260000 { From 16328e7bd428937f557a984d8b3378387c17a930 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 28 Mar 2018 11:22:36 +0800 Subject: [PATCH 014/660] clocksource/drivers/imx-tpm: Fix typo of clock name The clock name should be ipg instead of igp. Signed-off-by: Anson Huang Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 21bffdcb2f20..3f97d4985824 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -179,7 +179,7 @@ static int __init tpm_timer_init(struct device_node *np) ipg = of_clk_get_by_name(np, "ipg"); per = of_clk_get_by_name(np, "per"); if (IS_ERR(ipg) || IS_ERR(per)) { - pr_err("tpm: failed to get igp or per clk\n"); + pr_err("tpm: failed to get ipg or per clk\n"); ret = -ENODEV; goto err_clk_get; } From 506a7be93ff773d5d4cf75a59f342865605b4910 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 28 Mar 2018 11:22:37 +0800 Subject: [PATCH 015/660] clocksource/drivers/imx-tpm: Correct some registers operation flow According to i.MX7ULP reference manual, TPM_SC_CPWMS can ONLY be written when counter is disabled, TPM_SC_TOF is write-1-clear, TPM_C0SC_CHF is also write-1-clear, correct these registers initialization flow; Signed-off-by: Anson Huang Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 3f97d4985824..7403e494417a 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -20,6 +20,7 @@ #define TPM_SC 0x10 #define TPM_SC_CMOD_INC_PER_CNT (0x1 << 3) #define TPM_SC_CMOD_DIV_DEFAULT 0x3 +#define TPM_SC_TOF_MASK (0x1 << 7) #define TPM_CNT 0x14 #define TPM_MOD 0x18 #define TPM_STATUS 0x1c @@ -29,6 +30,7 @@ #define TPM_C0SC_MODE_SHIFT 2 #define TPM_C0SC_MODE_MASK 0x3c #define TPM_C0SC_MODE_SW_COMPARE 0x4 +#define TPM_C0SC_CHF_MASK (0x1 << 7) #define TPM_C0V 0x24 static void __iomem *timer_base; @@ -205,9 +207,13 @@ static int __init tpm_timer_init(struct device_node *np) * 4) Channel0 disabled * 5) DMA transfers disabled */ + /* make sure counter is disabled */ writel(0, timer_base + TPM_SC); + /* TOF is W1C */ + writel(TPM_SC_TOF_MASK, timer_base + TPM_SC); writel(0, timer_base + TPM_CNT); - writel(0, timer_base + TPM_C0SC); + /* CHF is W1C */ + writel(TPM_C0SC_CHF_MASK, timer_base + TPM_C0SC); /* increase per cnt, div 8 by default */ writel(TPM_SC_CMOD_INC_PER_CNT | TPM_SC_CMOD_DIV_DEFAULT, From 0136c741ff40e03323419feec05fcd594f36a463 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 28 Mar 2018 11:22:38 +0800 Subject: [PATCH 016/660] clocksource/drivers/imx-tpm: Add different counter width support Different TPM modules have different width counters which is 16-bit or 32-bit, the counter width can be read from TPM_PARAM register bit[23:16], this patch adds dynamic check for counter width to support both 16-bit and 32-bit TPM modules. Signed-off-by: Anson Huang Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-imx-tpm.c | 33 ++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 7403e494417a..05d97a6871d8 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -17,9 +17,13 @@ #include #include +#define TPM_PARAM 0x4 +#define TPM_PARAM_WIDTH_SHIFT 16 +#define TPM_PARAM_WIDTH_MASK (0xff << 16) #define TPM_SC 0x10 #define TPM_SC_CMOD_INC_PER_CNT (0x1 << 3) #define TPM_SC_CMOD_DIV_DEFAULT 0x3 +#define TPM_SC_CMOD_DIV_MAX 0x7 #define TPM_SC_TOF_MASK (0x1 << 7) #define TPM_CNT 0x14 #define TPM_MOD 0x18 @@ -33,6 +37,8 @@ #define TPM_C0SC_CHF_MASK (0x1 << 7) #define TPM_C0V 0x24 +static int counter_width; +static int rating; static void __iomem *timer_base; static struct clock_event_device clockevent_tpm; @@ -85,10 +91,11 @@ static int __init tpm_clocksource_init(unsigned long rate) tpm_delay_timer.freq = rate; register_current_timer_delay(&tpm_delay_timer); - sched_clock_register(tpm_read_sched_clock, 32, rate); + sched_clock_register(tpm_read_sched_clock, counter_width, rate); return clocksource_mmio_init(timer_base + TPM_CNT, "imx-tpm", - rate, 200, 32, clocksource_mmio_readl_up); + rate, rating, counter_width, + clocksource_mmio_readl_up); } static int tpm_set_next_event(unsigned long delta, @@ -141,7 +148,6 @@ static struct clock_event_device clockevent_tpm = { .set_state_oneshot = tpm_set_state_oneshot, .set_next_event = tpm_set_next_event, .set_state_shutdown = tpm_set_state_shutdown, - .rating = 200, }; static int __init tpm_clockevent_init(unsigned long rate, int irq) @@ -151,10 +157,11 @@ static int __init tpm_clockevent_init(unsigned long rate, int irq) ret = request_irq(irq, tpm_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, "i.MX7ULP TPM Timer", &clockevent_tpm); + clockevent_tpm.rating = rating; clockevent_tpm.cpumask = cpumask_of(0); clockevent_tpm.irq = irq; - clockevents_config_and_register(&clockevent_tpm, - rate, 300, 0xfffffffe); + clockevents_config_and_register(&clockevent_tpm, rate, 300, + GENMASK(counter_width - 1, 1)); return ret; } @@ -199,6 +206,11 @@ static int __init tpm_timer_init(struct device_node *np) goto err_per_clk_enable; } + counter_width = (readl(timer_base + TPM_PARAM) & TPM_PARAM_WIDTH_MASK) + >> TPM_PARAM_WIDTH_SHIFT; + /* use rating 200 for 32-bit counter and 150 for 16-bit counter */ + rating = counter_width == 0x20 ? 200 : 150; + /* * Initialize tpm module to a known state * 1) Counter disabled @@ -215,12 +227,17 @@ static int __init tpm_timer_init(struct device_node *np) /* CHF is W1C */ writel(TPM_C0SC_CHF_MASK, timer_base + TPM_C0SC); - /* increase per cnt, div 8 by default */ - writel(TPM_SC_CMOD_INC_PER_CNT | TPM_SC_CMOD_DIV_DEFAULT, + /* + * increase per cnt, + * div 8 for 32-bit counter and div 128 for 16-bit counter + */ + writel(TPM_SC_CMOD_INC_PER_CNT | + (counter_width == 0x20 ? + TPM_SC_CMOD_DIV_DEFAULT : TPM_SC_CMOD_DIV_MAX), timer_base + TPM_SC); /* set MOD register to maximum for free running mode */ - writel(0xffffffff, timer_base + TPM_MOD); + writel(GENMASK(counter_width - 1, 0), timer_base + TPM_MOD); rate = clk_get_rate(per) >> 3; ret = tpm_clocksource_init(rate); From 38057aa1639b74d5c1e0dc1ca7c22bc7a31de714 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 24 Mar 2018 12:58:29 +0000 Subject: [PATCH 017/660] drm/i915/execlists: Clear user-active flag on preemption completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cancelling the requests and clearing out the ports following a successful preemption completion, also clear the active flag. I had assumed that all preemptions would be followed by an immediate dequeue (preserving the active user flag), but under rare circumstances we may be triggering a preemption for the second port only for it to have completed before the preemotion kicks in; leaving execlists->active set even though the system is now idle. We can clear the flag inside the common execlists_cancel_port_requests() as the other users also expect the semantics of active being cleared. Fixes: f6322eddaff7 ("drm/i915/preemption: Allow preemption between submission ports") Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Michel Thierry Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180324125829.27026-1-chris@chris-wilson.co.uk (cherry picked from commit eed7ec52f214bac2f25395ccaad610fbeb842a6e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 697af5add78b..e3a5f673ff67 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -577,6 +577,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * know the next preemption status we see corresponds * to this ELSP update. */ + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(!port_count(&port[0])); if (port_count(&port[0]) > 1) goto unlock; @@ -738,6 +740,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) memset(port, 0, sizeof(*port)); port++; } + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); } static void execlists_cancel_requests(struct intel_engine_cs *engine) @@ -1001,6 +1005,11 @@ static void execlists_submission_tasklet(unsigned long data) if (fw) intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); + + /* If the engine is now idle, so should be the flag; and vice versa. */ + GEM_BUG_ON(execlists_is_active(&engine->execlists, + EXECLISTS_ACTIVE_USER) == + !port_isset(engine->execlists.port)); } static void queue_request(struct intel_engine_cs *engine, From 2e210bbb7429cdcf1a1a3ad00c1bf98bd9bf2452 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 3 Apr 2018 10:52:20 -0700 Subject: [PATCH 018/660] HID: input: fix battery level reporting on BT mice The commit 581c4484769e ("HID: input: map digitizer battery usage") assumed that devices having input (qas opposed to feature) report for battery strength would report the data on their own, without the need to be polled by the kernel; unfortunately it is not so. Many wireless mice do not send unsolicited reports with battery strength data and have to be polled explicitly. As a complication, stylus devices on digitizers are not normally connected to the base and thus can not be polled - the base can only determine battery strength in the stylus when it is in proximity. To solve this issue, we add a special flag that tells the kernel to avoid polling the device (and expect unsolicited reports) and set it when report field with physical usage of digitizer stylus (HID_DG_STYLUS). Unless this flag is set, and we have not seen the unsolicited reports, the kernel will attempt to poll the device when userspace attempts to read "capacity" and "state" attributes of power_supply object corresponding to the devices battery. Fixes: 581c4484769e ("HID: input: map digitizer battery usage") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198095 Cc: stable@vger.kernel.org Reported-and-tested-by: Martin van Es Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 24 +++++++++++++++++------- include/linux/hid.h | 9 ++++++++- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 6836a856c243..930652c25120 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -387,7 +387,8 @@ static int hidinput_get_battery_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CAPACITY: - if (dev->battery_report_type == HID_FEATURE_REPORT) { + if (dev->battery_status != HID_BATTERY_REPORTED && + !dev->battery_avoid_query) { value = hidinput_query_battery_capacity(dev); if (value < 0) return value; @@ -403,17 +404,17 @@ static int hidinput_get_battery_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_STATUS: - if (!dev->battery_reported && - dev->battery_report_type == HID_FEATURE_REPORT) { + if (dev->battery_status != HID_BATTERY_REPORTED && + !dev->battery_avoid_query) { value = hidinput_query_battery_capacity(dev); if (value < 0) return value; dev->battery_capacity = value; - dev->battery_reported = true; + dev->battery_status = HID_BATTERY_QUERIED; } - if (!dev->battery_reported) + if (dev->battery_status == HID_BATTERY_UNKNOWN) val->intval = POWER_SUPPLY_STATUS_UNKNOWN; else if (dev->battery_capacity == 100) val->intval = POWER_SUPPLY_STATUS_FULL; @@ -486,6 +487,14 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, dev->battery_report_type = report_type; dev->battery_report_id = field->report->id; + /* + * Stylus is normally not connected to the device and thus we + * can't query the device and get meaningful battery strength. + * We have to wait for the device to report it on its own. + */ + dev->battery_avoid_query = report_type == HID_INPUT_REPORT && + field->physical == HID_DG_STYLUS; + dev->battery = power_supply_register(&dev->dev, psy_desc, &psy_cfg); if (IS_ERR(dev->battery)) { error = PTR_ERR(dev->battery); @@ -530,9 +539,10 @@ static void hidinput_update_battery(struct hid_device *dev, int value) capacity = hidinput_scale_battery_capacity(dev, value); - if (!dev->battery_reported || capacity != dev->battery_capacity) { + if (dev->battery_status != HID_BATTERY_REPORTED || + capacity != dev->battery_capacity) { dev->battery_capacity = capacity; - dev->battery_reported = true; + dev->battery_status = HID_BATTERY_REPORTED; power_supply_changed(dev->battery); } } diff --git a/include/linux/hid.h b/include/linux/hid.h index 8da3e1f48195..26240a22978a 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -516,6 +516,12 @@ enum hid_type { HID_TYPE_USBNONE }; +enum hid_battery_status { + HID_BATTERY_UNKNOWN = 0, + HID_BATTERY_QUERIED, /* Kernel explicitly queried battery strength */ + HID_BATTERY_REPORTED, /* Device sent unsolicited battery strength report */ +}; + struct hid_driver; struct hid_ll_driver; @@ -558,7 +564,8 @@ struct hid_device { /* device report descriptor */ __s32 battery_max; __s32 battery_report_type; __s32 battery_report_id; - bool battery_reported; + enum hid_battery_status battery_status; + bool battery_avoid_query; #endif unsigned int status; /* see STAT flags above */ From a955358d54695e4ad9f7d6489a7ac4d69a8fc711 Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Fri, 6 Apr 2018 01:09:36 +0200 Subject: [PATCH 019/660] HID: hidraw: Fix crash on HIDIOCGFEATURE with a destroyed device Doing `ioctl(HIDIOCGFEATURE)` in a tight loop on a hidraw device and then disconnecting the device, or unloading the driver, can cause a NULL pointer dereference. When a hidraw device is destroyed it sets 0 to `dev->exist`. Most functions check 'dev->exist' before doing its work, but `hidraw_get_report()` was missing that check. Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Rivas Costa Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index fbfcc8009432..b39844adea47 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -192,6 +192,11 @@ static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t int ret = 0, len; unsigned char report_number; + if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { + ret = -ENODEV; + goto out; + } + dev = hidraw_table[minor]->hid; if (!dev->ll_driver->raw_request) { From 54a307ba8d3cd00a3902337ffaae28f436eeb1a4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 4 Apr 2018 23:42:18 +0300 Subject: [PATCH 020/660] fanotify: fix logic of events on child When event on child inodes are sent to the parent inode mark and parent inode mark was not marked with FAN_EVENT_ON_CHILD, the event will not be delivered to the listener process. However, if the same process also has a mount mark, the event to the parent inode will be delivered regadless of the mount mark mask. This behavior is incorrect in the case where the mount mark mask does not contain the specific event type. For example, the process adds a mark on a directory with mask FAN_MODIFY (without FAN_EVENT_ON_CHILD) and a mount mark with mask FAN_CLOSE_NOWRITE (without FAN_ONDIR). A modify event on a file inside that directory (and inside that mount) should not create a FAN_MODIFY event, because neither of the marks requested to get that event on the file. Fixes: 1968f5eed54c ("fanotify: use both marks when possible") Cc: stable Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d51e1bb781cf..d94e8031fe5f 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, u32 event_mask, const void *data, int data_type) { - __u32 marks_mask, marks_ignored_mask; + __u32 marks_mask = 0, marks_ignored_mask = 0; const struct path *path = data; pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" @@ -108,24 +108,20 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !d_can_lookup(path->dentry)) return false; - if (inode_mark && vfsmnt_mark) { - marks_mask = (vfsmnt_mark->mask | inode_mark->mask); - marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask); - } else if (inode_mark) { - /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! - */ - if ((event_mask & FS_EVENT_ON_CHILD) && - !(inode_mark->mask & FS_EVENT_ON_CHILD)) - return false; - marks_mask = inode_mark->mask; - marks_ignored_mask = inode_mark->ignored_mask; - } else if (vfsmnt_mark) { - marks_mask = vfsmnt_mark->mask; - marks_ignored_mask = vfsmnt_mark->ignored_mask; - } else { - BUG(); + /* + * if the event is for a child and this inode doesn't care about + * events on the child, don't send it! + */ + if (inode_mark && + (!(event_mask & FS_EVENT_ON_CHILD) || + (inode_mark->mask & FS_EVENT_ON_CHILD))) { + marks_mask |= inode_mark->mask; + marks_ignored_mask |= inode_mark->ignored_mask; + } + + if (vfsmnt_mark) { + marks_mask |= vfsmnt_mark->mask; + marks_ignored_mask |= vfsmnt_mark->ignored_mask; } if (d_is_dir(path->dentry) && From 876c27314ce51fe7e7e2aeb24a6448da1a26c78f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 Apr 2018 12:10:09 +0200 Subject: [PATCH 021/660] netfilter: nf_conntrack_sip: allow duplicate SDP expectations Callum Sinclair reported SIP IP Phone errors that he tracked down to such phones sending session descriptions for different media types but with same port numbers. The expect core will only 'refresh' existing expectation if it is from same master AND same expectation class (media type). As expectation class is different, we get an error. The SIP connection tracking code will then 1). drop the SDP packet 2). if an rtp expectation was already installed successfully, error on rtcp expectation will cancel the rtp one. Make the expect core report back to caller when the conflict is due to different expectation class and have SIP tracker ignore soft-error. Reported-by: Callum Sinclair Tested-by: Callum Sinclair Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 5 ++++- net/netfilter/nf_conntrack_sip.c | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 8ef21d9f9a00..4b2b3d53acfc 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -252,7 +252,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { - return a->master == b->master && a->class == b->class && + return a->master == b->master && nf_ct_tuple_equal(&a->tuple, &b->tuple) && nf_ct_tuple_mask_equal(&a->mask, &b->mask) && net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) && @@ -421,6 +421,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) h = nf_ct_expect_dst_hash(net, &expect->tuple); hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { + if (i->class != expect->class) + return -EALREADY; + if (nf_ct_remove_expect(i)) break; } else if (expect_clash(i, expect)) { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4dbb5bad4363..908e51e2dc2b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -938,11 +938,19 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { - if (nf_ct_expect_related(rtp_exp) == 0) { - if (nf_ct_expect_related(rtcp_exp) != 0) - nf_ct_unexpect_related(rtp_exp); - else + /* -EALREADY handling works around end-points that send + * SDP messages with identical port but different media type, + * we pretend expectation was set up. + */ + int errp = nf_ct_expect_related(rtp_exp); + + if (errp == 0 || errp == -EALREADY) { + int errcp = nf_ct_expect_related(rtcp_exp); + + if (errcp == 0 || errcp == -EALREADY) ret = NF_ACCEPT; + else if (errp == 0) + nf_ct_unexpect_related(rtp_exp); } } nf_ct_expect_put(rtcp_exp); From 5c64576a77894a50be80be0024bed27171b55989 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 7 Apr 2018 15:50:47 +0300 Subject: [PATCH 022/660] ipvs: fix rtnl_lock lockups caused by start_sync_thread syzkaller reports for wrong rtnl_lock usage in sync code [1] and [2] We have 2 problems in start_sync_thread if error path is taken, eg. on memory allocation error or failure to configure sockets for mcast group or addr/port binding: 1. recursive locking: holding rtnl_lock while calling sock_release which in turn calls again rtnl_lock in ip_mc_drop_socket to leave the mcast group, as noticed by Florian Westphal. Additionally, sock_release can not be called while holding sync_mutex (ABBA deadlock). 2. task hung: holding rtnl_lock while calling kthread_stop to stop the running kthreads. As the kthreads do the same to leave the mcast group (sock_release -> ip_mc_drop_socket -> rtnl_lock) they hang. Fix the problems by calling rtnl_unlock early in the error path, now sock_release is called after unlocking both mutexes. Problem 3 (task hung reported by syzkaller [2]) is variant of problem 2: use _trylock to prevent one user to call rtnl_lock and then while waiting for sync_mutex to block kthreads that execute sock_release when they are stopped by stop_sync_thread. [1] IPVS: stopping backup sync thread 4500 ... WARNING: possible recursive locking detected 4.16.0-rc7+ #3 Not tainted -------------------------------------------- syzkaller688027/4497 is trying to acquire lock: (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 but task is already holding lock: IPVS: stopping backup sync thread 4495 ... (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(rtnl_mutex); lock(rtnl_mutex); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by syzkaller688027/4497: #0: (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 #1: (ipvs->sync_mutex){+.+.}, at: [<00000000703f78e3>] do_ip_vs_set_ctl+0x10f8/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2388 stack backtrace: CPU: 1 PID: 4497 Comm: syzkaller688027 Not tainted 4.16.0-rc7+ #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_deadlock_bug kernel/locking/lockdep.c:1761 [inline] check_deadlock kernel/locking/lockdep.c:1805 [inline] validate_chain kernel/locking/lockdep.c:2401 [inline] __lock_acquire+0xe8f/0x3e00 kernel/locking/lockdep.c:3431 lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920 __mutex_lock_common kernel/locking/mutex.c:756 [inline] __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 ip_mc_drop_socket+0x88/0x230 net/ipv4/igmp.c:2643 inet_release+0x4e/0x1c0 net/ipv4/af_inet.c:413 sock_release+0x8d/0x1e0 net/socket.c:595 start_sync_thread+0x2213/0x2b70 net/netfilter/ipvs/ip_vs_sync.c:1924 do_ip_vs_set_ctl+0x1139/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2389 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ip_setsockopt+0x97/0xa0 net/ipv4/ip_sockglue.c:1261 udp_setsockopt+0x45/0x80 net/ipv4/udp.c:2406 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2975 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x446a69 RSP: 002b:00007fa1c3a64da8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000446a69 RDX: 000000000000048b RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00000000006e29fc R08: 0000000000000018 R09: 0000000000000000 R10: 00000000200000c0 R11: 0000000000000246 R12: 00000000006e29f8 R13: 00676e697279656b R14: 00007fa1c3a659c0 R15: 00000000006e2b60 [2] IPVS: sync thread started: state = BACKUP, mcast_ifn = syz_tun, syncid = 4, id = 0 IPVS: stopping backup sync thread 25415 ... INFO: task syz-executor7:25421 blocked for more than 120 seconds. Not tainted 4.16.0-rc6+ #284 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor7 D23688 25421 4408 0x00000004 Call Trace: context_switch kernel/sched/core.c:2862 [inline] __schedule+0x8fb/0x1ec0 kernel/sched/core.c:3440 schedule+0xf5/0x430 kernel/sched/core.c:3499 schedule_timeout+0x1a3/0x230 kernel/time/timer.c:1777 do_wait_for_common kernel/sched/completion.c:86 [inline] __wait_for_common kernel/sched/completion.c:107 [inline] wait_for_common kernel/sched/completion.c:118 [inline] wait_for_completion+0x415/0x770 kernel/sched/completion.c:139 kthread_stop+0x14a/0x7a0 kernel/kthread.c:530 stop_sync_thread+0x3d9/0x740 net/netfilter/ipvs/ip_vs_sync.c:1996 do_ip_vs_set_ctl+0x2b1/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2394 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ip_setsockopt+0x97/0xa0 net/ipv4/ip_sockglue.c:1253 sctp_setsockopt+0x2ca/0x63e0 net/sctp/socket.c:4154 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:3039 SYSC_setsockopt net/socket.c:1850 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1829 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x454889 RSP: 002b:00007fc927626c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007fc9276276d4 RCX: 0000000000454889 RDX: 000000000000048c RSI: 0000000000000000 RDI: 0000000000000017 RBP: 000000000072bf58 R08: 0000000000000018 R09: 0000000000000000 R10: 0000000020000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000051c R14: 00000000006f9b40 R15: 0000000000000001 Showing all locks held in the system: 2 locks held by khungtaskd/868: #0: (rcu_read_lock){....}, at: [<00000000a1a8f002>] check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline] #0: (rcu_read_lock){....}, at: [<00000000a1a8f002>] watchdog+0x1c5/0xd60 kernel/hung_task.c:249 #1: (tasklist_lock){.+.+}, at: [<0000000037c2f8f9>] debug_show_all_locks+0xd3/0x3d0 kernel/locking/lockdep.c:4470 1 lock held by rsyslogd/4247: #0: (&f->f_pos_lock){+.+.}, at: [<000000000d8d6983>] __fdget_pos+0x12b/0x190 fs/file.c:765 2 locks held by getty/4338: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4339: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4340: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4341: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4342: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4343: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 2 locks held by getty/4344: #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 3 locks held by kworker/0:5/6494: #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000a062b18e>] work_static include/linux/workqueue.h:198 [inline] #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000a062b18e>] set_work_data kernel/workqueue.c:619 [inline] #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000a062b18e>] set_work_pool_and_clear_pending kernel/workqueue.c:646 [inline] #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000a062b18e>] process_one_work+0xb12/0x1bb0 kernel/workqueue.c:2084 #1: ((addr_chk_work).work){+.+.}, at: [<00000000278427d5>] process_one_work+0xb89/0x1bb0 kernel/workqueue.c:2088 #2: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 1 lock held by syz-executor7/25421: #0: (ipvs->sync_mutex){+.+.}, at: [<00000000d414a689>] do_ip_vs_set_ctl+0x277/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2393 2 locks held by syz-executor7/25427: #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 #1: (ipvs->sync_mutex){+.+.}, at: [<00000000e6d48489>] do_ip_vs_set_ctl+0x10f8/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2388 1 lock held by syz-executor7/25435: #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 1 lock held by ipvs-b:2:0/25415: #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 Reported-and-tested-by: syzbot+a46d6abf9d56b1365a72@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+5fe074c01b2032ce9618@syzkaller.appspotmail.com Fixes: e0b26cc997d5 ("ipvs: call rtnl_lock early") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 8 -- net/netfilter/ipvs/ip_vs_sync.c | 155 ++++++++++++++++---------------- 2 files changed, 80 insertions(+), 83 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5ebde4b15810..f36098887ad0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2384,11 +2384,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) strlcpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)); cfg.syncid = dm->syncid; - rtnl_lock(); - mutex_lock(&ipvs->sync_mutex); ret = start_sync_thread(ipvs, &cfg, dm->state); - mutex_unlock(&ipvs->sync_mutex); - rtnl_unlock(); } else { mutex_lock(&ipvs->sync_mutex); ret = stop_sync_thread(ipvs, dm->state); @@ -3481,12 +3477,8 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) if (ipvs->mixed_address_family_dests > 0) return -EINVAL; - rtnl_lock(); - mutex_lock(&ipvs->sync_mutex); ret = start_sync_thread(ipvs, &c, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); - mutex_unlock(&ipvs->sync_mutex); - rtnl_unlock(); return ret; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index fbaf3bd05b2e..001501e25625 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -49,6 +49,7 @@ #include #include #include +#include #include /* Used for ntoh_seq and hton_seq */ @@ -1360,15 +1361,9 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) /* * Specifiy default interface for outgoing multicasts */ -static int set_mcast_if(struct sock *sk, char *ifname) +static int set_mcast_if(struct sock *sk, struct net_device *dev) { - struct net_device *dev; struct inet_sock *inet = inet_sk(sk); - struct net *net = sock_net(sk); - - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1396,19 +1391,14 @@ static int set_mcast_if(struct sock *sk, char *ifname) * in the in_addr structure passed in as a parameter. */ static int -join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) +join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev) { - struct net *net = sock_net(sk); struct ip_mreqn mreq; - struct net_device *dev; int ret; memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1423,15 +1413,10 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) #ifdef CONFIG_IP_VS_IPV6 static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, - char *ifname) + struct net_device *dev) { - struct net *net = sock_net(sk); - struct net_device *dev; int ret; - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1443,24 +1428,18 @@ static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, } #endif -static int bind_mcastif_addr(struct socket *sock, char *ifname) +static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) { - struct net *net = sock_net(sock->sk); - struct net_device *dev; __be32 addr; struct sockaddr_in sin; - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; - addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); if (!addr) pr_err("You probably need to specify IP address on " "multicast interface.\n"); IP_VS_DBG(7, "binding socket with (%s) %pI4\n", - ifname, &addr); + dev->name, &addr); /* Now bind the socket with the address of multicast interface */ sin.sin_family = AF_INET; @@ -1493,7 +1472,8 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, /* * Set up sending multicast socket over UDP */ -static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) +static int make_send_sock(struct netns_ipvs *ipvs, int id, + struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; @@ -1505,9 +1485,10 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); - return ERR_PTR(result); + goto error; } - result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn); + *sock_ret = sock; + result = set_mcast_if(sock->sk, dev); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; @@ -1522,7 +1503,7 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) set_sock_size(sock->sk, 1, result); if (AF_INET == ipvs->mcfg.mcast_af) - result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn); + result = bind_mcastif_addr(sock, dev); else result = 0; if (result < 0) { @@ -1538,19 +1519,18 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) goto error; } - return sock; + return 0; error: - sock_release(sock); - return ERR_PTR(result); + return result; } /* * Set up receiving multicast socket over UDP */ -static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, - int ifindex) +static int make_receive_sock(struct netns_ipvs *ipvs, int id, + struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; @@ -1562,8 +1542,9 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); - return ERR_PTR(result); + goto error; } + *sock_ret = sock; /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = SK_CAN_REUSE; result = sysctl_sync_sock_size(ipvs); @@ -1571,7 +1552,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, set_sock_size(sock->sk, 0, result); get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); - sock->sk->sk_bound_dev_if = ifindex; + sock->sk->sk_bound_dev_if = dev->ifindex; result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); @@ -1582,21 +1563,20 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, #ifdef CONFIG_IP_VS_IPV6 if (ipvs->bcfg.mcast_af == AF_INET6) result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr, - ipvs->bcfg.mcast_ifn); + dev); else #endif result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr, - ipvs->bcfg.mcast_ifn); + dev); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; } - return sock; + return 0; error: - sock_release(sock); - return ERR_PTR(result); + return result; } @@ -1778,13 +1758,12 @@ static int sync_thread_backup(void *data) int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { - struct ip_vs_sync_thread_data *tinfo; + struct ip_vs_sync_thread_data *tinfo = NULL; struct task_struct **array = NULL, *task; - struct socket *sock; struct net_device *dev; char *name; int (*threadfn)(void *data); - int id, count, hlen; + int id = 0, count, hlen; int result = -ENOMEM; u16 mtu, min_mtu; @@ -1792,6 +1771,18 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* Do not hold one mutex and then to block on another */ + for (;;) { + rtnl_lock(); + if (mutex_trylock(&ipvs->sync_mutex)) + break; + rtnl_unlock(); + mutex_lock(&ipvs->sync_mutex); + if (rtnl_trylock()) + break; + mutex_unlock(&ipvs->sync_mutex); + } + if (!ipvs->sync_state) { count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); ipvs->threads_mask = count - 1; @@ -1810,7 +1801,8 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); if (!dev) { pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); - return -ENODEV; + result = -ENODEV; + goto out_early; } hlen = (AF_INET6 == c->mcast_af) ? sizeof(struct ipv6hdr) + sizeof(struct udphdr) : @@ -1827,26 +1819,30 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, c->sync_maxlen = mtu - hlen; if (state == IP_VS_STATE_MASTER) { + result = -EEXIST; if (ipvs->ms) - return -EEXIST; + goto out_early; ipvs->mcfg = *c; name = "ipvs-m:%d:%d"; threadfn = sync_thread_master; } else if (state == IP_VS_STATE_BACKUP) { + result = -EEXIST; if (ipvs->backup_threads) - return -EEXIST; + goto out_early; ipvs->bcfg = *c; name = "ipvs-b:%d:%d"; threadfn = sync_thread_backup; } else { - return -EINVAL; + result = -EINVAL; + goto out_early; } if (state == IP_VS_STATE_MASTER) { struct ipvs_master_sync_state *ms; + result = -ENOMEM; ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL); if (!ipvs->ms) goto out; @@ -1862,39 +1858,38 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, } else { array = kcalloc(count, sizeof(struct task_struct *), GFP_KERNEL); + result = -ENOMEM; if (!array) goto out; } - tinfo = NULL; for (id = 0; id < count; id++) { - if (state == IP_VS_STATE_MASTER) - sock = make_send_sock(ipvs, id); - else - sock = make_receive_sock(ipvs, id, dev->ifindex); - if (IS_ERR(sock)) { - result = PTR_ERR(sock); - goto outtinfo; - } + result = -ENOMEM; tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) - goto outsocket; + goto out; tinfo->ipvs = ipvs; - tinfo->sock = sock; + tinfo->sock = NULL; if (state == IP_VS_STATE_BACKUP) { tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, GFP_KERNEL); if (!tinfo->buf) - goto outtinfo; + goto out; } else { tinfo->buf = NULL; } tinfo->id = id; + if (state == IP_VS_STATE_MASTER) + result = make_send_sock(ipvs, id, dev, &tinfo->sock); + else + result = make_receive_sock(ipvs, id, dev, &tinfo->sock); + if (result < 0) + goto out; task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); if (IS_ERR(task)) { result = PTR_ERR(task); - goto outtinfo; + goto out; } tinfo = NULL; if (state == IP_VS_STATE_MASTER) @@ -1911,20 +1906,20 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, ipvs->sync_state |= state; spin_unlock_bh(&ipvs->sync_buff_lock); + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); + /* increase the module use count */ ip_vs_use_count_inc(); return 0; -outsocket: - sock_release(sock); - -outtinfo: - if (tinfo) { - sock_release(tinfo->sock); - kfree(tinfo->buf); - kfree(tinfo); - } +out: + /* We do not need RTNL lock anymore, release it here so that + * sock_release below and in the kthreads can use rtnl_lock + * to leave the mcast group. + */ + rtnl_unlock(); count = id; while (count-- > 0) { if (state == IP_VS_STATE_MASTER) @@ -1932,13 +1927,23 @@ outtinfo: else kthread_stop(array[count]); } - kfree(array); - -out: if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { kfree(ipvs->ms); ipvs->ms = NULL; } + mutex_unlock(&ipvs->sync_mutex); + if (tinfo) { + if (tinfo->sock) + sock_release(tinfo->sock); + kfree(tinfo->buf); + kfree(tinfo); + } + kfree(array); + return result; + +out_early: + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); return result; } From 3f1e53abff84cf40b1adb3455d480dd295bf42e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Apr 2018 21:13:30 +0200 Subject: [PATCH 023/660] netfilter: ebtables: don't attempt to allocate 0-sized compat array Dmitry reports 32bit ebtables on 64bit kernel got broken by a recent change that returns -EINVAL when ruleset has no entries. ebtables however only counts user-defined chains, so for the initial table nentries will be 0. Don't try to allocate the compat array in this case, as no user defined rules exist no rule will need 64bit translation. Reported-by: Dmitry Vyukov Fixes: 7d7d7e02111e9 ("netfilter: compat: reject huge allocation requests") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 032e0fe45940..28a4c3490359 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1825,13 +1825,14 @@ static int compat_table_info(const struct ebt_table_info *info, { unsigned int size = info->entries_size; const void *entries = info->entries; - int ret; newinfo->entries_size = size; - - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); - if (ret) - return ret; + if (info->nentries) { + int ret = xt_compat_init_offsets(NFPROTO_BRIDGE, + info->nentries); + if (ret) + return ret; + } return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); From 0ea9924abe125a0c1277484f1a3d512b3f45c320 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 9 Apr 2018 17:01:21 +0800 Subject: [PATCH 024/660] md-cluster: don't update recovery_offset for faulty device Device could become faulty when clustered array handling METADATA_UPDATED msg, so we don't need to call read_rdev for this device. Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3bea45e8ccff..c208c01f63a5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9256,8 +9256,10 @@ void md_reload_sb(struct mddev *mddev, int nr) check_sb_changes(mddev, rdev); /* Read all rdev's to update recovery_offset */ - rdev_for_each_rcu(rdev, mddev) - read_rdev(mddev, rdev); + rdev_for_each_rcu(rdev, mddev) { + if (!test_bit(Faulty, &rdev->flags)) + read_rdev(mddev, rdev); + } } EXPORT_SYMBOL(md_reload_sb); From 8c2425932398a160f687534efe71f0ec4b92833e Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Mon, 9 Apr 2018 09:50:44 +0800 Subject: [PATCH 025/660] md/raid1: exit sync request if MD_RECOVERY_INTR is set We met a sync thread stuck as follows: raid1_sync_request+0x2c9/0xb50 md_do_sync+0x983/0xfa0 md_thread+0x11c/0x160 kthread+0x111/0x130 ret_from_fork+0x35/0x40 0xffffffffffffffff At the same time, there is a stuck mdadm thread (mdadm --manage /dev/md2 --add /dev/sda). It is trying to stop the sync thread: kthread_stop+0x42/0xf0 md_unregister_thread+0x3a/0x70 md_reap_sync_thread+0x15/0x160 action_store+0x142/0x2a0 md_attr_store+0x6c/0xb0 kernfs_fop_write+0x102/0x180 __vfs_write+0x33/0x170 vfs_write+0xad/0x1a0 SyS_write+0x52/0xc0 do_syscall_64+0x6e/0x190 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Debug tools show that the sync thread is waiting in raise_barrier(), until raid1d() end all normal IO bios into bio_end_io_list(introduced in commit 55ce74d4bfe1). But, raid1d() cannot end these bios if MD_CHANGE_PENDING bit is set. It needs to get mddev->reconfig_mutex lock and then clear the bit in md_check_recovery(). However, the lock is holding by mdadm in action_store(). Thus, there is a loop: mdadm waiting for sync thread to stop, sync thread waiting for raid1d() to end bios, raid1d() waiting for mdadm to release mddev->reconfig_mutex lock and then it can end bios. Fix this by checking MD_RECOVERY_INTR while waiting in raise_barrier(), so that sync thread can exit while mdadm is stoping the sync thread. Fixes: 55ce74d4bfe1 ("md/raid1: ensure device failure recorded before write request returns.") Signed-off-by: Jason Yan Signed-off-by: Yufen Yu Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e2943fb74056..6f624311cec2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -854,7 +854,7 @@ static void flush_pending_writes(struct r1conf *conf) * there is no normal IO happeing. It must arrange to call * lower_barrier when the particular background IO completes. */ -static void raise_barrier(struct r1conf *conf, sector_t sector_nr) +static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr) { int idx = sector_to_idx(sector_nr); @@ -885,13 +885,23 @@ static void raise_barrier(struct r1conf *conf, sector_t sector_nr) * max resync count which allowed on current I/O barrier bucket. */ wait_event_lock_irq(conf->wait_barrier, - !conf->array_frozen && + (!conf->array_frozen && !atomic_read(&conf->nr_pending[idx]) && - atomic_read(&conf->barrier[idx]) < RESYNC_DEPTH, + atomic_read(&conf->barrier[idx]) < RESYNC_DEPTH) || + test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery), conf->resync_lock); + if (test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { + atomic_dec(&conf->barrier[idx]); + spin_unlock_irq(&conf->resync_lock); + wake_up(&conf->wait_barrier); + return -EINTR; + } + atomic_inc(&conf->nr_sync_pending); spin_unlock_irq(&conf->resync_lock); + + return 0; } static void lower_barrier(struct r1conf *conf, sector_t sector_nr) @@ -2662,9 +2672,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bitmap_cond_end_sync(mddev->bitmap, sector_nr, mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); - r1_bio = raid1_alloc_init_r1buf(conf); - raise_barrier(conf, sector_nr); + + if (raise_barrier(conf, sector_nr)) + return 0; + + r1_bio = raid1_alloc_init_r1buf(conf); rcu_read_lock(); /* From 1aa3b3e0cbdb32439f04842e88fc7557a0777660 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 9 Apr 2018 22:31:19 +0800 Subject: [PATCH 026/660] fs: quota: Replace GFP_ATOMIC with GFP_KERNEL in dquot_init dquot_init() is never called in atomic context. This function is only set as a parameter of fs_initcall(). Despite never getting called from atomic context, dquot_init() calls __get_free_pages() with GFP_ATOMIC, which waits busily for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, to avoid busy waiting and improve the possibility of sucessful allocation. This is found by a static analysis tool named DCNS written by myself. And I also manually check it. Signed-off-by: Jia-Ju Bai Signed-off-by: Jan Kara --- fs/quota/dquot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 020c597ef9b6..d88231e3b2be 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2966,7 +2966,7 @@ static int __init dquot_init(void) NULL); order = 0; - dquot_hash = (struct hlist_head *)__get_free_pages(GFP_ATOMIC, order); + dquot_hash = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order); if (!dquot_hash) panic("Cannot create dquot hash table"); From dba40d46ebf41e3f7ac9480609529bb6037a918d Mon Sep 17 00:00:00 2001 From: Mariusz Dabrowski Date: Wed, 14 Feb 2018 14:23:30 +0100 Subject: [PATCH 027/660] raid1: copy write hint from master bio to behind bio Signed-off-by: Mariusz Dabrowski Reviewed-by: Artur Paszkiewicz Reviewed-by: Pawel Baldysiak Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6f624311cec2..e9e3308cb0a7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1102,6 +1102,8 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio, goto skip_copy; } + behind_bio->bi_write_hint = bio->bi_write_hint; + while (i < vcnt && size) { struct page *page; int len = min_t(int, PAGE_SIZE, size); From c0db1b677e1d584fab5d7ac76a32e1c0157542e0 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Mon, 2 Apr 2018 15:10:35 +0800 Subject: [PATCH 028/660] drm/vc4: Fix memory leak during BO teardown During BO teardown, an indirect list 'uniform_addr_offsets' wasn't being freed leading to leaking many 128B allocations. Fix the memory leak by releasing it at teardown time. Cc: stable@vger.kernel.org Fixes: 6d45c81d229d ("drm/vc4: Add support for branching in shader validation.") Signed-off-by: Daniel J Blueman Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180402071035.25356-1-daniel@quora.org --- drivers/gpu/drm/vc4/vc4_bo.c | 2 ++ drivers/gpu/drm/vc4/vc4_validate_shaders.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 2decc8e2c79f..add9cc97a3b6 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -195,6 +195,7 @@ static void vc4_bo_destroy(struct vc4_bo *bo) vc4_bo_set_label(obj, -1); if (bo->validated_shader) { + kfree(bo->validated_shader->uniform_addr_offsets); kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; @@ -591,6 +592,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo) } if (bo->validated_shader) { + kfree(bo->validated_shader->uniform_addr_offsets); kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index d3f15bf60900..7cf82b071de2 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -942,6 +942,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) fail: kfree(validation_state.branch_targets); if (validated_shader) { + kfree(validated_shader->uniform_addr_offsets); kfree(validated_shader->texture_samples); kfree(validated_shader); } From 386c6ddbda180676b7d9fc375d54a7bdd353d39e Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Tue, 10 Apr 2018 14:15:46 +0200 Subject: [PATCH 029/660] X86/VMX: Disable VMX preemption timer if MWAIT is not intercepted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VMX-preemption timer is used by KVM as a way to set deadlines for the guest (i.e. timer emulation). That was safe till very recently when capability KVM_X86_DISABLE_EXITS_MWAIT to disable intercepting MWAIT was introduced. According to Intel SDM 25.5.1: """ The VMX-preemption timer operates in the C-states C0, C1, and C2; it also operates in the shutdown and wait-for-SIPI states. If the timer counts down to zero in any state other than the wait-for SIPI state, the logical processor transitions to the C0 C-state and causes a VM exit; the timer does not cause a VM exit if it counts down to zero in the wait-for-SIPI state. The timer is not decremented in C-states deeper than C2. """ Now once the guest issues the MWAIT with a c-state deeper than C2 the preemption timer will never wake it up again since it stopped ticking! Usually this is compensated by other activities in the system that would wake the core from the deep C-state (and cause a VMExit). For example, if the host itself is ticking or it received interrupts, etc! So disable the VMX-preemption timer if MWAIT is exposed to the guest! Cc: Paolo Bonzini Cc: Radim Krčmář Cc: kvm@vger.kernel.org Signed-off-by: KarimAllah Ahmed Fixes: 4d5422cea3b61f158d58924cbb43feada456ba5c Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ce639e00fd0f..e7024ad586f7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12218,10 +12218,16 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift, static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 tscl = rdtsc(); - u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl); - u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; + struct vcpu_vmx *vmx; + u64 tscl, guest_tscl, delta_tsc; + + if (kvm_mwait_in_guest(vcpu->kvm)) + return -EOPNOTSUPP; + + vmx = to_vmx(vcpu); + tscl = rdtsc(); + guest_tscl = kvm_read_l1_tsc(vcpu, tscl); + delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; /* Convert to host delta tsc if tsc scaling is enabled */ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && From 4d5f26ee310237552a36aa14ceee96d6659153cd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 10 Apr 2018 13:38:56 +0100 Subject: [PATCH 030/660] kvm: selftests: fix spelling mistake: "divisable" and "divisible" Trivial fix to spelling mistakes in comment and message text Signed-off-by: Colin Ian King Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- tools/testing/selftests/kvm/lib/sparsebit.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7ca1bb40c498..e213d513dc61 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1435,7 +1435,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, sparsebit_idx_t pg; TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " - "not divisable by page size.\n" + "not divisible by page size.\n" " paddr_min: 0x%lx page_size: 0x%x", paddr_min, vm->page_size); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 0c5cf3e0cb6f..b132bc95d183 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -121,7 +121,7 @@ * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * - * + Node starting index is evenly divisable by the number of bits + * + Node starting index is evenly divisible by the number of bits * within a nodes mask member. * * + Nodes never represent a range of bits that wrap around the @@ -1741,7 +1741,7 @@ void sparsebit_validate_internal(struct sparsebit *s) /* Validate node index is divisible by the mask size */ if (nodep->idx % MASK_BITS) { - fprintf(stderr, "Node index not divisable by " + fprintf(stderr, "Node index not divisible by " "mask size,\n" " nodep: %p nodep->idx: 0x%lx " "MASK_BITS: %lu\n", From 0abf854d7cbbb405e39e0f93d5c1da98dca24bc0 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 10 Apr 2018 14:24:21 +0200 Subject: [PATCH 031/660] selftests: bpf: update .gitignore with missing generated files Signed-off-by: Anders Roxell Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9cf83f895d98..5e1ab2f0eb79 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -12,3 +12,6 @@ test_tcpbpf_user test_verifier_log feature test_libbpf_open +test_sock +test_sock_addr +urandom_read From 3a38bb98d9abdc3856f26b5ed4332803065cd7cf Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 10 Apr 2018 09:37:32 -0700 Subject: [PATCH 032/660] bpf/tracing: fix a deadlock in perf_event_detach_bpf_prog syzbot reported a possible deadlock in perf_event_detach_bpf_prog. The error details: ====================================================== WARNING: possible circular locking dependency detected 4.16.0-rc7+ #3 Not tainted ------------------------------------------------------ syz-executor7/24531 is trying to acquire lock: (bpf_event_mutex){+.+.}, at: [<000000008a849b07>] perf_event_detach_bpf_prog+0x92/0x3d0 kernel/trace/bpf_trace.c:854 but task is already holding lock: (&mm->mmap_sem){++++}, at: [<0000000038768f87>] vm_mmap_pgoff+0x198/0x280 mm/util.c:353 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++}: __might_fault+0x13a/0x1d0 mm/memory.c:4571 _copy_to_user+0x2c/0xc0 lib/usercopy.c:25 copy_to_user include/linux/uaccess.h:155 [inline] bpf_prog_array_copy_info+0xf2/0x1c0 kernel/bpf/core.c:1694 perf_event_query_prog_array+0x1c7/0x2c0 kernel/trace/bpf_trace.c:891 _perf_ioctl kernel/events/core.c:4750 [inline] perf_ioctl+0x3e1/0x1480 kernel/events/core.c:4770 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686 SYSC_ioctl fs/ioctl.c:701 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 -> #0 (bpf_event_mutex){+.+.}: lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920 __mutex_lock_common kernel/locking/mutex.c:756 [inline] __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 perf_event_detach_bpf_prog+0x92/0x3d0 kernel/trace/bpf_trace.c:854 perf_event_free_bpf_prog kernel/events/core.c:8147 [inline] _free_event+0xbdb/0x10f0 kernel/events/core.c:4116 put_event+0x24/0x30 kernel/events/core.c:4204 perf_mmap_close+0x60d/0x1010 kernel/events/core.c:5172 remove_vma+0xb4/0x1b0 mm/mmap.c:172 remove_vma_list mm/mmap.c:2490 [inline] do_munmap+0x82a/0xdf0 mm/mmap.c:2731 mmap_region+0x59e/0x15a0 mm/mmap.c:1646 do_mmap+0x6c0/0xe00 mm/mmap.c:1483 do_mmap_pgoff include/linux/mm.h:2223 [inline] vm_mmap_pgoff+0x1de/0x280 mm/util.c:355 SYSC_mmap_pgoff mm/mmap.c:1533 [inline] SyS_mmap_pgoff+0x462/0x5f0 mm/mmap.c:1491 SYSC_mmap arch/x86/kernel/sys_x86_64.c:100 [inline] SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:91 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(bpf_event_mutex); lock(&mm->mmap_sem); lock(bpf_event_mutex); *** DEADLOCK *** ====================================================== The bug is introduced by Commit f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") where copy_to_user, which requires mm->mmap_sem, is called inside bpf_event_mutex lock. At the same time, during perf_event file descriptor close, mm->mmap_sem is held first and then subsequent perf_event_detach_bpf_prog needs bpf_event_mutex lock. Such a senario caused a deadlock. As suggested by Daniel, moving copy_to_user out of the bpf_event_mutex lock should fix the problem. Fixes: f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") Reported-by: syzbot+dc5ca0e4c9bfafaf2bae@syzkaller.appspotmail.com Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 4 ++-- kernel/bpf/core.c | 45 ++++++++++++++++++++++++++-------------- kernel/trace/bpf_trace.c | 25 ++++++++++++++++++---- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 95a7abd0ee92..486e65e3db26 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -339,8 +339,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, struct bpf_prog *old_prog); int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, - __u32 __user *prog_ids, u32 request_cnt, - __u32 __user *prog_cnt); + u32 *prog_ids, u32 request_cnt, + u32 *prog_cnt); int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, struct bpf_prog *exclude_prog, struct bpf_prog *include_prog, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d315b393abdd..ba03ec39efb3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1572,13 +1572,32 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) return cnt; } +static bool bpf_prog_array_copy_core(struct bpf_prog **prog, + u32 *prog_ids, + u32 request_cnt) +{ + int i = 0; + + for (; *prog; prog++) { + if (*prog == &dummy_bpf_prog.prog) + continue; + prog_ids[i] = (*prog)->aux->id; + if (++i == request_cnt) { + prog++; + break; + } + } + + return !!(*prog); +} + int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, __u32 __user *prog_ids, u32 cnt) { struct bpf_prog **prog; unsigned long err = 0; - u32 i = 0, *ids; bool nospc; + u32 *ids; /* users of this function are doing: * cnt = bpf_prog_array_length(); @@ -1595,16 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, return -ENOMEM; rcu_read_lock(); prog = rcu_dereference(progs)->progs; - for (; *prog; prog++) { - if (*prog == &dummy_bpf_prog.prog) - continue; - ids[i] = (*prog)->aux->id; - if (++i == cnt) { - prog++; - break; - } - } - nospc = !!(*prog); + nospc = bpf_prog_array_copy_core(prog, ids, cnt); rcu_read_unlock(); err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); kfree(ids); @@ -1683,22 +1693,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, } int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, - __u32 __user *prog_ids, u32 request_cnt, - __u32 __user *prog_cnt) + u32 *prog_ids, u32 request_cnt, + u32 *prog_cnt) { + struct bpf_prog **prog; u32 cnt = 0; if (array) cnt = bpf_prog_array_length(array); - if (copy_to_user(prog_cnt, &cnt, sizeof(cnt))) - return -EFAULT; + *prog_cnt = cnt; /* return early if user requested only program count or nothing to copy */ if (!request_cnt || !cnt) return 0; - return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt); + /* this function is called under trace/bpf_trace.c: bpf_event_mutex */ + prog = rcu_dereference_check(array, 1)->progs; + return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC + : 0; } static void bpf_prog_free_deferred(struct work_struct *work) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d88e96d4e12c..56ba0f2a01db 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -977,6 +977,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) { struct perf_event_query_bpf __user *uquery = info; struct perf_event_query_bpf query = {}; + u32 *ids, prog_cnt, ids_len; int ret; if (!capable(CAP_SYS_ADMIN)) @@ -985,16 +986,32 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) return -EINVAL; if (copy_from_user(&query, uquery, sizeof(query))) return -EFAULT; - if (query.ids_len > BPF_TRACE_MAX_PROGS) + + ids_len = query.ids_len; + if (ids_len > BPF_TRACE_MAX_PROGS) return -E2BIG; + ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN); + if (!ids) + return -ENOMEM; + /* + * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which + * is required when user only wants to check for uquery->prog_cnt. + * There is no need to check for it since the case is handled + * gracefully in bpf_prog_array_copy_info. + */ mutex_lock(&bpf_event_mutex); ret = bpf_prog_array_copy_info(event->tp_event->prog_array, - uquery->ids, - query.ids_len, - &uquery->prog_cnt); + ids, + ids_len, + &prog_cnt); mutex_unlock(&bpf_event_mutex); + if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || + copy_to_user(uquery->ids, ids, ids_len * sizeof(u32))) + ret = -EFAULT; + + kfree(ids); return ret; } From 1f5781725dcbb026438e77091c91a94f678c3522 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 10 Apr 2018 15:15:16 +0900 Subject: [PATCH 033/660] commoncap: Handle memory allocation failure. syzbot is reporting NULL pointer dereference at xattr_getsecurity() [1], for cap_inode_getsecurity() is returning sizeof(struct vfs_cap_data) when memory allocation failed. Return -ENOMEM if memory allocation failed. [1] https://syzkaller.appspot.com/bug?id=a55ba438506fe68649a5f50d2d82d56b365e0107 Signed-off-by: Tetsuo Handa Fixes: 8db6c34f1dbc8e06 ("Introduce v3 namespaced file capabilities") Reported-by: syzbot Cc: stable # 4.14+ Acked-by: Serge E. Hallyn Acked-by: James Morris Signed-off-by: Eric W. Biederman --- security/commoncap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/commoncap.c b/security/commoncap.c index 48620c93d697..1ce701fcb3f3 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -449,6 +449,8 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, magic |= VFS_CAP_FLAGS_EFFECTIVE; memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); cap->magic_etc = cpu_to_le32(magic); + } else { + size = -ENOMEM; } } kfree(tmpbuf); From 5ac7c2fd6e7102532104907c0df94abca826ec5c Mon Sep 17 00:00:00 2001 From: Kyle Spiers Date: Tue, 10 Apr 2018 17:02:29 -0700 Subject: [PATCH 034/660] isofs compress: Remove VLA usage As part of the effort to remove VLAs from the kernel[1], this changes the allocation of the bhs and pages arrays from being on the stack to being kcalloc()ed. This also allows for the removal of the explicit zeroing of bhs. https://lkml.org/lkml/2018/3/7/621 Signed-off-by: Kyle Spiers Signed-off-by: Jan Kara --- fs/isofs/compress.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 9bb2fe35799d..10205ececc27 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -59,7 +60,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, >> bufshift; int haveblocks; blkcnt_t blocknum; - struct buffer_head *bhs[needblocks + 1]; + struct buffer_head **bhs; int curbh, curpage; if (block_size > deflateBound(1UL << zisofs_block_shift)) { @@ -80,7 +81,11 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, /* Because zlib is not thread-safe, do all the I/O at the top. */ blocknum = block_start >> bufshift; - memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *)); + bhs = kcalloc(needblocks + 1, sizeof(*bhs), GFP_KERNEL); + if (!bhs) { + *errp = -ENOMEM; + return 0; + } haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks); ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs); @@ -190,6 +195,7 @@ z_eio: b_eio: for (i = 0; i < haveblocks; i++) brelse(bhs[i]); + kfree(bhs); return stream.total_out; } @@ -305,7 +311,7 @@ static int zisofs_readpage(struct file *file, struct page *page) unsigned int zisofs_pages_per_cblock = PAGE_SHIFT <= zisofs_block_shift ? (1 << (zisofs_block_shift - PAGE_SHIFT)) : 0; - struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)]; + struct page **pages; pgoff_t index = page->index, end_index; end_index = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -330,6 +336,12 @@ static int zisofs_readpage(struct file *file, struct page *page) full_page = 0; pcount = 1; } + pages = kcalloc(max_t(unsigned int, zisofs_pages_per_cblock, 1), + sizeof(*pages), GFP_KERNEL); + if (!pages) { + unlock_page(page); + return -ENOMEM; + } pages[full_page] = page; for (i = 0; i < pcount; i++, index++) { @@ -357,6 +369,7 @@ static int zisofs_readpage(struct file *file, struct page *page) } /* At this point, err contains 0 or -EIO depending on the "critical" page */ + kfree(pages); return err; } From cf43ae63c024971e6df94665e829c01c22202a19 Mon Sep 17 00:00:00 2001 From: Jack Ma Date: Fri, 6 Apr 2018 15:45:16 +1200 Subject: [PATCH 035/660] netfilter: xt_connmark: Add bit mapping for bit-shift operation. With the addition of bit-shift operations, we are able to shift ct/skbmark based on user requirements. However, this change might also cause the most left/right hand- side mark to be accidentially lost during shift operations. This patch adds the ability to 'grep' certain bits based on ctmask or nfmask out of the original mark. Then, apply shift operations to achieve a new mapping between ctmark and skb->mark. For example: If someone would like save the fourth F bits of ctmark 0xFFF(F)000F into the seventh hexadecimal (0) skb->mark 0xABC000(0)E. new_targetmark = (ctmark & ctmask) >> 12; (new) skb->mark = (skb->mark &~nfmask) ^ new_targetmark; This will preserve the other bits that are not related to this operation. Fixes: 472a73e00757 ("netfilter: xt_conntrack: Support bit-shifting for CONNMARK & MARK targets.") Reviewed-by: Florian Westphal Signed-off-by: Jack Ma Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connmark.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 773da82190dc..4b424e6caf3e 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -41,6 +41,7 @@ connmark_tg_shift(struct sk_buff *skb, u8 shift_bits, u8 shift_dir) { enum ip_conntrack_info ctinfo; + u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; @@ -61,24 +62,26 @@ connmark_tg_shift(struct sk_buff *skb, } break; case XT_CONNMARK_SAVE: - newmark = (ct->mark & ~info->ctmask) ^ - (skb->mark & info->nfmask); + new_targetmark = (skb->mark & info->nfmask); if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + new_targetmark >>= shift_bits; else - newmark <<= shift_bits; + new_targetmark <<= shift_bits; + newmark = (ct->mark & ~info->ctmask) ^ + new_targetmark; if (ct->mark != newmark) { ct->mark = newmark; nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: - newmark = (skb->mark & ~info->nfmask) ^ - (ct->mark & info->ctmask); + new_targetmark = (ct->mark & info->ctmask); if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + new_targetmark >>= shift_bits; else - newmark <<= shift_bits; + new_targetmark <<= shift_bits; + newmark = (skb->mark & ~info->nfmask) ^ + new_targetmark; skb->mark = newmark; break; } From 8e9b29b61851ba452e33373743fadb52778e9075 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Wed, 11 Apr 2018 11:16:03 +0200 Subject: [PATCH 036/660] X86/KVM: Do not allow DISABLE_EXITS_MWAIT when LAPIC ARAT is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the processor does not have an "Always Running APIC Timer" (aka ARAT), we should not give guests direct access to MWAIT. The LAPIC timer would stop ticking in deep C-states, so any host deadlines would not wakeup the host kernel. The host kernel intel_idle driver handles this by switching to broadcast mode when ARAT is not available and MWAIT is issued with a deep C-state that would stop the LAPIC timer. When MWAIT is passed through, we can not tell when MWAIT is issued. So just disable this capability when LAPIC ARAT is not available. I am not even sure if there are any CPUs with VMX support but no LAPIC ARAT or not. Cc: Paolo Bonzini Cc: Radim Krčmář Reported-by: Wanpeng Li Signed-off-by: KarimAllah Ahmed Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b2ff74b12ec4..0334b250e102 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2819,7 +2819,8 @@ out: static inline bool kvm_can_mwait_in_guest(void) { return boot_cpu_has(X86_FEATURE_MWAIT) && - !boot_cpu_has_bug(X86_BUG_MONITOR); + !boot_cpu_has_bug(X86_BUG_MONITOR) && + boot_cpu_has(X86_FEATURE_ARAT); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) From 2698d82e519413c6ad287e6f14b29e0373ed37f8 Mon Sep 17 00:00:00 2001 From: hu huajun Date: Wed, 11 Apr 2018 15:16:40 +0800 Subject: [PATCH 037/660] KVM: X86: fix incorrect reference of trace_kvm_pi_irte_update In arch/x86/kvm/trace.h, this function is declared as host_irq the first input, and vcpu_id the second, instead of otherwise. Signed-off-by: hu huajun Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 5 ++--- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e0a3f56a791d..b3ebc8ad6891 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5266,9 +5266,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, } if (!ret && svm) { - trace_kvm_pi_irte_update(svm->vcpu.vcpu_id, - host_irq, e->gsi, - vcpu_info.vector, + trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id, + e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e7024ad586f7..fe3e13edf201 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12533,7 +12533,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; - trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi, + trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); if (set) From 300ad8992913025b4294d4fc37b6bfff4a8b7ad1 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Fri, 6 Apr 2018 16:07:59 -0600 Subject: [PATCH 038/660] mmc: sdhci-pci: Only do AMD tuning for HS200 Commit c31165d7400b ("mmc: sdhci-pci: Add support for HS200 tuning mode on AMD, eMMC-4.5.1") added a HS200 tuning method for use with AMD SDHCI controllers. As described in the commit subject, this tuning is specific for HS200. However, as implemented, this method is used for all host timings, because platform_execute_tuning, if it exists, is called unconditionally by sdhci_execute_tuning(). This breaks tuning when using the AMD controller with, for example, a DDR50 SD card. Instead, we can implement an amd execute_tuning wrapper callback, and then conditionally do the HS200 specific tuning for HS200, and otherwise call back to the standard sdhci_execute_tuning(). Signed-off-by: Daniel Kurtz Acked-by: Shyam Sundar S K Acked-by: Adrian Hunter Fixes: c31165d7400b ("mmc: sdhci-pci: Add support for HS200 tuning mode on AMD, eMMC-4.5.1") Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 787434e5589d..78c25ad35fd2 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1312,7 +1312,7 @@ static void amd_enable_manual_tuning(struct pci_dev *pdev) pci_write_config_dword(pdev, AMD_SD_MISC_CONTROL, val); } -static int amd_execute_tuning(struct sdhci_host *host, u32 opcode) +static int amd_execute_tuning_hs200(struct sdhci_host *host, u32 opcode) { struct sdhci_pci_slot *slot = sdhci_priv(host); struct pci_dev *pdev = slot->chip->pdev; @@ -1351,6 +1351,27 @@ static int amd_execute_tuning(struct sdhci_host *host, u32 opcode) return 0; } +static int amd_execute_tuning(struct mmc_host *mmc, u32 opcode) +{ + struct sdhci_host *host = mmc_priv(mmc); + + /* AMD requires custom HS200 tuning */ + if (host->timing == MMC_TIMING_MMC_HS200) + return amd_execute_tuning_hs200(host, opcode); + + /* Otherwise perform standard SDHCI tuning */ + return sdhci_execute_tuning(mmc, opcode); +} + +static int amd_probe_slot(struct sdhci_pci_slot *slot) +{ + struct mmc_host_ops *ops = &slot->host->mmc_host_ops; + + ops->execute_tuning = amd_execute_tuning; + + return 0; +} + static int amd_probe(struct sdhci_pci_chip *chip) { struct pci_dev *smbus_dev; @@ -1385,12 +1406,12 @@ static const struct sdhci_ops amd_sdhci_pci_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, - .platform_execute_tuning = amd_execute_tuning, }; static const struct sdhci_pci_fixes sdhci_amd = { .probe = amd_probe, .ops = &amd_sdhci_pci_ops, + .probe_slot = amd_probe_slot, }; static const struct pci_device_id pci_ids[] = { From 7ecb46e9ee9af18e304eb9e7d6804c59a408e846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabi=C3=A1n=20Inostroza?= Date: Thu, 12 Apr 2018 00:37:35 -0300 Subject: [PATCH 039/660] ALSA: line6: Use correct endpoint type for midi output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sending MIDI messages to a PODxt through the USB connection shows "usb_submit_urb failed" in dmesg and the message is not received by the POD. The error is caused because in the funcion send_midi_async() in midi.c there is a call to usb_sndbulkpipe() for endpoint 3 OUT, but the PODxt USB descriptor shows that this endpoint it's an interrupt endpoint. Patch tested with PODxt only. [ The bug has been present from the very beginning in the staging driver time, but Fixes below points to the commit moving to sound/ directory so that the fix can be cleanly applied -- tiwai ] Fixes: 61864d844c29 ("ALSA: move line6 usb driver into sound/usb") Signed-off-by: Fabián Inostroza Cc: Signed-off-by: Takashi Iwai --- sound/usb/line6/midi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/line6/midi.c b/sound/usb/line6/midi.c index 6d7cde56a355..e2cf55c53ea8 100644 --- a/sound/usb/line6/midi.c +++ b/sound/usb/line6/midi.c @@ -125,7 +125,7 @@ static int send_midi_async(struct usb_line6 *line6, unsigned char *data, } usb_fill_int_urb(urb, line6->usbdev, - usb_sndbulkpipe(line6->usbdev, + usb_sndintpipe(line6->usbdev, line6->properties->ep_ctrl_w), transfer_buffer, length, midi_sent, line6, line6->interval); From 573eda59c772d11fc2b56d525dfb698b0f87ddb3 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 12 Apr 2018 11:23:15 +0300 Subject: [PATCH 040/660] ASoC: dmic: Fix clock parenting In 4.16 the clock hierarchy got changed by a5c82a09d876 ARM: dts: omap4: add clkctrl nodes The fck of dmic is no longer a mux clock, it's parent is. Signed-off-by: Tero Kristo Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown Cc: stable@vger.kernel.org # 4.16+ --- sound/soc/omap/omap-dmic.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/soc/omap/omap-dmic.c b/sound/soc/omap/omap-dmic.c index 09db2aec12a3..b2f5d2fa354d 100644 --- a/sound/soc/omap/omap-dmic.c +++ b/sound/soc/omap/omap-dmic.c @@ -281,7 +281,7 @@ static int omap_dmic_dai_trigger(struct snd_pcm_substream *substream, static int omap_dmic_select_fclk(struct omap_dmic *dmic, int clk_id, unsigned int freq) { - struct clk *parent_clk; + struct clk *parent_clk, *mux; char *parent_clk_name; int ret = 0; @@ -329,14 +329,21 @@ static int omap_dmic_select_fclk(struct omap_dmic *dmic, int clk_id, return -ENODEV; } + mux = clk_get_parent(dmic->fclk); + if (IS_ERR(mux)) { + dev_err(dmic->dev, "can't get fck mux parent\n"); + clk_put(parent_clk); + return -ENODEV; + } + mutex_lock(&dmic->mutex); if (dmic->active) { /* disable clock while reparenting */ pm_runtime_put_sync(dmic->dev); - ret = clk_set_parent(dmic->fclk, parent_clk); + ret = clk_set_parent(mux, parent_clk); pm_runtime_get_sync(dmic->dev); } else { - ret = clk_set_parent(dmic->fclk, parent_clk); + ret = clk_set_parent(mux, parent_clk); } mutex_unlock(&dmic->mutex); @@ -349,6 +356,7 @@ static int omap_dmic_select_fclk(struct omap_dmic *dmic, int clk_id, dmic->fclk_freq = freq; err_busy: + clk_put(mux); clk_put(parent_clk); return ret; From 619d3a2922ce623ca2eca443cc936810d328317c Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Wed, 4 Apr 2018 14:24:11 -0700 Subject: [PATCH 041/660] HID: wacom: bluetooth: send exit report for recent Bluetooth devices The code path for recent Bluetooth devices omits an exit report which resets all the values of the device. Fixes: 4922cd26f0 ("HID: wacom: Support 2nd-gen Intuos Pro's Bluetooth classic interface") Cc: # 4.11 Signed-off-by: Aaron Armstrong Skomra Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 76 +++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 6da16a879c9f..5f947ec20dcb 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -689,6 +689,45 @@ static int wacom_intuos_get_tool_type(int tool_id) return tool_type; } +static void wacom_exit_report(struct wacom_wac *wacom) +{ + struct input_dev *input = wacom->pen_input; + struct wacom_features *features = &wacom->features; + unsigned char *data = wacom->data; + int idx = (features->type == INTUOS) ? (data[1] & 0x01) : 0; + + /* + * Reset all states otherwise we lose the initial states + * when in-prox next time + */ + input_report_abs(input, ABS_X, 0); + input_report_abs(input, ABS_Y, 0); + input_report_abs(input, ABS_DISTANCE, 0); + input_report_abs(input, ABS_TILT_X, 0); + input_report_abs(input, ABS_TILT_Y, 0); + if (wacom->tool[idx] >= BTN_TOOL_MOUSE) { + input_report_key(input, BTN_LEFT, 0); + input_report_key(input, BTN_MIDDLE, 0); + input_report_key(input, BTN_RIGHT, 0); + input_report_key(input, BTN_SIDE, 0); + input_report_key(input, BTN_EXTRA, 0); + input_report_abs(input, ABS_THROTTLE, 0); + input_report_abs(input, ABS_RZ, 0); + } else { + input_report_abs(input, ABS_PRESSURE, 0); + input_report_key(input, BTN_STYLUS, 0); + input_report_key(input, BTN_STYLUS2, 0); + input_report_key(input, BTN_TOUCH, 0); + input_report_abs(input, ABS_WHEEL, 0); + if (features->type >= INTUOS3S) + input_report_abs(input, ABS_Z, 0); + } + input_report_key(input, wacom->tool[idx], 0); + input_report_abs(input, ABS_MISC, 0); /* reset tool id */ + input_event(input, EV_MSC, MSC_SERIAL, wacom->serial[idx]); + wacom->id[idx] = 0; +} + static int wacom_intuos_inout(struct wacom_wac *wacom) { struct wacom_features *features = &wacom->features; @@ -741,36 +780,7 @@ static int wacom_intuos_inout(struct wacom_wac *wacom) if (!wacom->id[idx]) return 1; - /* - * Reset all states otherwise we lose the initial states - * when in-prox next time - */ - input_report_abs(input, ABS_X, 0); - input_report_abs(input, ABS_Y, 0); - input_report_abs(input, ABS_DISTANCE, 0); - input_report_abs(input, ABS_TILT_X, 0); - input_report_abs(input, ABS_TILT_Y, 0); - if (wacom->tool[idx] >= BTN_TOOL_MOUSE) { - input_report_key(input, BTN_LEFT, 0); - input_report_key(input, BTN_MIDDLE, 0); - input_report_key(input, BTN_RIGHT, 0); - input_report_key(input, BTN_SIDE, 0); - input_report_key(input, BTN_EXTRA, 0); - input_report_abs(input, ABS_THROTTLE, 0); - input_report_abs(input, ABS_RZ, 0); - } else { - input_report_abs(input, ABS_PRESSURE, 0); - input_report_key(input, BTN_STYLUS, 0); - input_report_key(input, BTN_STYLUS2, 0); - input_report_key(input, BTN_TOUCH, 0); - input_report_abs(input, ABS_WHEEL, 0); - if (features->type >= INTUOS3S) - input_report_abs(input, ABS_Z, 0); - } - input_report_key(input, wacom->tool[idx], 0); - input_report_abs(input, ABS_MISC, 0); /* reset tool id */ - input_event(input, EV_MSC, MSC_SERIAL, wacom->serial[idx]); - wacom->id[idx] = 0; + wacom_exit_report(wacom); return 2; } @@ -1235,6 +1245,12 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) if (!valid) continue; + if (!prox) { + wacom->shared->stylus_in_proximity = false; + wacom_exit_report(wacom); + input_sync(pen_input); + return; + } if (range) { input_report_abs(pen_input, ABS_X, get_unaligned_le16(&frame[1])); input_report_abs(pen_input, ABS_Y, get_unaligned_le16(&frame[3])); From 9dc9a95f03a69ab926d9ff1986ab2087f34a5dce Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Tue, 3 Apr 2018 21:18:33 +0300 Subject: [PATCH 042/660] perf stat: Enable 1ms interval for printing event counters values Currently print count interval for performance counters values is limited by 10ms so reading the values at frequencies higher than 100Hz is restricted by the tool. This change makes perf stat -I possible on frequencies up to 1KHz and, to some extent, makes perf stat -I to be on-par with perf record sampling profiling. When running perf stat -I for monitoring e.g. PCIe uncore counters and at the same time profiling some I/O workload by perf record e.g. for cpu-cycles and context switches, it is then possible to observe consolidated CPU/OS/IO(Uncore) performance picture for that workload. Tool overhead warning printed when specifying -v option can be missed due to screen scrolling in case you have output to the console so message is moved into help available by running perf stat -h. Signed-off-by: Alexey Budankov Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/b842ad6a-d606-32e4-afe5-974071b5198e@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 2 +- tools/perf/builtin-stat.c | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index f15b306be183..e6c3b4e555c2 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -153,7 +153,7 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m -I msecs:: --interval-print msecs:: -Print count deltas every N milliseconds (minimum: 10ms) +Print count deltas every N milliseconds (minimum: 1ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5c454855908..147a27e8c937 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1943,7 +1943,8 @@ static const struct option stat_options[] = { OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"), OPT_UINTEGER('I', "interval-print", &stat_config.interval, - "print counts at regular interval in ms (>= 10)"), + "print counts at regular interval in ms " + "(overhead is possible for values <= 100ms)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), OPT_UINTEGER(0, "timeout", &stat_config.timeout, @@ -2923,17 +2924,6 @@ int cmd_stat(int argc, const char **argv) } } - if (interval && interval < 100) { - if (interval < 10) { - pr_err("print interval must be >= 10ms\n"); - parse_options_usage(stat_usage, stat_options, "I", 1); - goto out; - } else - pr_warning("print interval < 100ms. " - "The overhead percentage could be high in some cases. " - "Please proceed with caution.\n"); - } - if (stat_config.times && interval) interval_count = true; else if (stat_config.times && !interval) { From 4d3b57da1593c66835d8e3a757e4751b35493fb8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 4 Apr 2018 17:34:45 +0100 Subject: [PATCH 043/660] tools headers: Restore READ_ONCE() C++ compatibility Our userspace defines READ_ONCE() in a way that clang doesn't like, as we have an anonymous union in which neither field is initialized. WRITE_ONCE() is fine since it initializes the __val field. For READ_ONCE() we can keep clang and GCC happy with a dummy initialization of the __c field, so let's do that. At the same time, let's split READ_ONCE() and WRITE_ONCE() over several lines for legibility, as we do in the in-kernel . Reported-by: Li Zhijian Reported-by: Sandipan Das Tested-by: Sandipan Das Signed-off-by: Mark Rutland Fixes: 6aa7de059173a986 ("locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE()") Link: http://lkml.kernel.org/r/20180404163445.16492-1-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 04e32f965ad7..1827c2f973f9 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -151,11 +151,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * required ordering. */ -#define READ_ONCE(x) \ - ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) -#define WRITE_ONCE(x, val) \ - ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) #ifndef __fallthrough From af72cfb80af5e4cafd8e0b58ac54f222c913aa1b Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 10 Apr 2018 19:16:24 -0500 Subject: [PATCH 044/660] perf tests: Run dwarf unwind test on arm32 Enable the unwind test on arm32: $ perf test unwind 58: DWARF unwind : Ok Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Brian Robbins Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180410191624.a3a468670dd4548c66d3d094@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/include/arch-tests.h | 12 ++++++++++++ tools/perf/arch/arm/tests/Build | 2 ++ tools/perf/arch/arm/tests/arch-tests.c | 16 ++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 tools/perf/arch/arm/include/arch-tests.h create mode 100644 tools/perf/arch/arm/tests/arch-tests.c diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h new file mode 100644 index 000000000000..90ec4c8cb880 --- /dev/null +++ b/tools/perf/arch/arm/include/arch-tests.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build index b30eff9bcc83..883c57ff0c08 100644 --- a/tools/perf/arch/arm/tests/Build +++ b/tools/perf/arch/arm/tests/Build @@ -1,2 +1,4 @@ libperf-y += regs_load.o libperf-y += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c new file mode 100644 index 000000000000..5b1543c98022 --- /dev/null +++ b/tools/perf/arch/arm/tests/arch-tests.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, +#endif + { + .func = NULL, + }, +}; From 3e83eda467050f13fa69d888993458b76e733de9 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Mon, 9 Apr 2018 15:41:31 +0800 Subject: [PATCH 045/660] HID: i2c-hid: Fix resume issue on Raydium touchscreen device When Rayd touchscreen resumed from S3, it issues too many errors like: i2c_hid i2c-RAYD0001:00: i2c_hid_get_input: incomplete report (58/5442) And all the report data are corrupted, touchscreen is unresponsive. Fix this by re-sending report description command after resume. Add device ID as a quirk. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/i2c-hid/i2c-hid.c | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5a3a7ead3012..0b5cc910f62e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -525,6 +525,9 @@ #define I2C_VENDOR_ID_HANTICK 0x0911 #define I2C_PRODUCT_ID_HANTICK_5288 0x5288 +#define I2C_VENDOR_ID_RAYD 0x2386 +#define I2C_PRODUCT_ID_RAYD_3118 0x3118 + #define USB_VENDOR_ID_HANWANG 0x0b57 #define USB_DEVICE_ID_HANWANG_TABLET_FIRST 0x5000 #define USB_DEVICE_ID_HANWANG_TABLET_LAST 0x8fff diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 97689e98e53f..615a91ac93bd 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -47,6 +47,7 @@ /* quirks to control the device */ #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0) #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1) +#define I2C_HID_QUIRK_RESEND_REPORT_DESCR BIT(2) /* flags */ #define I2C_HID_STARTED 0 @@ -171,6 +172,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV }, { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288, I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, + { I2C_VENDOR_ID_RAYD, I2C_PRODUCT_ID_RAYD_3118, + I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { 0, 0 } }; @@ -1220,6 +1223,16 @@ static int i2c_hid_resume(struct device *dev) if (ret) return ret; + /* RAYDIUM device (2386:3118) need to re-send report descr cmd + * after resume, after this it will be back normal. + * otherwise it issues too many incomplete reports. + */ + if (ihid->quirks & I2C_HID_QUIRK_RESEND_REPORT_DESCR) { + ret = i2c_hid_command(client, &hid_report_descr_cmd, NULL, 0); + if (!ret) + return ret; + } + if (hid->driver && hid->driver->reset_resume) { ret = hid->driver->reset_resume(hid); return ret; From 592c10e217f3edb35c7e0deba161fef69ad1a336 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 10:30:03 -0300 Subject: [PATCH 046/660] perf annotate: Allow showing offsets in more than just jump targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jesper wanted to see offsets at callq sites when doing some performance investigation related to retpolines, so save him some time by providing an 'struct annotation_options' to control where offsets should appear: just on jump targets? That + call instructions? All? This puts in place the logic to show the offsets, now we need to wire this up in the TUI browser (next patch) and on the 'perf annotate --stdio2" interface, where we need a more general mechanism to setup the 'annotation_options' struct from the command line. Suggested-by: Jesper Dangaard Brouer Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-m3jc9c3swobye9tj08gnh5i7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 11 +++++++++-- tools/perf/util/annotate.h | 9 +++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index fbad8dfbb186..5edc565d86c4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -46,6 +46,7 @@ struct annotation_options annotation__default_options = { .use_offset = true, .jump_arrows = true, + .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS, }; const char *disassembler_style; @@ -2512,7 +2513,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (!notes->options->use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { - if (al->jump_sources) { + if (al->jump_sources && + notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { if (notes->options->show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", @@ -2523,9 +2525,14 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati obj__printf(obj, bf); obj__set_color(obj, prev); } - +print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", notes->widths.target, addr); + } else if (ins__is_call(&disasm_line(al)->ins) && + notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + goto print_addr; + } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index db8d09bea07e..f28a9e43421d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -70,8 +70,17 @@ struct annotation_options { show_nr_jumps, show_nr_samples, show_total_period; + u8 offset_level; }; +enum { + ANNOTATION__OFFSET_JUMP_TARGETS = 1, + ANNOTATION__OFFSET_CALL, + ANNOTATION__MAX_OFFSET_LEVEL, +}; + +#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS + extern struct annotation_options annotation__default_options; struct annotation; From 51f39603b5f260c73635f4d06d390476b32df6a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 10:41:23 -0300 Subject: [PATCH 047/660] perf annotate browser: Allow showing offsets in more than just jump targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jesper wanted to see offsets at callq sites when doing some performance investigation related to retpolines, so save him some time by providing a 'O' hotkey to allow showing offsets from function start at call instructions or in all instructions, just go on pressing 'O' till the offsets you need appear. Example: Starts with: Samples: 64 of event 'cycles:ppp', 100000 Hz, Event count (approx.): 318963 ixgbe_read_reg /proc/kcore Percent│ ↑ je 2a │ ┌──cmp $0xffffffff,%r13d │ ├──je d0 │ │ mov $0x53e3,%edi │ │→ callq __const_udelay │ │ sub $0x1,%r15d │ │↑ jne 83 │ │ mov 0x8(%rbp),%rax │ │ testb $0x20,0x1799(%rax) │ │↑ je 2a │ │ mov 0x200(%rax),%rdi │ │ mov %r13d,%edx │ │ mov $0xffffffffc02595d8,%rsi │ │→ callq netdev_warn │ │↑ jmpq 2a │d0:└─→mov 0x8(%rbp),%rsi │ mov %rbp,%rdi │ mov %eax,0x4(%rsp) │ → callq ixgbe_remove_adapter.isra.77 │ mov 0x4(%rsp),%eax Press 'h' for help on key bindings ============================================================================ Pess 'O': Samples: 64 of event 'cycles:ppp', 100000 Hz, Event count (approx.): 318963 ixgbe_read_reg /proc/kcore Percent│ ↑ je 2a │ ┌──cmp $0xffffffff,%r13d │ ├──je d0 │ │ mov $0x53e3,%edi │99:│→ callq __const_udelay │ │ sub $0x1,%r15d │ │↑ jne 83 │ │ mov 0x8(%rbp),%rax │ │ testb $0x20,0x1799(%rax) │ │↑ je 2a │ │ mov 0x200(%rax),%rdi │ │ mov %r13d,%edx │ │ mov $0xffffffffc02595d8,%rsi │c6:│→ callq netdev_warn │ │↑ jmpq 2a │d0:└─→mov 0x8(%rbp),%rsi │ mov %rbp,%rdi │ mov %eax,0x4(%rsp) │db: → callq ixgbe_remove_adapter.isra.77 │ mov 0x4(%rsp),%eax Press 'h' for help on key bindings ============================================================================ Press 'O' again: Samples: 64 of event 'cycles:ppp', 100000 Hz, Event count (approx.): 318963 ixgbe_read_reg /proc/kcore Percent│8c: ↑ je 2a │8e:┌──cmp $0xffffffff,%r13d │92:├──je d0 │94:│ mov $0x53e3,%edi │99:│→ callq __const_udelay │9e:│ sub $0x1,%r15d │a2:│↑ jne 83 │a4:│ mov 0x8(%rbp),%rax │a8:│ testb $0x20,0x1799(%rax) │af:│↑ je 2a │b5:│ mov 0x200(%rax),%rdi │bc:│ mov %r13d,%edx │bf:│ mov $0xffffffffc02595d8,%rsi │c6:│→ callq netdev_warn │cb:│↑ jmpq 2a │d0:└─→mov 0x8(%rbp),%rsi │d4: mov %rbp,%rdi │d7: mov %eax,0x4(%rsp) │db: → callq ixgbe_remove_adapter.isra.77 │e0: mov 0x4(%rsp),%eax Press 'h' for help on key bindings ============================================================================ Press 'O' again and it will show just jump target offsets. Suggested-by: Jesper Dangaard Brouer Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-upp6pfdetwlsx18ec2uf1od4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 12c099a87f8b..3781d74088a7 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -692,6 +692,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "J Toggle showing number of jump sources on targets\n" "n Search next string\n" "o Toggle disassembler output/simplified view\n" + "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n" "s Toggle source code view\n" "t Circulate percent, total period, samples view\n" "/ Search string\n" @@ -719,6 +720,10 @@ static int annotate_browser__run(struct annotate_browser *browser, notes->options->use_offset = !notes->options->use_offset; annotation__update_column_widths(notes); continue; + case 'O': + if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + continue; case 'j': notes->options->jump_arrows = !notes->options->jump_arrows; continue; From e14b733c5dc62f574b4dbc045b2cc52b03d83d4c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 12:08:53 -0300 Subject: [PATCH 048/660] perf jvmti: Give hints about package names needed to build Give as examples of package names to install to have this built for fedora and debian, to help the user a bit. The part from 'e.g.:' onwards: No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: William Cohen Link: https://lkml.kernel.org/n/tip-edbi4r2pvzn7no6ebxbtczng@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c7abd83a8e19..6b307e97dc57 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -847,7 +847,7 @@ ifndef NO_JVMTI ifeq ($(feature-jvmti), 1) $(call detected_var,JDIR) else - $(warning No openjdk development package found, please install JDK package) + $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel) NO_JVMTI := 1 endif endif From c13009c1ef3a94cfea212c86bbb94c8361e5de0c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 17:57:32 -0300 Subject: [PATCH 049/660] perf tests bpf: Remove unused ptrace.h include from LLVM test The bpf-script-test-kbuild.c script, used in one of the LLVM subtests, includes ptrace.h unnecessarily, and that ends up making it include a header that uses asm(_ASM_SP), a feature that is not supported by clang <= 4.0, breaking that 'perf test' entry. This ended up leading to the ca26cffa4e4a ("x86/asm: Allow again using asm.h when building for the 'bpf' clang target"), adding an ifndef __BPF__ to the arch/x86/include/asm/asm.h file. Newer clang versions accept that asm(_ASM_SP) construct, so just remove the ptrace.h include, which paves the way for reverting ca26cffa4e4a ("x86/asm: Allow again using asm.h when building for the 'bpf' clang target"). Suggested-by: Yonghong Song Acked-by: Yonghong Song Link: https://lkml.kernel.org/r/613f0a0d-c433-8f4d-dcc1-c9889deae39e@fb.com Cc: Adrian Hunter Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Daniel Borkmann Cc: David Ahern Cc: Dmitriy Vyukov Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-clbcnzbakdp18ibme4wt43ib@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-test-kbuild.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c index 3626924740d8..ff3ec8337f0a 100644 --- a/tools/perf/tests/bpf-script-test-kbuild.c +++ b/tools/perf/tests/bpf-script-test-kbuild.c @@ -9,7 +9,6 @@ #define SEC(NAME) __attribute__((section(NAME), used)) #include -#include SEC("func=vfs_llseek") int bpf_func__vfs_llseek(void *ctx) From fd97d39b0aa49a4beb429aec344604c1b689f089 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 18:03:33 -0300 Subject: [PATCH 050/660] Revert "x86/asm: Allow again using asm.h when building for the 'bpf' clang target" This reverts commit ca26cffa4e4aaeb09bb9e308f95c7835cb149248. Newer clang versions accept that asm(_ASM_SP) construct, and now that the bpf-script-test-kbuild.c script, used in one of the 'perf test LLVM' subtests doesn't include ptrace.h, which ended up including arch/x86/include/asm/asm.h, we can revert this patch. Suggested-by: Yonghong Song Link: https://lkml.kernel.org/r/613f0a0d-c433-8f4d-dcc1-c9889deae39e@fb.com Acked-by: Yonghong Song Cc: Adrian Hunter Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Daniel Borkmann Cc: David Ahern Cc: Dmitriy Vyukov Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-nqozcv8loq40tkqpfw997993@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/asm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 386a6900e206..219faaec51df 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -136,7 +136,6 @@ #endif #ifndef __ASSEMBLY__ -#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -146,6 +145,5 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif -#endif #endif /* _ASM_X86_ASM_H */ From 90ce61b91903f9c357cbceced45d41642f2aa812 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 9 Apr 2018 18:26:47 +0800 Subject: [PATCH 051/660] perf script: Use HAVE_LIBXXX_SUPPORT to replace NO_LIBXXX In Makefile.config, we define the conditional compilation variables HAVE_LIBPERL_SUPPORT and HAVE_LIBPYTHON_SUPPORT. To make the C code more consistent, this patch replaces NO_LIBPERL/NO_LIBPYTHON in C code with HAVE_LIBPERL_SUPPORT/ HAVE_LIBPYTHON_SUPPORT. Signed-off-by: Jin Yao Suggested-by: Ingo Molnar Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1523269609-28824-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 ++-- tools/perf/util/trace-event-scripting.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 313c42423393..b17edbcd98cc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2801,11 +2801,11 @@ int find_scripts(char **scripts_array, char **scripts_path_array) for_each_lang(scripts_path, scripts_dir, lang_dirent) { scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, lang_dirent->d_name); -#ifdef NO_LIBPERL +#ifndef HAVE_LIBPERL_SUPPORT if (strstr(lang_path, "perl")) continue; #endif -#ifdef NO_LIBPYTHON +#ifndef HAVE_LIBPYTHON_SUPPORT if (strstr(lang_path, "python")) continue; #endif diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 0ac9077f62a2..b1e5c3a2b8e3 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -98,7 +98,7 @@ static void register_python_scripting(struct scripting_ops *scripting_ops) } } -#ifdef NO_LIBPYTHON +#ifndef HAVE_LIBPYTHON_SUPPORT void setup_python_scripting(void) { register_python_scripting(&python_scripting_unsupported_ops); @@ -161,7 +161,7 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops) } } -#ifdef NO_LIBPERL +#ifndef HAVE_LIBPERL_SUPPORT void setup_perl_scripting(void) { register_perl_scripting(&perl_scripting_unsupported_ops); From 22e9af4e94801bbdf6945e55db64b877be7c71b3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 9 Apr 2018 18:26:48 +0800 Subject: [PATCH 052/660] perf tools: Rename HAVE_SYSCALL_TABLE to HAVE_SYSCALL_TABLE_SUPPORT To be consistent with other HAVE_XXX_SUPPORT uses in Makefile.config, this patch renames HAVE_SYSCALL_TABLE to HAVE_SYSCALL_TABLE_SUPPORT and updates the C code accordingly. Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1523269609-28824-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- tools/perf/builtin-help.c | 2 +- tools/perf/perf.c | 4 ++-- tools/perf/util/generate-cmdlist.sh | 2 +- tools/perf/util/syscalltbl.c | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6b307e97dc57..ae7dc46e8f8a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -68,7 +68,7 @@ ifeq ($(NO_PERF_REGS),0) endif ifneq ($(NO_SYSCALL_TABLE),1) - CFLAGS += -DHAVE_SYSCALL_TABLE + CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT endif # So far there's only x86 and arm libdw unwind support merged in perf. diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 4aca13f23b9d..1c41b4eaf73c 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -439,7 +439,7 @@ int cmd_help(int argc, const char **argv) #ifdef HAVE_LIBELF_SUPPORT "probe", #endif -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) "trace", #endif NULL }; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 1659029d03fc..20a08cb32332 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -73,7 +73,7 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, @@ -491,7 +491,7 @@ int main(int argc, const char **argv) argv[0] = cmd; } if (strstarts(cmd, "trace")) { -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv); diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index ff17920a5ebc..c3cef36d4176 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)" +echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 895122d638dd..0ee7f568d60c 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -17,7 +17,7 @@ #include #include -#ifdef HAVE_SYSCALL_TABLE +#ifdef HAVE_SYSCALL_TABLE_SUPPORT #include #include "string2.h" #include "util.h" @@ -139,7 +139,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); } -#else /* HAVE_SYSCALL_TABLE */ +#else /* HAVE_SYSCALL_TABLE_SUPPORT */ #include @@ -176,4 +176,4 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g { return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); } -#endif /* HAVE_SYSCALL_TABLE */ +#endif /* HAVE_SYSCALL_TABLE_SUPPORT */ From 8a812bf552d98f6f887f860d3910f201b4a97b26 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 9 Apr 2018 18:26:49 +0800 Subject: [PATCH 053/660] perf version: Print status for syscall_table This patch doesn't print "libaudit" line if HAVE_SYSCALL_TABLE_SUPPORT is available and add a line for HAVE_SYSCALL_TABLE_SUPPORT. For example, $ ./perf -vv perf version 4.13.rc5.gc2f8af9 dwarf: [ on ] # HAVE_DWARF_SUPPORT dwarf_getlocations: [ on ] # HAVE_DWARF_GETLOCATIONS_SUPPORT glibc: [ on ] # HAVE_GLIBC_SUPPORT gtk2: [ on ] # HAVE_GTK2_SUPPORT syscall_table: [ on ] # HAVE_SYSCALL_TABLE_SUPPORT libbfd: [ on ] # HAVE_LIBBFD_SUPPORT libelf: [ on ] # HAVE_LIBELF_SUPPORT libnuma: [ on ] # HAVE_LIBNUMA_SUPPORT numa_num_possible_cpus: [ on ] # HAVE_LIBNUMA_SUPPORT libperl: [ on ] # HAVE_LIBPERL_SUPPORT libpython: [ on ] # HAVE_LIBPYTHON_SUPPORT libslang: [ on ] # HAVE_SLANG_SUPPORT libcrypto: [ on ] # HAVE_LIBCRYPTO_SUPPORT libunwind: [ on ] # HAVE_LIBUNWIND_SUPPORT libdw-dwarf-unwind: [ on ] # HAVE_DWARF_SUPPORT zlib: [ on ] # HAVE_ZLIB_SUPPORT lzma: [ on ] # HAVE_LZMA_SUPPORT get_cpuid: [ on ] # HAVE_AUXTRACE_SUPPORT bpf: [ on ] # HAVE_LIBBPF_SUPPORT The line "syscall_table: [ on ] # HAVE_SYSCALL_TABLE_SUPPORT" is new created. Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1523269609-28824-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-version.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 2abe3910d6b6..50df168be326 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -60,7 +60,10 @@ static void library_status(void) STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations); STATUS(HAVE_GLIBC_SUPPORT, glibc); STATUS(HAVE_GTK2_SUPPORT, gtk2); +#ifndef HAVE_SYSCALL_TABLE_SUPPORT STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit); +#endif + STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table); STATUS(HAVE_LIBBFD_SUPPORT, libbfd); STATUS(HAVE_LIBELF_SUPPORT, libelf); STATUS(HAVE_LIBNUMA_SUPPORT, libnuma); From e8103e44cebe2ef891bc3a5c6c4b74854846968b Mon Sep 17 00:00:00 2001 From: Takuya Yamamoto Date: Tue, 10 Apr 2018 23:35:39 +0900 Subject: [PATCH 054/660] perf sched: Fix documentation for timehist Fixed a incorrect option and usage to those shown by "perf sched timehist -h", i.e. the default is really --call-graph, which is equivalent to -g. Signed-off-by: Takuya Yamamoto Cc: Peter Zijlstra Link: https://lkml.kernel.org/n/tip-8fzo0dlsi1mku5aqx8brep5s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index bb33601a823b..63f938b887dd 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -104,8 +104,8 @@ OPTIONS for 'perf sched timehist' kallsyms pathname -g:: ---no-call-graph:: - Do not display call chains if present. +--call-graph:: + Display call chains if present (default on). --max-stack:: Maximum number of functions to display in backtrace, default 5. From c656941df9bc80f7ec65b92ca73c42f8b0b62628 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 8 Apr 2018 16:57:35 -0700 Subject: [PATCH 055/660] ASoC: fsl_esai: Fix divisor calculation failure at lower ratio When the desired ratio is less than 256, the savesub (tolerance) in the calculation would become 0. This will then fail the loop- search immediately without reporting any errors. But if the ratio is smaller enough, there is no need to calculate the tolerance because PM divisor alone is enough to get the ratio. So a simple fix could be just to set PM directly instead of going into the loop-search. Reported-by: Marek Vasut Signed-off-by: Nicolin Chen Tested-by: Marek Vasut Reviewed-by: Fabio Estevam Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/fsl/fsl_esai.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index 40a700493f4c..da8fd98c7f51 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -144,6 +144,13 @@ static int fsl_esai_divisor_cal(struct snd_soc_dai *dai, bool tx, u32 ratio, psr = ratio <= 256 * maxfp ? ESAI_xCCR_xPSR_BYPASS : ESAI_xCCR_xPSR_DIV8; + /* Do not loop-search if PM (1 ~ 256) alone can serve the ratio */ + if (ratio <= 256) { + pm = ratio; + fp = 1; + goto out; + } + /* Set the max fluctuation -- 0.1% of the max devisor */ savesub = (psr ? 1 : 8) * 256 * maxfp / 1000; From fac8a5a5ea40b03dcbb0f46977094099ba2220b8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sat, 7 Apr 2018 21:40:21 -0700 Subject: [PATCH 056/660] ASoC: fsl_ssi: Fix mode setting when changing channel number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a partial revert (in a cleaner way) of commit ebf08ae3bc90 ("ASoC: fsl_ssi: Keep ssi->i2s_net updated") to fix a regression at test cases when switching between mono and stereo audio. The problem is that ssi->i2s_net is initialized in set_dai_fmt() only, while this set_dai_fmt() is only called during the dai-link probe(). The original patch assumed set_dai_fmt() would be called during every playback instance, so it failed at the overriding use cases. This patch adds the local variable i2s_net back to let regular use cases still follow the mode settings from the set_dai_fmt(). Meanwhile, the original commit of keeping ssi->i2s_net updated was to make set_tdm_slot() clean by checking the ssi->i2s_net directly instead of reading SCR register. However, the change itself is not necessary (or even harmful) because the set_tdm_slot() might fail to check the slot number for Normal-Mode-None-Net settings while mono audio cases still need 2 slots. So this patch can also fix it. And it adds an extra line of comments to declare ssi->i2s_net does not reflect the register value but merely the initial setting from the set_dai_fmt(). Reported-by: Mika Penttilä Signed-off-by: Nicolin Chen Tested-by: Mika Penttilä Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 0823b08923b5..89df2d9f63d7 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -217,6 +217,7 @@ struct fsl_ssi_soc_data { * @dai_fmt: DAI configuration this device is currently used with * @streams: Mask of current active streams: BIT(TX) and BIT(RX) * @i2s_net: I2S and Network mode configurations of SCR register + * (this is the initial settings based on the DAI format) * @synchronous: Use synchronous mode - both of TX and RX use STCK and SFCK * @use_dma: DMA is used or FIQ with stream filter * @use_dual_fifo: DMA with support for dual FIFO mode @@ -829,16 +830,23 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, } if (!fsl_ssi_is_ac97(ssi)) { + /* + * Keep the ssi->i2s_net intact while having a local variable + * to override settings for special use cases. Otherwise, the + * ssi->i2s_net will lose the settings for regular use cases. + */ + u8 i2s_net = ssi->i2s_net; + /* Normal + Network mode to send 16-bit data in 32-bit frames */ if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16) - ssi->i2s_net = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET; + i2s_net = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET; /* Use Normal mode to send mono data at 1st slot of 2 slots */ if (channels == 1) - ssi->i2s_net = SSI_SCR_I2S_MODE_NORMAL; + i2s_net = SSI_SCR_I2S_MODE_NORMAL; regmap_update_bits(regs, REG_SSI_SCR, - SSI_SCR_I2S_NET_MASK, ssi->i2s_net); + SSI_SCR_I2S_NET_MASK, i2s_net); } /* In synchronous mode, the SSI uses STCCR for capture */ From 6f5427039c33e149b711c0f973fcac7f6875b768 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Mar 2018 16:53:10 +0200 Subject: [PATCH 057/660] ASoC: rsnd: mark PM functions __maybe_unused The suspend/resume callbacks are now optional, leading to a warning when they are unused: sound/soc/sh/rcar/core.c:1548:12: error: 'rsnd_resume' defined but not used [-Werror=unused-function] static int rsnd_resume(struct device *dev) ^~~~~~~~~~~ sound/soc/sh/rcar/core.c:1539:12: error: 'rsnd_suspend' defined but not used [-Werror=unused-function] static int rsnd_suspend(struct device *dev) This marks the as __maybe_unused to avoid the warning. Fixes: f8a9a29c4fe9 ("ASoC: rsnd: set pm_ops in hibernate-compatible way") Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 6a76688a8ba9..94f081b93258 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1536,7 +1536,7 @@ static int rsnd_remove(struct platform_device *pdev) return ret; } -static int rsnd_suspend(struct device *dev) +static int __maybe_unused rsnd_suspend(struct device *dev) { struct rsnd_priv *priv = dev_get_drvdata(dev); @@ -1545,7 +1545,7 @@ static int rsnd_suspend(struct device *dev) return 0; } -static int rsnd_resume(struct device *dev) +static int __maybe_unused rsnd_resume(struct device *dev) { struct rsnd_priv *priv = dev_get_drvdata(dev); From 90619eb1dc4f19357fef5e9c13c6c9beead0fd80 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 2 Apr 2018 12:06:14 -0500 Subject: [PATCH 058/660] ASoC: Intel: atom: fix ACPI/PCI Kconfig The split between ACPI and PCI platforms generated issues with randconfig: with SND_SST_ATOM_HIFI2_PLATFORM_PCI=y and SND_SST_ATOM_HIFI2_PLATFORM=m, we get this module link failure: ERROR: "sst_context_init" [sound/soc/intel/atom/sst/snd-intel-sst-acpi.ko] undefined! ERROR: "sst_context_cleanup" [sound/soc/intel/atom/sst/snd-intel-sst-acpi.ko] undefined! ERROR: "sst_alloc_drv_context" [sound/soc/intel/atom/sst/snd-intel-sst-acpi.ko] undefined! ERROR: "intel_sst_pm" [sound/soc/intel/atom/sst/snd-intel-sst-acpi.ko] undefined! ERROR: "sst_configure_runtime_pm" [sound/soc/intel/atom/sst/snd-intel-sst-acpi.ko] undefined! To keep things simple, let's expose two configs for SND_SST_ATOM_HIFI2_PLATFORM_PCI and SND_SST_ATOM_HIFI2_PLATFORM_ACPI, which select a common SND_SST_ATOM_HIFI2_PLATFORM option. To avoid breaking existing solutions with the semantics change, SND_SST_ATOM_HIFI2_PLATFORM_ACPI uses "default ACPI" so that "make oldnoconfig" and "make olddefconfig" still work as expected. Also remove mentions of Medfield while we are at it since it was removed recently. Reported-by: Arnd Bergmann Fixes: 4772c16ede52 ("ASoC: Intel: Kconfig: Simplify-clarify ACPI/PCI dependencies") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/Kconfig | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index ceb105cbd461..addac2a8e52a 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -72,24 +72,28 @@ config SND_SOC_INTEL_BAYTRAIL for Baytrail Chromebooks but this option is now deprecated and is not recommended, use SND_SST_ATOM_HIFI2_PLATFORM instead. +config SND_SST_ATOM_HIFI2_PLATFORM + tristate + select SND_SOC_COMPRESS + config SND_SST_ATOM_HIFI2_PLATFORM_PCI - tristate "PCI HiFi2 (Medfield, Merrifield) Platforms" + tristate "PCI HiFi2 (Merrifield) Platforms" depends on X86 && PCI select SND_SST_IPC_PCI - select SND_SOC_COMPRESS + select SND_SST_ATOM_HIFI2_PLATFORM help - If you have a Intel Medfield or Merrifield/Edison platform, then + If you have a Intel Merrifield/Edison platform, then enable this option by saying Y or m. Distros will typically not - enable this option: Medfield devices are not available to - developers and while Merrifield/Edison can run a mainline kernel with - limited functionality it will require a firmware file which - is not in the standard firmware tree + enable this option: while Merrifield/Edison can run a mainline + kernel with limited functionality it will require a firmware file + which is not in the standard firmware tree -config SND_SST_ATOM_HIFI2_PLATFORM +config SND_SST_ATOM_HIFI2_PLATFORM_ACPI tristate "ACPI HiFi2 (Baytrail, Cherrytrail) Platforms" + default ACPI depends on X86 && ACPI select SND_SST_IPC_ACPI - select SND_SOC_COMPRESS + select SND_SST_ATOM_HIFI2_PLATFORM select SND_SOC_ACPI_INTEL_MATCH select IOSF_MBI help From 65030ff305bc9c51cb75705483bdaac7813778f0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Apr 2018 14:25:18 +0300 Subject: [PATCH 059/660] ASoC: topology: fix some tiny memory leaks These tiny memory leaks don't have a huge real life impact but they cause static checker warnings so let's fix them. Signed-off-by: Dan Carpenter Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index fa27d0fca6dc..942c6e5eb4b7 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1325,8 +1325,10 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_denum_create( ec->hdr.name); kc[i].name = kstrdup(ec->hdr.name, GFP_KERNEL); - if (kc[i].name == NULL) + if (kc[i].name == NULL) { + kfree(se); goto err_se; + } kc[i].private_value = (long)se; kc[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; kc[i].access = ec->hdr.access; @@ -1442,8 +1444,10 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_dbytes_create( be->hdr.name, be->hdr.access); kc[i].name = kstrdup(be->hdr.name, GFP_KERNEL); - if (kc[i].name == NULL) + if (kc[i].name == NULL) { + kfree(sbe); goto err; + } kc[i].private_value = (long)sbe; kc[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; kc[i].access = be->hdr.access; From 349fa7d6e1935f49bf4161c4900711b2989180a9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 12 Apr 2018 11:48:09 -0400 Subject: [PATCH 060/660] ext4: prevent right-shifting extents beyond EXT_MAX_BLOCKS During the "insert range" fallocate operation, extents starting at the range offset are shifted "right" (to a higher file offset) by the range length. But, as shown by syzbot, it's not validated that this doesn't cause extents to be shifted beyond EXT_MAX_BLOCKS. In that case ->ee_block can wrap around, corrupting the extent tree. Fix it by returning an error if the space between the end of the last extent and EXT4_MAX_BLOCKS is smaller than the range being inserted. This bug can be reproduced by running the following commands when the current directory is on an ext4 filesystem with a 4k block size: fallocate -l 8192 file fallocate --keep-size -o 0xfffffffe000 -l 4096 -n file fallocate --insert-range -l 8192 file Then after unmounting the filesystem, e2fsck reports corruption. Reported-by: syzbot+06c885be0edcdaeab40c@syzkaller.appspotmail.com Fixes: 331573febb6a ("ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0a7315961bac..c969275ce3ee 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5329,8 +5329,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, stop = le32_to_cpu(extent->ee_block); /* - * In case of left shift, Don't start shifting extents until we make - * sure the hole is big enough to accommodate the shift. + * For left shifts, make sure the hole on the left is big enough to + * accommodate the shift. For right shifts, make sure the last extent + * won't be shifted beyond EXT_MAX_BLOCKS. */ if (SHIFT == SHIFT_LEFT) { path = ext4_find_extent(inode, start - 1, &path, @@ -5350,9 +5351,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, if ((start == ex_start && shift > ex_start) || (shift > start - ex_end)) { - ext4_ext_drop_refs(path); - kfree(path); - return -EINVAL; + ret = -EINVAL; + goto out; + } + } else { + if (shift > EXT_MAX_BLOCKS - + (stop + ext4_ext_get_actual_len(extent))) { + ret = -EINVAL; + goto out; } } From f0f4cf5b306620282db0c59ff963012e1973e025 Mon Sep 17 00:00:00 2001 From: Krish Sadhukhan Date: Wed, 11 Apr 2018 01:10:16 -0400 Subject: [PATCH 061/660] x86: Add check for APIC access address for vmentry of L2 guests According to the sub-section titled 'VM-Execution Control Fields' in the section titled 'Basic VM-Entry Checks' in Intel SDM vol. 3C, the following vmentry check must be enforced: If the 'virtualize APIC-accesses' VM-execution control is 1, the APIC-access address must satisfy the following checks: - Bits 11:0 of the address must be 0. - The address should not set any bits beyond the processor's physical-address width. This patch adds the necessary check to conform to this rule. If the check fails, we cause the L2 VMENTRY to fail which is what the associated unit test (following patch) expects. Reviewed-by: Mihai Carabas Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Jim Mattson Reviewed-by: Wanpeng Li Signed-off-by: Krish Sadhukhan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fe3e13edf201..a13c603bdefb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10602,6 +10602,16 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, return true; } +static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && + !page_address_valid(vcpu, vmcs12->apic_access_addr)) + return -EINVAL; + else + return 0; +} + static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -11293,6 +11303,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_apic_access_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; From 4e1acd7b31a03f24cc6108d37d005e6b1d48c5d3 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Fri, 13 Apr 2018 08:36:30 +0800 Subject: [PATCH 062/660] kvm: selftests: add -std=gnu99 cflags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/kvm_util.c: In function ‘kvm_memcmp_hva_gva’: lib/kvm_util.c:332:2: error: ‘for’ loop initial declarations are only allowed in C99 mode So add -std=gnu99 to CFLAGS Signed-off-by: Peng Hao Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index dc44de904797..1da541e1ab75 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -14,7 +14,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$( Date: Thu, 12 Apr 2018 21:51:40 +0530 Subject: [PATCH 063/660] perf tests: Disable breakpoint accounting test for powerpc We disable this test as instruction breakpoints (HW_BREAKPOINT_X) are not available for powerpc. Before applying patch: 21: Breakpoint accounting : --- start --- test child forked, pid 3635 failed opening event 0 failed opening event 0 watchpoints count 1, breakpoints count 0, has_ioctl 1, share 0 test child finished with -2 ---- end ---- Breakpoint accounting: Skip After applying patch: 21: Breakpoint accounting : Disabled Signed-off-by: Sandipan Das Cc: Jiri Olsa Cc: Naveen N. Rao Cc: Ravi Bangoria Link: http://lkml.kernel.org/r/20180412162140.2992-1-sandipan@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 625f5a6772af..cac8f8889bc3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -118,6 +118,7 @@ static struct test generic_tests[] = { { .desc = "Breakpoint accounting", .func = test__bp_accounting, + .is_supported = test__bp_signal_is_supported, }, { .desc = "Number of exit events of a simple workload", From f6b7aeee8f167409195fbf1364d02988fecad1d0 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 3 Apr 2018 08:55:03 -0400 Subject: [PATCH 064/660] MIPS: io: Prevent compiler reordering writeX() writeX() has strong ordering semantics with respect to memory updates. In the absence of a write barrier or a compiler barrier, the compiler can reorder register and memory update instructions. This breaks the writeX() API. Signed-off-by: Sinan Kaya Cc: Arnd Bergmann Cc: Ralf Baechle Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/18997/ [jhogan@kernel.org: Tidy commit message] Signed-off-by: James Hogan --- arch/mips/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 0cbf3af37eca..fd00ddafb425 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -307,7 +307,7 @@ static inline void iounmap(const volatile void __iomem *addr) #if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_LOONGSON3_ENHANCEMENT) #define war_io_reorder_wmb() wmb() #else -#define war_io_reorder_wmb() do { } while (0) +#define war_io_reorder_wmb() barrier() #endif #define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq) \ From 08ea556e14b56e9a49b19abd8e39f0c9e05582f2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 15:26:43 -0700 Subject: [PATCH 065/660] ibmvnic: Define vnic_login_client_data name field as unsized array The "name" field of struct vnic_login_client_data is a char array of undefined length. This should be written as "char name[]" so the compiler can make better decisions about the field (for example, not assuming it's a single character). This was noticed while trying to tighten the CONFIG_FORTIFY_SOURCE checking. Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index aad5658d79d5..35fbb41cd2d4 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3170,7 +3170,7 @@ static int send_version_xchg(struct ibmvnic_adapter *adapter) struct vnic_login_client_data { u8 type; __be16 len; - char name; + char name[]; } __packed; static int vnic_client_data_len(struct ibmvnic_adapter *adapter) @@ -3199,21 +3199,21 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter, vlcd->type = 1; len = strlen(os_name) + 1; vlcd->len = cpu_to_be16(len); - strncpy(&vlcd->name, os_name, len); - vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len); + strncpy(vlcd->name, os_name, len); + vlcd = (struct vnic_login_client_data *)(vlcd->name + len); /* Type 2 - LPAR name */ vlcd->type = 2; len = strlen(utsname()->nodename) + 1; vlcd->len = cpu_to_be16(len); - strncpy(&vlcd->name, utsname()->nodename, len); - vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len); + strncpy(vlcd->name, utsname()->nodename, len); + vlcd = (struct vnic_login_client_data *)(vlcd->name + len); /* Type 3 - device name */ vlcd->type = 3; len = strlen(adapter->netdev->name) + 1; vlcd->len = cpu_to_be16(len); - strncpy(&vlcd->name, adapter->netdev->name, len); + strncpy(vlcd->name, adapter->netdev->name, len); } static int send_login(struct ibmvnic_adapter *adapter) From b16520f7493d06d8ef6d4255bdfcf7a803d7874a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 17:52:34 -0700 Subject: [PATCH 066/660] net/tls: Remove VLA usage In the quest to remove VLAs from the kernel[1], this replaces the VLA size with the only possible size used in the code, and adds a mechanism to double-check future IV sizes. [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Signed-off-by: Kees Cook Acked-by: Dave Watson Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 4dc766b03f00..71e79597f940 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -41,6 +41,8 @@ #include #include +#define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE + static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, @@ -673,7 +675,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); - char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + tls_ctx->rx.iv_size]; + char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + MAX_IV_SIZE]; struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2]; struct scatterlist *sgin = &sgin_arr[0]; struct strp_msg *rxm = strp_msg(skb); @@ -1094,6 +1096,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) goto free_priv; } + /* Sanity-check the IV size for stack allocations. */ + if (iv_size > MAX_IV_SIZE) { + rc = -EINVAL; + goto free_priv; + } + cctx->prepend_size = TLS_HEADER_SIZE + nonce_size; cctx->tag_size = tag_size; cctx->overhead_size = cctx->prepend_size + cctx->tag_size; From 9a4381618262157586051f5ba0db42df3c6ab4b5 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Apr 2018 18:04:29 -0700 Subject: [PATCH 067/660] mISDN: Remove VLAs There's an ongoing effort to remove VLAs[1] from the kernel to eventually turn on -Wvla. Remove the VLAs from the mISDN code by switching to using kstrdup in one place and using an upper bound in another. Signed-off-by: Laura Abbott Signed-off-by: David S. Miller --- drivers/isdn/mISDN/dsp_hwec.c | 8 +++++--- drivers/isdn/mISDN/l1oip_core.c | 14 +++++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/isdn/mISDN/dsp_hwec.c b/drivers/isdn/mISDN/dsp_hwec.c index a6e87076acc2..5336bbdbfdc5 100644 --- a/drivers/isdn/mISDN/dsp_hwec.c +++ b/drivers/isdn/mISDN/dsp_hwec.c @@ -68,12 +68,12 @@ void dsp_hwec_enable(struct dsp *dsp, const char *arg) goto _do; { - char _dup[len + 1]; char *dup, *tok, *name, *val; int tmp; - strcpy(_dup, arg); - dup = _dup; + dup = kstrdup(arg, GFP_ATOMIC); + if (!dup) + return; while ((tok = strsep(&dup, ","))) { if (!strlen(tok)) @@ -89,6 +89,8 @@ void dsp_hwec_enable(struct dsp *dsp, const char *arg) deftaps = tmp; } } + + kfree(dup); } _do: diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 21d50e4cc5e1..b05022f94f18 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -279,7 +279,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask, u16 timebase, u8 *buf, int len) { u8 *p; - u8 frame[len + 32]; + u8 frame[MAX_DFRAME_LEN_L1 + 32]; struct socket *socket = NULL; if (debug & DEBUG_L1OIP_MSG) @@ -902,7 +902,11 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) p = skb->data; l = skb->len; while (l) { - ll = (l < L1OIP_MAX_PERFRAME) ? l : L1OIP_MAX_PERFRAME; + /* + * This is technically bounded by L1OIP_MAX_PERFRAME but + * MAX_DFRAME_LEN_L1 < L1OIP_MAX_PERFRAME + */ + ll = (l < MAX_DFRAME_LEN_L1) ? l : MAX_DFRAME_LEN_L1; l1oip_socket_send(hc, 0, dch->slot, 0, hc->chan[dch->slot].tx_counter++, p, ll); p += ll; @@ -1140,7 +1144,11 @@ handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb) p = skb->data; l = skb->len; while (l) { - ll = (l < L1OIP_MAX_PERFRAME) ? l : L1OIP_MAX_PERFRAME; + /* + * This is technically bounded by L1OIP_MAX_PERFRAME but + * MAX_DFRAME_LEN_L1 < L1OIP_MAX_PERFRAME + */ + ll = (l < MAX_DFRAME_LEN_L1) ? l : MAX_DFRAME_LEN_L1; l1oip_socket_send(hc, hc->codec, bch->slot, 0, hc->chan[bch->slot].tx_counter, p, ll); hc->chan[bch->slot].tx_counter += ll; From 1c2734b31d72316e3faaad88c0c9c46fa92a4b20 Mon Sep 17 00:00:00 2001 From: Raghuram Chary J Date: Wed, 11 Apr 2018 20:36:36 +0530 Subject: [PATCH 068/660] lan78xx: PHY DSP registers initialization to address EEE link drop issues with long cables The patch is to configure DSP registers of PHY device to handle Gbe-EEE failures with >40m cable length. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Raghuram Chary J Signed-off-by: David S. Miller --- drivers/net/phy/microchip.c | 178 ++++++++++++++++++++++++++++++++++- include/linux/microchipphy.h | 8 ++ 2 files changed, 185 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 0f293ef28935..a97ac8c12c4c 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -20,6 +20,7 @@ #include #include #include +#include #define DRIVER_AUTHOR "WOOJUNG HUH " #define DRIVER_DESC "Microchip LAN88XX PHY driver" @@ -30,6 +31,16 @@ struct lan88xx_priv { __u32 wolopts; }; +static int lan88xx_read_page(struct phy_device *phydev) +{ + return __phy_read(phydev, LAN88XX_EXT_PAGE_ACCESS); +} + +static int lan88xx_write_page(struct phy_device *phydev, int page) +{ + return __phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, page); +} + static int lan88xx_phy_config_intr(struct phy_device *phydev) { int rc; @@ -66,6 +77,150 @@ static int lan88xx_suspend(struct phy_device *phydev) return 0; } +static int lan88xx_TR_reg_set(struct phy_device *phydev, u16 regaddr, + u32 data) +{ + int val, save_page, ret = 0; + u16 buf; + + /* Save current page */ + save_page = phy_save_page(phydev); + if (save_page < 0) { + pr_warn("Failed to get current page\n"); + goto err; + } + + /* Switch to TR page */ + lan88xx_write_page(phydev, LAN88XX_EXT_PAGE_ACCESS_TR); + + ret = __phy_write(phydev, LAN88XX_EXT_PAGE_TR_LOW_DATA, + (data & 0xFFFF)); + if (ret < 0) { + pr_warn("Failed to write TR low data\n"); + goto err; + } + + ret = __phy_write(phydev, LAN88XX_EXT_PAGE_TR_HIGH_DATA, + (data & 0x00FF0000) >> 16); + if (ret < 0) { + pr_warn("Failed to write TR high data\n"); + goto err; + } + + /* Config control bits [15:13] of register */ + buf = (regaddr & ~(0x3 << 13));/* Clr [14:13] to write data in reg */ + buf |= 0x8000; /* Set [15] to Packet transmit */ + + ret = __phy_write(phydev, LAN88XX_EXT_PAGE_TR_CR, buf); + if (ret < 0) { + pr_warn("Failed to write data in reg\n"); + goto err; + } + + usleep_range(1000, 2000);/* Wait for Data to be written */ + val = __phy_read(phydev, LAN88XX_EXT_PAGE_TR_CR); + if (!(val & 0x8000)) + pr_warn("TR Register[0x%X] configuration failed\n", regaddr); +err: + return phy_restore_page(phydev, save_page, ret); +} + +static void lan88xx_config_TR_regs(struct phy_device *phydev) +{ + int err; + + /* Get access to Channel 0x1, Node 0xF , Register 0x01. + * Write 24-bit value 0x12B00A to register. Setting MrvlTrFix1000Kf, + * MrvlTrFix1000Kp, MasterEnableTR bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x0F82, 0x12B00A); + if (err < 0) + pr_warn("Failed to Set Register[0x0F82]\n"); + + /* Get access to Channel b'10, Node b'1101, Register 0x06. + * Write 24-bit value 0xD2C46F to register. Setting SSTrKf1000Slv, + * SSTrKp1000Mas bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x168C, 0xD2C46F); + if (err < 0) + pr_warn("Failed to Set Register[0x168C]\n"); + + /* Get access to Channel b'10, Node b'1111, Register 0x11. + * Write 24-bit value 0x620 to register. Setting rem_upd_done_thresh + * bits + */ + err = lan88xx_TR_reg_set(phydev, 0x17A2, 0x620); + if (err < 0) + pr_warn("Failed to Set Register[0x17A2]\n"); + + /* Get access to Channel b'10, Node b'1101, Register 0x10. + * Write 24-bit value 0xEEFFDD to register. Setting + * eee_TrKp1Long_1000, eee_TrKp2Long_1000, eee_TrKp3Long_1000, + * eee_TrKp1Short_1000,eee_TrKp2Short_1000, eee_TrKp3Short_1000 bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x16A0, 0xEEFFDD); + if (err < 0) + pr_warn("Failed to Set Register[0x16A0]\n"); + + /* Get access to Channel b'10, Node b'1101, Register 0x13. + * Write 24-bit value 0x071448 to register. Setting + * slv_lpi_tr_tmr_val1, slv_lpi_tr_tmr_val2 bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x16A6, 0x071448); + if (err < 0) + pr_warn("Failed to Set Register[0x16A6]\n"); + + /* Get access to Channel b'10, Node b'1101, Register 0x12. + * Write 24-bit value 0x13132F to register. Setting + * slv_sigdet_timer_val1, slv_sigdet_timer_val2 bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x16A4, 0x13132F); + if (err < 0) + pr_warn("Failed to Set Register[0x16A4]\n"); + + /* Get access to Channel b'10, Node b'1101, Register 0x14. + * Write 24-bit value 0x0 to register. Setting eee_3level_delay, + * eee_TrKf_freeze_delay bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x16A8, 0x0); + if (err < 0) + pr_warn("Failed to Set Register[0x16A8]\n"); + + /* Get access to Channel b'01, Node b'1111, Register 0x34. + * Write 24-bit value 0x91B06C to register. Setting + * FastMseSearchThreshLong1000, FastMseSearchThreshShort1000, + * FastMseSearchUpdGain1000 bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x0FE8, 0x91B06C); + if (err < 0) + pr_warn("Failed to Set Register[0x0FE8]\n"); + + /* Get access to Channel b'01, Node b'1111, Register 0x3E. + * Write 24-bit value 0xC0A028 to register. Setting + * FastMseKp2ThreshLong1000, FastMseKp2ThreshShort1000, + * FastMseKp2UpdGain1000, FastMseKp2ExitEn1000 bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x0FFC, 0xC0A028); + if (err < 0) + pr_warn("Failed to Set Register[0x0FFC]\n"); + + /* Get access to Channel b'01, Node b'1111, Register 0x35. + * Write 24-bit value 0x041600 to register. Setting + * FastMseSearchPhShNum1000, FastMseSearchClksPerPh1000, + * FastMsePhChangeDelay1000 bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x0FEA, 0x041600); + if (err < 0) + pr_warn("Failed to Set Register[0x0FEA]\n"); + + /* Get access to Channel b'10, Node b'1101, Register 0x03. + * Write 24-bit value 0x000004 to register. Setting TrFreeze bits. + */ + err = lan88xx_TR_reg_set(phydev, 0x1686, 0x000004); + if (err < 0) + pr_warn("Failed to Set Register[0x1686]\n"); +} + static int lan88xx_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -132,6 +287,25 @@ static void lan88xx_set_mdix(struct phy_device *phydev) phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, LAN88XX_EXT_PAGE_SPACE_0); } +static int lan88xx_config_init(struct phy_device *phydev) +{ + int val; + + genphy_config_init(phydev); + /*Zerodetect delay enable */ + val = phy_read_mmd(phydev, MDIO_MMD_PCS, + PHY_ARDENNES_MMD_DEV_3_PHY_CFG); + val |= PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_; + + phy_write_mmd(phydev, MDIO_MMD_PCS, PHY_ARDENNES_MMD_DEV_3_PHY_CFG, + val); + + /* Config DSP registers */ + lan88xx_config_TR_regs(phydev); + + return 0; +} + static int lan88xx_config_aneg(struct phy_device *phydev) { lan88xx_set_mdix(phydev); @@ -151,7 +325,7 @@ static struct phy_driver microchip_phy_driver[] = { .probe = lan88xx_probe, .remove = lan88xx_remove, - .config_init = genphy_config_init, + .config_init = lan88xx_config_init, .config_aneg = lan88xx_config_aneg, .ack_interrupt = lan88xx_phy_ack_interrupt, @@ -160,6 +334,8 @@ static struct phy_driver microchip_phy_driver[] = { .suspend = lan88xx_suspend, .resume = genphy_resume, .set_wol = lan88xx_set_wol, + .read_page = lan88xx_read_page, + .write_page = lan88xx_write_page, } }; module_phy_driver(microchip_phy_driver); diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h index eb492d47f717..8f9c90379732 100644 --- a/include/linux/microchipphy.h +++ b/include/linux/microchipphy.h @@ -70,4 +70,12 @@ #define LAN88XX_MMD3_CHIP_ID (32877) #define LAN88XX_MMD3_CHIP_REV (32878) +/* DSP registers */ +#define PHY_ARDENNES_MMD_DEV_3_PHY_CFG (0x806A) +#define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_ (0x2000) +#define LAN88XX_EXT_PAGE_ACCESS_TR (0x52B5) +#define LAN88XX_EXT_PAGE_TR_CR 16 +#define LAN88XX_EXT_PAGE_TR_LOW_DATA 17 +#define LAN88XX_EXT_PAGE_TR_HIGH_DATA 18 + #endif /* _MICROCHIPPHY_H */ From c3317f4db831b7564ff8d1670326456a7fbbbcb3 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 11 Apr 2018 22:52:09 +0200 Subject: [PATCH 069/660] tipc: fix unbalanced reference counter When a topology subscription is created, we may encounter (or KASAN may provoke) a failure to create a corresponding service instance in the binding table. Instead of letting the tipc_nametbl_subscribe() report the failure back to the caller, the function just makes a warning printout and returns, without incrementing the subscription reference counter as expected by the caller. This makes the caller believe that the subscription was successful, so it will at a later moment try to unsubscribe the item. This involves a sub_put() call. Since the reference counter never was incremented in the first place, we get a premature delete of the subscription item, followed by a "use-after-free" warning. We fix this by adding a return value to tipc_nametbl_subscribe() and make the caller aware of the failure to subscribe. This bug seems to always have been around, but this fix only applies back to the commit shown below. Given the low risk of this happening we believe this to be sufficient. Fixes: commit 218527fe27ad ("tipc: replace name table service range array with rb tree") Reported-by: syzbot+aa245f26d42b8305d157@syzkaller.appspotmail.com Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/name_table.c | 5 ++++- net/tipc/name_table.h | 2 +- net/tipc/subscr.c | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index b1fe20972aa9..4068eaad61a6 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -665,13 +665,14 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, /** * tipc_nametbl_subscribe - add a subscription object to the name table */ -void tipc_nametbl_subscribe(struct tipc_subscription *sub) +bool tipc_nametbl_subscribe(struct tipc_subscription *sub) { struct name_table *nt = tipc_name_table(sub->net); struct tipc_net *tn = tipc_net(sub->net); struct tipc_subscr *s = &sub->evt.s; u32 type = tipc_sub_read(s, seq.type); struct tipc_service *sc; + bool res = true; spin_lock_bh(&tn->nametbl_lock); sc = tipc_service_find(sub->net, type); @@ -685,8 +686,10 @@ void tipc_nametbl_subscribe(struct tipc_subscription *sub) pr_warn("Failed to subscribe for {%u,%u,%u}\n", type, tipc_sub_read(s, seq.lower), tipc_sub_read(s, seq.upper)); + res = false; } spin_unlock_bh(&tn->nametbl_lock); + return res; } /** diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 4b14fc28d9e2..0febba41da86 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -126,7 +126,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 lower, u32 upper, u32 node, u32 key); -void tipc_nametbl_subscribe(struct tipc_subscription *s); +bool tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index b7d80bc5f4ab..f340e53da625 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -153,7 +153,10 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net, memcpy(&sub->evt.s, s, sizeof(*s)); spin_lock_init(&sub->lock); kref_init(&sub->kref); - tipc_nametbl_subscribe(sub); + if (!tipc_nametbl_subscribe(sub)) { + kfree(sub); + return NULL; + } timer_setup(&sub->timer, tipc_sub_timeout, 0); timeout = tipc_sub_read(&sub->evt.s, timeout); if (timeout != TIPC_WAIT_FOREVER) From 7212303268918b9a203aebeacfdbd83b5e87b20d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Apr 2018 14:36:28 -0700 Subject: [PATCH 070/660] tcp: md5: reject TCP_MD5SIG or TCP_MD5SIG_EXT on established sockets syzbot/KMSAN reported an uninit-value in tcp_parse_options() [1] I believe this was caused by a TCP_MD5SIG being set on live flow. This is highly unexpected, since TCP option space is limited. For instance, presence of TCP MD5 option automatically disables TCP TimeStamp option at SYN/SYNACK time, which we can not do once flow has been established. Really, adding/deleting an MD5 key only makes sense on sockets in CLOSE or LISTEN state. [1] BUG: KMSAN: uninit-value in tcp_parse_options+0xd74/0x1a30 net/ipv4/tcp_input.c:3720 CPU: 1 PID: 6177 Comm: syzkaller192004 Not tainted 4.16.0+ #83 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 tcp_parse_options+0xd74/0x1a30 net/ipv4/tcp_input.c:3720 tcp_fast_parse_options net/ipv4/tcp_input.c:3858 [inline] tcp_validate_incoming+0x4f1/0x2790 net/ipv4/tcp_input.c:5184 tcp_rcv_established+0xf60/0x2bb0 net/ipv4/tcp_input.c:5453 tcp_v4_do_rcv+0x6cd/0xd90 net/ipv4/tcp_ipv4.c:1469 sk_backlog_rcv include/net/sock.h:908 [inline] __release_sock+0x2d6/0x680 net/core/sock.c:2271 release_sock+0x97/0x2a0 net/core/sock.c:2786 tcp_sendmsg+0xd6/0x100 net/ipv4/tcp.c:1464 inet_sendmsg+0x48d/0x740 net/ipv4/af_inet.c:764 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] SYSC_sendto+0x6c3/0x7e0 net/socket.c:1747 SyS_sendto+0x8a/0xb0 net/socket.c:1715 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x448fe9 RSP: 002b:00007fd472c64d38 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000006e5a30 RCX: 0000000000448fe9 RDX: 000000000000029f RSI: 0000000020a88f88 RDI: 0000000000000004 RBP: 00000000006e5a34 R08: 0000000020e68000 R09: 0000000000000010 R10: 00000000200007fd R11: 0000000000000216 R12: 0000000000000000 R13: 00007fff074899ef R14: 00007fd472c659c0 R15: 0000000000000009 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] tcp_send_ack+0x18c/0x910 net/ipv4/tcp_output.c:3624 __tcp_ack_snd_check net/ipv4/tcp_input.c:5040 [inline] tcp_ack_snd_check net/ipv4/tcp_input.c:5053 [inline] tcp_rcv_established+0x2103/0x2bb0 net/ipv4/tcp_input.c:5469 tcp_v4_do_rcv+0x6cd/0xd90 net/ipv4/tcp_ipv4.c:1469 sk_backlog_rcv include/net/sock.h:908 [inline] __release_sock+0x2d6/0x680 net/core/sock.c:2271 release_sock+0x97/0x2a0 net/core/sock.c:2786 tcp_sendmsg+0xd6/0x100 net/ipv4/tcp.c:1464 inet_sendmsg+0x48d/0x740 net/ipv4/af_inet.c:764 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] SYSC_sendto+0x6c3/0x7e0 net/socket.c:1747 SyS_sendto+0x8a/0xb0 net/socket.c:1715 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: cfb6eeb4c860 ("[TCP]: MD5 Signature Option (RFC2385) support.") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bccc4c270087..4fa3f812b9ff 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2813,8 +2813,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: - /* Read the IP->Key mappings from userspace */ - err = tp->af_specific->md5_parse(sk, optname, optval, optlen); + if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) + err = tp->af_specific->md5_parse(sk, optname, optval, optlen); + else + err = -EINVAL; break; #endif case TCP_USER_TIMEOUT: From 7dd07c143a4b54d050e748bee4b4b9e94a7b1744 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Apr 2018 14:46:00 -0700 Subject: [PATCH 071/660] net: validate attribute sizes in neigh_dump_table() Since neigh_dump_table() calls nlmsg_parse() without giving policy constraints, attributes can have arbirary size that we must validate Reported by syzbot/KMSAN : BUG: KMSAN: uninit-value in neigh_master_filtered net/core/neighbour.c:2292 [inline] BUG: KMSAN: uninit-value in neigh_dump_table net/core/neighbour.c:2348 [inline] BUG: KMSAN: uninit-value in neigh_dump_info+0x1af0/0x2250 net/core/neighbour.c:2438 CPU: 1 PID: 3575 Comm: syzkaller268891 Not tainted 4.16.0+ #83 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 neigh_master_filtered net/core/neighbour.c:2292 [inline] neigh_dump_table net/core/neighbour.c:2348 [inline] neigh_dump_info+0x1af0/0x2250 net/core/neighbour.c:2438 netlink_dump+0x9ad/0x1540 net/netlink/af_netlink.c:2225 __netlink_dump_start+0x1167/0x12a0 net/netlink/af_netlink.c:2322 netlink_dump_start include/linux/netlink.h:214 [inline] rtnetlink_rcv_msg+0x1435/0x1560 net/core/rtnetlink.c:4598 netlink_rcv_skb+0x355/0x5f0 net/netlink/af_netlink.c:2447 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4653 netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline] netlink_unicast+0x1672/0x1750 net/netlink/af_netlink.c:1337 netlink_sendmsg+0x1048/0x1310 net/netlink/af_netlink.c:1900 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x43fed9 RSP: 002b:00007ffddbee2798 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fed9 RDX: 0000000000000000 RSI: 0000000020005000 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 0000000000401800 R13: 0000000000401890 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1183 [inline] netlink_sendmsg+0x9a6/0x1310 net/netlink/af_netlink.c:1875 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 21fdd092acc7 ("net: Add support for filtering neigh dump by master device") Signed-off-by: Eric Dumazet Cc: David Ahern Reported-by: syzbot Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7b7a14abba28..a8bc02bb339f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2323,12 +2323,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL); if (!err) { - if (tb[NDA_IFINDEX]) + if (tb[NDA_IFINDEX]) { + if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) + return -EINVAL; filter_idx = nla_get_u32(tb[NDA_IFINDEX]); - - if (tb[NDA_MASTER]) + } + if (tb[NDA_MASTER]) { + if (nla_len(tb[NDA_MASTER]) != sizeof(u32)) + return -EINVAL; filter_master_idx = nla_get_u32(tb[NDA_MASTER]); - + } if (filter_idx || filter_master_idx) flags |= NLM_F_DUMP_FILTERED; } From 64d92aa2c9fe490ceffc440d7648ce369cd6cc3c Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 11 Apr 2018 10:09:32 -0500 Subject: [PATCH 072/660] ibmvnic: Handle all login error conditions There is a bug in handling the possible return codes from sending the login CRQ. The current code treats any non-success return value, minus failure to send the crq and a timeout waiting for a login response, as a need to re-send the login CRQ. This can put the drive in an infinite loop of trying to login when getting return values other that a partial success such as a return code of aborted. For these scenarios the login will not ever succeed at this point and the driver would need to be reset again. To resolve this loop trying to login is updated to only retry the login if the driver gets a return code of a partial success. Other return codes are treated as an error and the driver returns an error from ibmvnic_login(). To avoid infinite looping in the partial success return cases, the number of retries is capped at the maximum number of supported queues. This value was chosen because the driver does a renegotiation of capabilities which sets the number of queues possible and allows the driver to attempt a login for possible value for the number of queues supported. Signed-off-by: Nathan Fontenot Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 55 +++++++++++++++++++----------- drivers/net/ethernet/ibm/ibmvnic.h | 1 - 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 35fbb41cd2d4..d35f29d7af91 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -794,46 +794,61 @@ static int ibmvnic_login(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); unsigned long timeout = msecs_to_jiffies(30000); - struct device *dev = &adapter->vdev->dev; + int retry_count = 0; int rc; do { - if (adapter->renegotiate) { - adapter->renegotiate = false; + if (retry_count > IBMVNIC_MAX_QUEUES) { + netdev_warn(netdev, "Login attempts exceeded\n"); + return -1; + } + + adapter->init_done_rc = 0; + reinit_completion(&adapter->init_done); + rc = send_login(adapter); + if (rc) { + netdev_warn(netdev, "Unable to login\n"); + return rc; + } + + if (!wait_for_completion_timeout(&adapter->init_done, + timeout)) { + netdev_warn(netdev, "Login timed out\n"); + return -1; + } + + if (adapter->init_done_rc == PARTIALSUCCESS) { + retry_count++; release_sub_crqs(adapter, 1); + adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); send_cap_queries(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - dev_err(dev, "Capabilities query timeout\n"); + netdev_warn(netdev, + "Capabilities query timed out\n"); return -1; } + rc = init_sub_crqs(adapter); if (rc) { - dev_err(dev, - "Initialization of SCRQ's failed\n"); + netdev_warn(netdev, + "SCRQ initialization failed\n"); return -1; } + rc = init_sub_crq_irqs(adapter); if (rc) { - dev_err(dev, - "Initialization of SCRQ's irqs failed\n"); + netdev_warn(netdev, + "SCRQ irq initialization failed\n"); return -1; } - } - - reinit_completion(&adapter->init_done); - rc = send_login(adapter); - if (rc) { - dev_err(dev, "Unable to attempt device login\n"); - return rc; - } else if (!wait_for_completion_timeout(&adapter->init_done, - timeout)) { - dev_err(dev, "Login timeout\n"); + } else if (adapter->init_done_rc) { + netdev_warn(netdev, "Adapter login failed\n"); return -1; } - } while (adapter->renegotiate); + } while (adapter->init_done_rc == PARTIALSUCCESS); /* handle pending MAC address changes after successful login */ if (adapter->mac_change_pending) { @@ -3942,7 +3957,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, * to resend the login buffer with fewer queues requested. */ if (login_rsp_crq->generic.rc.code) { - adapter->renegotiate = true; + adapter->init_done_rc = login_rsp_crq->generic.rc.code; complete(&adapter->init_done); return 0; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 99c0b58c2c39..22391e8805f6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1035,7 +1035,6 @@ struct ibmvnic_adapter { struct ibmvnic_sub_crq_queue **tx_scrq; struct ibmvnic_sub_crq_queue **rx_scrq; - bool renegotiate; /* rx structs */ struct napi_struct *napi; From ebc701b796a67a5785399dcbc83d90e3b5f1e02f Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 11 Apr 2018 10:09:38 -0500 Subject: [PATCH 073/660] ibmvnic: Do not notify peers on parameter change resets When attempting to change the driver parameters, such as the MTU value or number of queues, do not call netdev_notify_peers(). Doing so will deadlock on the rtnl_lock. Signed-off-by: Nathan Fontenot Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d35f29d7af91..f4a56a3c07ad 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1843,7 +1843,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); - if (adapter->reset_reason != VNIC_RESET_FAILOVER) + if (adapter->reset_reason != VNIC_RESET_FAILOVER && + adapter->reset_reason != VNIC_RESET_CHANGE_PARAM) netdev_notify_peers(netdev); netif_carrier_on(netdev); From 5ff9c1a3dd92d2d8eeea6bb15b3502cfcc0e26fa Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 11 Apr 2018 17:17:34 +0200 Subject: [PATCH 074/660] selftests: net: add in_netns.sh to TEST_PROGS Script in_netns.sh isn't installed. -------------------- running psock_fanout test -------------------- ./run_afpackettests: line 12: ./in_netns.sh: No such file or directory [FAIL] -------------------- running psock_tpacket test -------------------- ./run_afpackettests: line 22: ./in_netns.sh: No such file or directory [FAIL] In current code added in_netns.sh to be installed. Fixes: cc30c93fa020 ("selftests/net: ignore background traffic in psock_fanout") Signed-off-by: Anders Roxell Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 785fc18a16b4..8f1e13d2e547 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,7 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa From 9d0c75bf6e03d9bf80c55b0f677dc9b982958fd5 Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Wed, 11 Apr 2018 15:05:16 -0700 Subject: [PATCH 075/660] strparser: Fix incorrect strp->need_bytes value. strp_data_ready resets strp->need_bytes to 0 if strp_peek_len indicates that the remainder of the message has been received. However, do_strp_work does not reset strp->need_bytes to 0. If do_strp_work completes a partial message, the value of strp->need_bytes will continue to reflect the needed bytes of the previous message, causing future invocations of strp_data_ready to return early if strp->need_bytes is less than strp_peek_len. Resetting strp->need_bytes to 0 in __strp_recv on handing a full message to the upper layer solves this problem. __strp_recv also calculates strp->need_bytes using stm->accum_len before stm->accum_len has been incremented by cand_len. This can cause strp->need_bytes to be equal to the full length of the message instead of the full length minus the accumulated length. This, in turn, causes strp_data_ready to return early, even when there is sufficient data to complete the partial message. Incrementing stm->accum_len before using it to calculate strp->need_bytes solves this problem. Found while testing net/tls_sw recv path. Fixes: 43a0c6751a322847 ("strparser: Stream parser for messages") Signed-off-by: Doron Roberts-Kedes Signed-off-by: David S. Miller --- net/strparser/strparser.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index b9283ce5cd85..805b139756db 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -296,9 +296,9 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, strp_start_timer(strp, timeo); } + stm->accum_len += cand_len; strp->need_bytes = stm->strp.full_len - stm->accum_len; - stm->accum_len += cand_len; stm->early_eaten = cand_len; STRP_STATS_ADD(strp->stats.bytes, cand_len); desc->count = 0; /* Stop reading socket */ @@ -321,6 +321,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* Hurray, we have a new message! */ cancel_delayed_work(&strp->msg_timer_work); strp->skb_head = NULL; + strp->need_bytes = 0; STRP_STATS_INCR(strp->stats.msgs); /* Give skb to upper layer */ @@ -410,9 +411,7 @@ void strp_data_ready(struct strparser *strp) return; if (strp->need_bytes) { - if (strp_peek_len(strp) >= strp->need_bytes) - strp->need_bytes = 0; - else + if (strp_peek_len(strp) < strp->need_bytes) return; } From 335b929b28aeb5bfc0698adb21deaf685b2982d1 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 12 Apr 2018 01:15:48 +0200 Subject: [PATCH 076/660] tipc: fix missing initializer in tipc_sendmsg() The stack variable 'dnode' in __tipc_sendmsg() may theoretically end up tipc_node_get_mtu() as an unitilalized variable. We fix this by intializing the variable at declaration. We also add a default else clause to the two conditional ones already there, so that we never end up in the named function if the given address type is illegal. Reported-by: syzbot+b0975ce9355b347c1546@syzkaller.appspotmail.com Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1fd1c8b5ce03..252a52ae0893 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1278,7 +1278,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq *seq; struct sk_buff_head pkts; - u32 dnode, dport; + u32 dport, dnode = 0; u32 type, inst; int mtu, rc; @@ -1348,6 +1348,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) msg_set_destnode(hdr, dnode); msg_set_destport(hdr, dest->addr.id.ref); msg_set_hdr_sz(hdr, BASIC_H_SIZE); + } else { + return -EINVAL; } /* Block or return if destination link is congested */ From 5496295aefe86995e41398b0f76de601308fc3f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Apr 2018 16:47:35 -0700 Subject: [PATCH 077/660] nfp: ignore signals when communicating with management FW We currently allow signals to interrupt the wait for management FW commands. Exiting the wait should not cause trouble, the FW will just finish executing the command in the background and new commands will wait for the old one to finish. However, this may not be what users expect (Ctrl-C not actually stopping the command). Moreover some systems routinely request link information with signals pending (Ubuntu 14.04 runs a landscape-sysinfo python tool from MOTD) worrying users with errors like these: nfp 0000:04:00.0: nfp_nsp: Error -512 waiting for code 0x0007 to start nfp 0000:04:00.0: nfp: reading port table failed -512 Make the wait for management FW responses non-interruptible. Fixes: 1a64821c6af7 ("nfp: add support for service processor access") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 99bb679a9801..2abee0fe3a7c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -281,8 +281,7 @@ nfp_nsp_wait_reg(struct nfp_cpp *cpp, u64 *reg, u32 nsp_cpp, u64 addr, if ((*reg & mask) == val) return 0; - if (msleep_interruptible(25)) - return -ERESTARTSYS; + msleep(25); if (time_after(start_time, wait_until)) return -ETIMEDOUT; From bc05f9bcd8cb62f935625850e535da183b4a07c0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Apr 2018 16:47:36 -0700 Subject: [PATCH 078/660] nfp: print a message when mutex wait is interrupted When waiting for an NFP mutex is interrupted print a message to make root causing later error messages easier. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c index f7b958181126..cb28ac03e4ca 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c @@ -211,8 +211,11 @@ int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex) break; err = msleep_interruptible(timeout_ms); - if (err != 0) + if (err != 0) { + nfp_info(mutex->cpp, + "interrupted waiting for NFP mutex\n"); return -ERESTARTSYS; + } if (time_is_before_eq_jiffies(warn_at)) { warn_at = jiffies + NFP_MUTEX_WAIT_NEXT_WARN * HZ; From 0b1a989ef5a751b5992842d1934e22de861a848e Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Wed, 11 Apr 2018 16:47:37 -0700 Subject: [PATCH 079/660] nfp: flower: move route ack control messages out of the workqueue Previously we processed the route ack control messages in the workqueue, this unnecessarily loads the workqueue. We can deal with these messages sooner as we know we are going to drop them. Fixes: 8e6a9046b66a ("nfp: flower vxlan neighbour offload") Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 3735c09d2112..8ad857eb89c6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -258,9 +258,6 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: nfp_tunnel_keep_alive(app, skb); break; - case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH: - /* Acks from the NFP that the route is added - ignore. */ - break; default: nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n", type); @@ -306,6 +303,9 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) nfp_flower_process_mtu_ack(app, skb)) { /* Handle MTU acks outside wq to prevent RTNL conflict. */ dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) { + /* Acks from the NFP that the route is added - ignore. */ + dev_consume_skb_any(skb); } else { skb_queue_tail(&priv->cmsg_skbs, skb); schedule_work(&priv->cmsg_work); From cf2cbadc20f5651c3dde9f5ac2ee52fb43aa4ddd Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Wed, 11 Apr 2018 16:47:38 -0700 Subject: [PATCH 080/660] nfp: flower: split and limit cmsg skb lists Introduce a second skb list for handling control messages and limit the number of allowed messages. Some control messages are considered more crucial than others, resulting in the need for a second skb list. By splitting the list into a separate high and low priority list we can ensure that messages on the high list get added to the head of the list that gets processed, this however has no functional impact. Previously there was no limit on the number of messages allowed on the queue, this could result in the queue growing boundlessly and eventually the host running out of memory. Fixes: b985f870a5f0 ("nfp: process control messages in workqueue in flower app") Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.c | 38 +++++++++++++++++-- .../net/ethernet/netronome/nfp/flower/cmsg.h | 2 + .../net/ethernet/netronome/nfp/flower/main.c | 6 ++- .../net/ethernet/netronome/nfp/flower/main.h | 8 +++- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 8ad857eb89c6..577659f332e4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -272,18 +272,49 @@ out: void nfp_flower_cmsg_process_rx(struct work_struct *work) { + struct sk_buff_head cmsg_joined; struct nfp_flower_priv *priv; struct sk_buff *skb; priv = container_of(work, struct nfp_flower_priv, cmsg_work); + skb_queue_head_init(&cmsg_joined); - while ((skb = skb_dequeue(&priv->cmsg_skbs))) + spin_lock_bh(&priv->cmsg_skbs_high.lock); + skb_queue_splice_tail_init(&priv->cmsg_skbs_high, &cmsg_joined); + spin_unlock_bh(&priv->cmsg_skbs_high.lock); + + spin_lock_bh(&priv->cmsg_skbs_low.lock); + skb_queue_splice_tail_init(&priv->cmsg_skbs_low, &cmsg_joined); + spin_unlock_bh(&priv->cmsg_skbs_low.lock); + + while ((skb = __skb_dequeue(&cmsg_joined))) nfp_flower_cmsg_process_one_rx(priv->app, skb); } +static void +nfp_flower_queue_ctl_msg(struct nfp_app *app, struct sk_buff *skb, int type) +{ + struct nfp_flower_priv *priv = app->priv; + struct sk_buff_head *skb_head; + + if (type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY || + type == NFP_FLOWER_CMSG_TYPE_PORT_MOD) + skb_head = &priv->cmsg_skbs_high; + else + skb_head = &priv->cmsg_skbs_low; + + if (skb_queue_len(skb_head) >= NFP_FLOWER_WORKQ_MAX_SKBS) { + nfp_flower_cmsg_warn(app, "Dropping queued control messages\n"); + dev_kfree_skb_any(skb); + return; + } + + skb_queue_tail(skb_head, skb); + schedule_work(&priv->cmsg_work); +} + void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) { - struct nfp_flower_priv *priv = app->priv; struct nfp_flower_cmsg_hdr *cmsg_hdr; cmsg_hdr = nfp_flower_cmsg_get_hdr(skb); @@ -307,7 +338,6 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) /* Acks from the NFP that the route is added - ignore. */ dev_consume_skb_any(skb); } else { - skb_queue_tail(&priv->cmsg_skbs, skb); - schedule_work(&priv->cmsg_work); + nfp_flower_queue_ctl_msg(app, skb, cmsg_hdr->type); } } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 96bc0e33980c..b6c0fd053a50 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -108,6 +108,8 @@ #define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) #define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) +#define NFP_FLOWER_WORKQ_MAX_SKBS 30000 + #define nfp_flower_cmsg_warn(app, fmt, args...) \ do { \ if (net_ratelimit()) \ diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 6357e0720f43..ad02592a82b7 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -519,7 +519,8 @@ static int nfp_flower_init(struct nfp_app *app) app->priv = app_priv; app_priv->app = app; - skb_queue_head_init(&app_priv->cmsg_skbs); + skb_queue_head_init(&app_priv->cmsg_skbs_high); + skb_queue_head_init(&app_priv->cmsg_skbs_low); INIT_WORK(&app_priv->cmsg_work, nfp_flower_cmsg_process_rx); init_waitqueue_head(&app_priv->reify_wait_queue); @@ -549,7 +550,8 @@ static void nfp_flower_clean(struct nfp_app *app) { struct nfp_flower_priv *app_priv = app->priv; - skb_queue_purge(&app_priv->cmsg_skbs); + skb_queue_purge(&app_priv->cmsg_skbs_high); + skb_queue_purge(&app_priv->cmsg_skbs_low); flush_work(&app_priv->cmsg_work); nfp_flower_metadata_cleanup(app); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index e030b3ce4510..c67e1b54c614 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -107,7 +107,10 @@ struct nfp_mtu_conf { * @mask_table: Hash table used to store masks * @flow_table: Hash table used to store flower rules * @cmsg_work: Workqueue for control messages processing - * @cmsg_skbs: List of skbs for control message processing + * @cmsg_skbs_high: List of higher priority skbs for control message + * processing + * @cmsg_skbs_low: List of lower priority skbs for control message + * processing * @nfp_mac_off_list: List of MAC addresses to offload * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs * @nfp_ipv4_off_list: List of IPv4 addresses to offload @@ -136,7 +139,8 @@ struct nfp_flower_priv { DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS); DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS); struct work_struct cmsg_work; - struct sk_buff_head cmsg_skbs; + struct sk_buff_head cmsg_skbs_high; + struct sk_buff_head cmsg_skbs_low; struct list_head nfp_mac_off_list; struct list_head nfp_mac_index_list; struct list_head nfp_ipv4_off_list; From 1071ec9d453a38023579714b64a951a2fb982071 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 12 Apr 2018 14:24:31 +0800 Subject: [PATCH 081/660] sctp: do not check port in sctp_inet6_cmp_addr pf->cmp_addr() is called before binding a v6 address to the sock. It should not check ports, like in sctp_inet_cmp_addr. But sctp_inet6_cmp_addr checks the addr by invoking af(6)->cmp_addr, sctp_v6_cmp_addr where it also compares the ports. This would cause that setsockopt(SCTP_SOCKOPT_BINDX_ADD) could bind multiple duplicated IPv6 addresses after Commit 40b4f0fd74e4 ("sctp: lack the check for ports in sctp_v6_cmp_addr"). This patch is to remove af->cmp_addr called in sctp_inet6_cmp_addr, but do the proper check for both v6 addrs and v4mapped addrs. v1->v2: - define __sctp_v6_cmp_addr to do the common address comparison used for both pf and af v6 cmp_addr. Fixes: 40b4f0fd74e4 ("sctp: lack the check for ports in sctp_v6_cmp_addr") Reported-by: Jianwen Ji Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 74 ++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 31083b5035ec..2e3f7b75a8ec 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -556,44 +556,47 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, addr->v6.sin6_scope_id = 0; } +static int __sctp_v6_cmp_addr(const union sctp_addr *addr1, + const union sctp_addr *addr2) +{ + if (addr1->sa.sa_family != addr2->sa.sa_family) { + if (addr1->sa.sa_family == AF_INET && + addr2->sa.sa_family == AF_INET6 && + ipv6_addr_v4mapped(&addr2->v6.sin6_addr) && + addr2->v6.sin6_addr.s6_addr32[3] == + addr1->v4.sin_addr.s_addr) + return 1; + + if (addr2->sa.sa_family == AF_INET && + addr1->sa.sa_family == AF_INET6 && + ipv6_addr_v4mapped(&addr1->v6.sin6_addr) && + addr1->v6.sin6_addr.s6_addr32[3] == + addr2->v4.sin_addr.s_addr) + return 1; + + return 0; + } + + if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr)) + return 0; + + /* If this is a linklocal address, compare the scope_id. */ + if ((ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) && + addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id && + addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id) + return 0; + + return 1; +} + /* Compare addresses exactly. * v4-mapped-v6 is also in consideration. */ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2) { - if (addr1->sa.sa_family != addr2->sa.sa_family) { - if (addr1->sa.sa_family == AF_INET && - addr2->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr2->v6.sin6_addr)) { - if (addr2->v6.sin6_port == addr1->v4.sin_port && - addr2->v6.sin6_addr.s6_addr32[3] == - addr1->v4.sin_addr.s_addr) - return 1; - } - if (addr2->sa.sa_family == AF_INET && - addr1->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr1->v6.sin6_addr)) { - if (addr1->v6.sin6_port == addr2->v4.sin_port && - addr1->v6.sin6_addr.s6_addr32[3] == - addr2->v4.sin_addr.s_addr) - return 1; - } - return 0; - } - if (addr1->v6.sin6_port != addr2->v6.sin6_port) - return 0; - if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr)) - return 0; - /* If this is a linklocal address, compare the scope_id. */ - if (ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) { - if (addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id && - (addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id)) { - return 0; - } - } - - return 1; + return __sctp_v6_cmp_addr(addr1, addr2) && + addr1->v6.sin6_port == addr2->v6.sin6_port; } /* Initialize addr struct to INADDR_ANY. */ @@ -875,8 +878,8 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2, struct sctp_sock *opt) { - struct sctp_af *af1, *af2; struct sock *sk = sctp_opt2sk(opt); + struct sctp_af *af1, *af2; af1 = sctp_get_af_specific(addr1->sa.sa_family); af2 = sctp_get_af_specific(addr2->sa.sa_family); @@ -892,10 +895,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; - if (addr1->sa.sa_family != addr2->sa.sa_family) - return 0; - - return af1->cmp_addr(addr1, addr2); + return __sctp_v6_cmp_addr(addr1, addr2); } /* Verify that the provided sockaddr looks bindable. Common verification, From 53b76cdf7e8fecec1d09e38aad2f8579882591a8 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Thu, 12 Apr 2018 10:46:55 +0200 Subject: [PATCH 082/660] net: fix deadlock while clearing neighbor proxy table When coming from ndisc_netdev_event() in net/ipv6/ndisc.c, neigh_ifdown() is called with &nd_tbl, locking this while clearing the proxy neighbor entries when eg. deleting an interface. Calling the table's pndisc_destructor() with the lock still held, however, can cause a deadlock: When a multicast listener is available an IGMP packet of type ICMPV6_MGM_REDUCTION may be sent out. When reaching ip6_finish_output2(), if no neighbor entry for the target address is found, __neigh_create() is called with &nd_tbl, which it'll want to lock. Move the elements into their own list, then unlock the table and perform the destruction. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199289 Fixes: 6fd6ce2056de ("ipv6: Do not depend on rt->n in ip6_finish_output2().") Signed-off-by: Wolfgang Bumiller Signed-off-by: David S. Miller --- net/core/neighbour.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a8bc02bb339f..ce519861be59 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -55,7 +55,8 @@ static void neigh_timer_handler(struct timer_list *t); static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid); static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); -static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); +static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, + struct net_device *dev); #ifdef CONFIG_PROC_FS static const struct file_operations neigh_stat_seq_fops; @@ -291,8 +292,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev); - pneigh_ifdown(tbl, dev); - write_unlock_bh(&tbl->lock); + pneigh_ifdown_and_unlock(tbl, dev); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); @@ -681,9 +681,10 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, return -ENOENT; } -static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) +static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, + struct net_device *dev) { - struct pneigh_entry *n, **np; + struct pneigh_entry *n, **np, *freelist = NULL; u32 h; for (h = 0; h <= PNEIGH_HASHMASK; h++) { @@ -691,16 +692,23 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) while ((n = *np) != NULL) { if (!dev || n->dev == dev) { *np = n->next; - if (tbl->pdestructor) - tbl->pdestructor(n); - if (n->dev) - dev_put(n->dev); - kfree(n); + n->next = freelist; + freelist = n; continue; } np = &n->next; } } + write_unlock_bh(&tbl->lock); + while ((n = freelist)) { + freelist = n->next; + n->next = NULL; + if (tbl->pdestructor) + tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); + kfree(n); + } return -ENOENT; } From 2290482379278e0254e6edfdb681d88359143fd1 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Mon, 9 Apr 2018 00:03:14 -0700 Subject: [PATCH 083/660] net: dsa: mv88e6xxx: Fix receive time stamp race condition. The DSA stack passes received PTP frames to this driver via mv88e6xxx_port_rxtstamp() for deferred delivery. The driver then queues the frame and kicks the worker thread. The work callback reads out the latched receive time stamp and then works through the queue, delivering any non-matching frames without a time stamp. If a new frame arrives after the worker thread has read out the time stamp register but enters the queue before the worker finishes processing the queue, that frame will be delivered without a time stamp. This patch fixes the race by moving the queue onto a list on the stack before reading out the latched time stamp value. Fixes: c6fe0ad2c3499 ("net: dsa: mv88e6xxx: add rx/tx timestamping support") Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/hwtstamp.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c index ac7694c71266..a036c490b7ce 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c @@ -285,10 +285,18 @@ static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip, struct sk_buff_head *rxq) { u16 buf[4] = { 0 }, status, seq_id; - u64 ns, timelo, timehi; struct skb_shared_hwtstamps *shwt; + struct sk_buff_head received; + u64 ns, timelo, timehi; + unsigned long flags; int err; + /* The latched timestamp belongs to one of the received frames. */ + __skb_queue_head_init(&received); + spin_lock_irqsave(&rxq->lock, flags); + skb_queue_splice_tail_init(rxq, &received); + spin_unlock_irqrestore(&rxq->lock, flags); + mutex_lock(&chip->reg_lock); err = mv88e6xxx_port_ptp_read(chip, ps->port_id, reg, buf, ARRAY_SIZE(buf)); @@ -311,7 +319,7 @@ static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip, /* Since the device can only handle one time stamp at a time, * we purge any extra frames from the queue. */ - for ( ; skb; skb = skb_dequeue(rxq)) { + for ( ; skb; skb = __skb_dequeue(&received)) { if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) { ns = timehi << 16 | timelo; From d0f8b9c5a350ca6fa842b52bfb88b77b34ee485b Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Mon, 9 Apr 2018 15:13:35 +0200 Subject: [PATCH 084/660] ASoC: adau17x1: Handling of DSP_RUN register during fw setup DSP_RUN needs to be disabled during firmware write otherwise we can end up with undefined behavior if writing to a dsp which is already running firmware. Signed-off-by: Danny Smith Signed-off-by: Robert Rosengren Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown --- sound/soc/codecs/adau17x1.c | 26 ++++++++++++++++++++------ sound/soc/codecs/adau17x1.h | 3 ++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/adau17x1.c b/sound/soc/codecs/adau17x1.c index 80c2a06285bb..12bf24c26818 100644 --- a/sound/soc/codecs/adau17x1.c +++ b/sound/soc/codecs/adau17x1.c @@ -502,7 +502,7 @@ static int adau17x1_hw_params(struct snd_pcm_substream *substream, } if (adau->sigmadsp) { - ret = adau17x1_setup_firmware(adau, params_rate(params)); + ret = adau17x1_setup_firmware(component, params_rate(params)); if (ret < 0) return ret; } @@ -835,26 +835,40 @@ bool adau17x1_volatile_register(struct device *dev, unsigned int reg) } EXPORT_SYMBOL_GPL(adau17x1_volatile_register); -int adau17x1_setup_firmware(struct adau *adau, unsigned int rate) +int adau17x1_setup_firmware(struct snd_soc_component *component, + unsigned int rate) { int ret; - int dspsr; + int dspsr, dsp_run; + struct adau *adau = snd_soc_component_get_drvdata(component); + struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component); + + snd_soc_dapm_mutex_lock(dapm); ret = regmap_read(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, &dspsr); if (ret) - return ret; + goto err; + + ret = regmap_read(adau->regmap, ADAU17X1_DSP_RUN, &dsp_run); + if (ret) + goto err; regmap_write(adau->regmap, ADAU17X1_DSP_ENABLE, 1); regmap_write(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, 0xf); + regmap_write(adau->regmap, ADAU17X1_DSP_RUN, 0); ret = sigmadsp_setup(adau->sigmadsp, rate); if (ret) { regmap_write(adau->regmap, ADAU17X1_DSP_ENABLE, 0); - return ret; + goto err; } regmap_write(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, dspsr); + regmap_write(adau->regmap, ADAU17X1_DSP_RUN, dsp_run); - return 0; +err: + snd_soc_dapm_mutex_unlock(dapm); + + return ret; } EXPORT_SYMBOL_GPL(adau17x1_setup_firmware); diff --git a/sound/soc/codecs/adau17x1.h b/sound/soc/codecs/adau17x1.h index a7b1cb770814..e6fe87beec07 100644 --- a/sound/soc/codecs/adau17x1.h +++ b/sound/soc/codecs/adau17x1.h @@ -68,7 +68,8 @@ int adau17x1_resume(struct snd_soc_component *component); extern const struct snd_soc_dai_ops adau17x1_dai_ops; -int adau17x1_setup_firmware(struct adau *adau, unsigned int rate); +int adau17x1_setup_firmware(struct snd_soc_component *component, + unsigned int rate); bool adau17x1_has_dsp(struct adau *adau); #define ADAU17X1_CLOCK_CONTROL 0x4000 From 4f75f1cbf95e2f0853cd229d042b203931b899af Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 12 Apr 2018 15:32:46 +0200 Subject: [PATCH 085/660] perf record: Change warning for missing sysfs entry to debug Using perf on 4.16.0 kernel on s390 shows this warning: failed: can't open node sysfs data each time I run command perf record ... for example: [root@s35lp76 perf]# ./perf record -e rB0000 -- sleep 1 [ perf record: Woken up 1 times to write data ] failed: can't open node sysfs data [ perf record: Captured and wrote 0.001 MB perf.data (4 samples) ] [root@s35lp76 perf]# It turns out commit e2091cedd51bf ("perf tools: Add MEM_TOPOLOGY feature to perf data file") tries to open directory named /sys/devices/system/node/ which does not exist on s390. This is the call stack: __cmd_record +---> perf_session__write_header +---> perf_header__adds_write +---> do_write_feat +---> write_mem_topology +---> build_mem_topology prints warning The issue starts in do_write_feat() which unconditionally loops over all features and now includes HEADER_MEM_TOPOLOGY and calls write_mem_topology(). Function record__init_features() at the beginning of __cmd_record() sets all features and then turns off some of them. Fix this by changing the warning to a level 2 debug output statement. So it is only shown when debug level 2 or higher is set. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180412133246.92801-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 121df1683c36..a8bff2178fbc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1320,7 +1320,8 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) dir = opendir(path); if (!dir) { - pr_warning("failed: can't open node sysfs data\n"); + pr_debug2("%s: could't read %s, does this arch have topology information?\n", + __func__, path); return -1; } From 7b366142a50ad79e48de8e67c5b3e8cfb9fa82dd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Apr 2018 14:58:24 -0300 Subject: [PATCH 086/660] perf report: Fix switching to another perf.data file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the TUI the 's' hotkey can be used to switch to another perf.data file in the current directory, but that got broken in Fixes: b01141f4f59c ("perf annotate: Initialize the priv are in symbol__new()"), that would show this once another file was chosen: ┌─Fatal Error─────────────────────────────────────┐ │Annotation needs to be init before symbol__init()│ │ │ │ │ │Press any key... │ └─────────────────────────────────────────────────┘ Fix it by just silently bailing out if symbol__annotation_init() was already called, just like is done with symbol__init(), i.e. they are done just once at session start, not when switching to a new perf.data file. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Fixes: b01141f4f59c ("perf annotate: Initialize the priv are in symbol__new()") Link: https://lkml.kernel.org/n/tip-ogppdtpzfax7y1h6gjdv5s6u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 62b2dd2253eb..1466814ebada 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2091,16 +2091,14 @@ static bool symbol__read_kptr_restrict(void) int symbol__annotation_init(void) { + if (symbol_conf.init_annotation) + return 0; + if (symbol_conf.initialized) { pr_err("Annotation needs to be init before symbol__init()\n"); return -1; } - if (symbol_conf.init_annotation) { - pr_warning("Annotation being initialized multiple times\n"); - return 0; - } - symbol_conf.priv_size += sizeof(struct annotation); symbol_conf.init_annotation = true; return 0; From 43c4023152a9c5742948ac919e58ade127fa4e2e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Apr 2018 15:23:02 -0300 Subject: [PATCH 087/660] perf annotate: Allow setting the offset level in .perfconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default is 1 (jump_target): # perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave Samples: 3K of event 'cycles:ppp', 3000 Hz, Event count (approx.): 2766398574 _raw_spin_lock_irqsave() /proc/kcore 0.26 nop 4.61 push %rbx 19.33 pushfq 7.97 pop %rax 0.32 nop 0.06 mov %rax,%rbx 14.63 cli 0.06 nop xor %eax,%eax mov $0x1,%edx 49.94 lock cmpxchg %edx,(%rdi) 0.16 test %eax,%eax ↓ jne 2b 2.66 mov %rbx,%rax pop %rbx ← retq 2b: mov %eax,%esi → callq *ffffffffb30eaed0 mov %rbx,%rax pop %rbx ← retq # But one can ask for showing offsets for call instructions by setting this: # perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave Samples: 3K of event 'cycles:ppp', 3000 Hz, Event count (approx.): 2766398574 _raw_spin_lock_irqsave() /proc/kcore 0.26 nop 4.61 push %rbx 19.33 pushfq 7.97 pop %rax 0.32 nop 0.06 mov %rax,%rbx 14.63 cli 0.06 nop xor %eax,%eax mov $0x1,%edx 49.94 lock cmpxchg %edx,(%rdi) 0.16 test %eax,%eax ↓ jne 2b 2.66 mov %rbx,%rax pop %rbx ← retq 2b: mov %eax,%esi 2d: → callq *ffffffffb30eaed0 mov %rbx,%rax pop %rbx ← retq # Or using a big value to ask for all offsets to be shown: # cat ~/.perfconfig [annotate] offset_level = 100 hide_src_code = true # perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave Samples: 3K of event 'cycles:ppp', 3000 Hz, Event count (approx.): 2766398574 _raw_spin_lock_irqsave() /proc/kcore 0.26 0: nop 4.61 5: push %rbx 19.33 6: pushfq 7.97 7: pop %rax 0.32 8: nop 0.06 d: mov %rax,%rbx 14.63 10: cli 0.06 11: nop 17: xor %eax,%eax 19: mov $0x1,%edx 49.94 1e: lock cmpxchg %edx,(%rdi) 0.16 22: test %eax,%eax 24: ↓ jne 2b 2.66 26: mov %rbx,%rax 29: pop %rbx 2a: ← retq 2b: mov %eax,%esi 2d: → callq *ffffffffb30eaed0 32: mov %rbx,%rax 35: pop %rbx 36: ← retq # This also affects the TUI, i.e. the default 'perf annotate' and 'perf top/report' -> A hotkey -> annotate interfaces, when slang-devel is present in the build, i.e.: # perf version --build-options | grep slang libslang: [ on ] # HAVE_SLANG_SUPPORT # Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-venm6x5zrt40eu8hxdsmqxz6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 5 +++++ tools/perf/util/annotate.c | 15 ++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 5b4fff3adc4b..32f4a898e3f2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -334,6 +334,11 @@ annotate.*:: 99.93 │ mov %eax,%eax + annotate.offset_level:: + Default is '1', meaning just jump targets will have offsets show right beside + the instruction. When set to '2' 'call' instructions will also have its offsets + shown, 3 or higher will show offsets for all instructions. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5edc565d86c4..536ee148bff8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2649,10 +2649,11 @@ int __annotation__scnprintf_samples_period(struct annotation *notes, */ static struct annotation_config { const char *name; - bool *value; + void *value; } annotation__configs[] = { ANNOTATION__CFG(hide_src_code), ANNOTATION__CFG(jump_arrows), + ANNOTATION__CFG(offset_level), ANNOTATION__CFG(show_linenr), ANNOTATION__CFG(show_nr_jumps), ANNOTATION__CFG(show_nr_samples), @@ -2684,8 +2685,16 @@ static int annotation__config(const char *var, const char *value, if (cfg == NULL) pr_debug("%s variable unknown, ignoring...", var); - else - *cfg->value = perf_config_bool(name, value); + else if (strcmp(var, "annotate.offset_level") == 0) { + perf_config_int(cfg->value, name, value); + + if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL) + *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL; + else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL) + *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL; + } else { + *(bool *)cfg->value = perf_config_bool(name, value); + } return 0; } From b0d5c81e872ed21de1e56feb0fa6e4161da7be61 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Apr 2018 16:28:18 -0300 Subject: [PATCH 088/660] perf annotate: Handle variables in 'sub', 'or' and many other instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like is done for 'mov' and others that can have as source or targets variables resolved by objdump, to make them more compact: - orb $0x4,0x224d71(%rip) # 226ca4 <_rtld_global+0xca4> + orb $0x4,_rtld_global+0xca4 Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-efex7746id4w4wa03nqxvh3m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/annotate/instructions.c | 67 ++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 5bd1ba8c0282..44f5aba78210 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -1,21 +1,43 @@ // SPDX-License-Identifier: GPL-2.0 static struct ins x86__instructions[] = { + { .name = "adc", .ops = &mov_ops, }, + { .name = "adcb", .ops = &mov_ops, }, + { .name = "adcl", .ops = &mov_ops, }, { .name = "add", .ops = &mov_ops, }, { .name = "addl", .ops = &mov_ops, }, { .name = "addq", .ops = &mov_ops, }, + { .name = "addsd", .ops = &mov_ops, }, { .name = "addw", .ops = &mov_ops, }, { .name = "and", .ops = &mov_ops, }, + { .name = "andb", .ops = &mov_ops, }, + { .name = "andl", .ops = &mov_ops, }, + { .name = "andpd", .ops = &mov_ops, }, + { .name = "andps", .ops = &mov_ops, }, + { .name = "andq", .ops = &mov_ops, }, + { .name = "andw", .ops = &mov_ops, }, + { .name = "bsr", .ops = &mov_ops, }, + { .name = "bt", .ops = &mov_ops, }, + { .name = "btr", .ops = &mov_ops, }, { .name = "bts", .ops = &mov_ops, }, + { .name = "btsq", .ops = &mov_ops, }, { .name = "call", .ops = &call_ops, }, { .name = "callq", .ops = &call_ops, }, + { .name = "cmovbe", .ops = &mov_ops, }, + { .name = "cmove", .ops = &mov_ops, }, + { .name = "cmovae", .ops = &mov_ops, }, { .name = "cmp", .ops = &mov_ops, }, { .name = "cmpb", .ops = &mov_ops, }, { .name = "cmpl", .ops = &mov_ops, }, { .name = "cmpq", .ops = &mov_ops, }, { .name = "cmpw", .ops = &mov_ops, }, { .name = "cmpxch", .ops = &mov_ops, }, + { .name = "cmpxchg", .ops = &mov_ops, }, + { .name = "cs", .ops = &mov_ops, }, { .name = "dec", .ops = &dec_ops, }, { .name = "decl", .ops = &dec_ops, }, + { .name = "divsd", .ops = &mov_ops, }, + { .name = "divss", .ops = &mov_ops, }, + { .name = "gs", .ops = &mov_ops, }, { .name = "imul", .ops = &mov_ops, }, { .name = "inc", .ops = &dec_ops, }, { .name = "incl", .ops = &dec_ops, }, @@ -57,25 +79,68 @@ static struct ins x86__instructions[] = { { .name = "lea", .ops = &mov_ops, }, { .name = "lock", .ops = &lock_ops, }, { .name = "mov", .ops = &mov_ops, }, + { .name = "movapd", .ops = &mov_ops, }, + { .name = "movaps", .ops = &mov_ops, }, { .name = "movb", .ops = &mov_ops, }, { .name = "movdqa", .ops = &mov_ops, }, + { .name = "movdqu", .ops = &mov_ops, }, { .name = "movl", .ops = &mov_ops, }, { .name = "movq", .ops = &mov_ops, }, + { .name = "movsd", .ops = &mov_ops, }, { .name = "movslq", .ops = &mov_ops, }, + { .name = "movss", .ops = &mov_ops, }, + { .name = "movupd", .ops = &mov_ops, }, + { .name = "movups", .ops = &mov_ops, }, + { .name = "movw", .ops = &mov_ops, }, { .name = "movzbl", .ops = &mov_ops, }, { .name = "movzwl", .ops = &mov_ops, }, + { .name = "mulsd", .ops = &mov_ops, }, + { .name = "mulss", .ops = &mov_ops, }, { .name = "nop", .ops = &nop_ops, }, { .name = "nopl", .ops = &nop_ops, }, { .name = "nopw", .ops = &nop_ops, }, { .name = "or", .ops = &mov_ops, }, + { .name = "orb", .ops = &mov_ops, }, { .name = "orl", .ops = &mov_ops, }, + { .name = "orps", .ops = &mov_ops, }, + { .name = "orq", .ops = &mov_ops, }, + { .name = "pand", .ops = &mov_ops, }, + { .name = "paddq", .ops = &mov_ops, }, + { .name = "pcmpeqb", .ops = &mov_ops, }, + { .name = "por", .ops = &mov_ops, }, + { .name = "rclb", .ops = &mov_ops, }, + { .name = "rcll", .ops = &mov_ops, }, + { .name = "retq", .ops = &ret_ops, }, + { .name = "sbb", .ops = &mov_ops, }, + { .name = "sbbl", .ops = &mov_ops, }, + { .name = "sete", .ops = &mov_ops, }, + { .name = "sub", .ops = &mov_ops, }, + { .name = "subl", .ops = &mov_ops, }, + { .name = "subq", .ops = &mov_ops, }, + { .name = "subsd", .ops = &mov_ops, }, + { .name = "subw", .ops = &mov_ops, }, { .name = "test", .ops = &mov_ops, }, { .name = "testb", .ops = &mov_ops, }, { .name = "testl", .ops = &mov_ops, }, + { .name = "ucomisd", .ops = &mov_ops, }, + { .name = "ucomiss", .ops = &mov_ops, }, + { .name = "vaddsd", .ops = &mov_ops, }, + { .name = "vandpd", .ops = &mov_ops, }, + { .name = "vmovdqa", .ops = &mov_ops, }, + { .name = "vmovq", .ops = &mov_ops, }, + { .name = "vmovsd", .ops = &mov_ops, }, + { .name = "vmulsd", .ops = &mov_ops, }, + { .name = "vorpd", .ops = &mov_ops, }, + { .name = "vsubsd", .ops = &mov_ops, }, + { .name = "vucomisd", .ops = &mov_ops, }, { .name = "xadd", .ops = &mov_ops, }, { .name = "xbeginl", .ops = &jump_ops, }, { .name = "xbeginq", .ops = &jump_ops, }, - { .name = "retq", .ops = &ret_ops, }, + { .name = "xchg", .ops = &mov_ops, }, + { .name = "xor", .ops = &mov_ops, }, + { .name = "xorb", .ops = &mov_ops, }, + { .name = "xorpd", .ops = &mov_ops, }, + { .name = "xorps", .ops = &mov_ops, }, }; static bool x86__ins_is_fused(struct arch *arch, const char *ins1, From 92183a42898dc400b89da35685d1814ac6acd3d8 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 5 Apr 2018 16:18:03 +0300 Subject: [PATCH 089/660] fsnotify: fix ignore mask logic in send_to_group() The ignore mask logic in send_to_group() does not match the logic in fanotify_should_send_event(). In the latter, a vfsmount mark ignore mask precedes an inode mark mask and in the former, it does not. That difference may cause events to be sent to fanotify backend for no reason. Fix the logic in send_to_group() to match that of fanotify_should_send_event(). Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 219b269c737e..613ec7e5a465 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -192,8 +192,9 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; - __u32 inode_test_mask = 0; - __u32 vfsmount_test_mask = 0; + __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 marks_mask = 0; + __u32 marks_ignored_mask = 0; if (unlikely(!inode_mark && !vfsmount_mark)) { BUG(); @@ -213,29 +214,25 @@ static int send_to_group(struct inode *to_tell, /* does the inode mark tell us to do something? */ if (inode_mark) { group = inode_mark->group; - inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); - inode_test_mask &= inode_mark->mask; - inode_test_mask &= ~inode_mark->ignored_mask; + marks_mask |= inode_mark->mask; + marks_ignored_mask |= inode_mark->ignored_mask; } /* does the vfsmount_mark tell us to do something? */ if (vfsmount_mark) { - vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); group = vfsmount_mark->group; - vfsmount_test_mask &= vfsmount_mark->mask; - vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; - if (inode_mark) - vfsmount_test_mask &= ~inode_mark->ignored_mask; + marks_mask |= vfsmount_mark->mask; + marks_ignored_mask |= vfsmount_mark->ignored_mask; } pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" - " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" + " vfsmount_mark=%p marks_mask=%x marks_ignored_mask=%x" " data=%p data_is=%d cookie=%d\n", - __func__, group, to_tell, mask, inode_mark, - inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, + __func__, group, to_tell, mask, inode_mark, vfsmount_mark, + marks_mask, marks_ignored_mask, data, data_is, cookie); - if (!inode_test_mask && !vfsmount_test_mask) + if (!(test_mask & marks_mask & ~marks_ignored_mask)) return 0; return group->ops->handle_event(group, to_tell, inode_mark, From 8e984f8667ff4225092af734eef28a3d7bae8626 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 5 Apr 2018 16:18:04 +0300 Subject: [PATCH 090/660] fsnotify: fix typo in a comment about mark->g_list Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9f1edb92c97e..e0c95c9f1e29 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -248,7 +248,7 @@ struct fsnotify_mark { /* Group this mark is for. Set on mark creation, stable until last ref * is dropped */ struct fsnotify_group *group; - /* List of marks by group->i_fsnotify_marks. Also reused for queueing + /* List of marks by group->marks_list. Also reused for queueing * mark into destroy_list when it's waiting for the end of SRCU period * before it can be freed. [group->mark_mutex] */ struct list_head g_list; From 96348e49366c6e2a5a2e62ba0350f66ef5d67ea7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 5 Apr 2018 16:18:05 +0300 Subject: [PATCH 091/660] MAINTAINERS: add an entry for FSNOTIFY infrastructure There is alreay an entry for all the backends, but those entries do not cover all the fsnotify files. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4c3c17e1e163..e3e0a79e2069 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5780,6 +5780,14 @@ F: fs/crypto/ F: include/linux/fscrypt*.h F: Documentation/filesystems/fscrypt.rst +FSNOTIFY: FILESYSTEM NOTIFICATION INFRASTRUCTURE +M: Jan Kara +R: Amir Goldstein +L: linux-fsdevel@vger.kernel.org +S: Maintained +F: fs/notify/ +F: include/linux/fsnotify*.h + FUJITSU LAPTOP EXTRAS M: Jonathan Woithe L: platform-driver-x86@vger.kernel.org From 9267c430c6b6f4c0120e3c6bb847313d633f02a6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 13 Apr 2018 14:58:25 +0800 Subject: [PATCH 092/660] virtio-net: add missing virtqueue kick when flushing packets We tends to batch submitting packets during XDP_TX. This requires to kick virtqueue after a batch, we tried to do it through xdp_do_flush_map() which only makes sense for devmap not XDP_TX. So explicitly kick the virtqueue in this case. Reported-by: Kimitoshi Takahashi Tested-by: Kimitoshi Takahashi Cc: Daniel Borkmann Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7b187ec7411e..dfe78308044d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1269,7 +1269,9 @@ static int virtnet_poll(struct napi_struct *napi, int budget) { struct receive_queue *rq = container_of(napi, struct receive_queue, napi); - unsigned int received; + struct virtnet_info *vi = rq->vq->vdev->priv; + struct send_queue *sq; + unsigned int received, qp; bool xdp_xmit = false; virtnet_poll_cleantx(rq); @@ -1280,8 +1282,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget) if (received < budget) virtqueue_napi_complete(napi, rq->vq, received); - if (xdp_xmit) + if (xdp_xmit) { + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + + smp_processor_id(); + sq = &vi->sq[qp]; + virtqueue_kick(sq->vq); xdp_do_flush_map(); + } return received; } From 5846c131c39b6d0add36ec19dc8650700690f930 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 12 Apr 2018 20:50:33 +0200 Subject: [PATCH 093/660] l2tp: hold reference on tunnels in netlink dumps l2tp_tunnel_find_nth() is unsafe: no reference is held on the returned tunnel, therefore it can be freed whenever the caller uses it. This patch defines l2tp_tunnel_get_nth() which works similarly, but also takes a reference on the returned tunnel. The caller then has to drop it after it stops using the tunnel. Convert netlink dumps to make them safe against concurrent tunnel deletion. Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 20 ++++++++++++++++++++ net/l2tp/l2tp_core.h | 2 ++ net/l2tp/l2tp_netlink.c | 11 ++++++++--- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0fbd3ee26165..c8c4183f0f37 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -183,6 +183,26 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) } EXPORT_SYMBOL_GPL(l2tp_tunnel_get); +struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) +{ + const struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel; + int count = 0; + + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { + if (++count > nth) { + l2tp_tunnel_inc_refcount(tunnel); + rcu_read_unlock_bh(); + return tunnel; + } + } + rcu_read_unlock_bh(); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth); + /* Lookup a session. A new reference is held on the returned session. */ struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index ba33cbec71eb..e4896413b2b6 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -212,6 +212,8 @@ static inline void *l2tp_session_priv(struct l2tp_session *session) } struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); +struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth); + void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_get(const struct net *net, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index b05dbd9ffcb2..6616c9fd292f 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -487,14 +487,17 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback struct net *net = sock_net(skb->sk); for (;;) { - tunnel = l2tp_tunnel_find_nth(net, ti); + tunnel = l2tp_tunnel_get_nth(net, ti); if (tunnel == NULL) goto out; if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - tunnel, L2TP_CMD_TUNNEL_GET) < 0) + tunnel, L2TP_CMD_TUNNEL_GET) < 0) { + l2tp_tunnel_dec_refcount(tunnel); goto out; + } + l2tp_tunnel_dec_refcount(tunnel); ti++; } @@ -848,7 +851,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback for (;;) { if (tunnel == NULL) { - tunnel = l2tp_tunnel_find_nth(net, ti); + tunnel = l2tp_tunnel_get_nth(net, ti); if (tunnel == NULL) goto out; } @@ -856,6 +859,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback session = l2tp_session_get_nth(tunnel, si); if (session == NULL) { ti++; + l2tp_tunnel_dec_refcount(tunnel); tunnel = NULL; si = 0; continue; @@ -865,6 +869,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback cb->nlh->nlmsg_seq, NLM_F_MULTI, session, L2TP_CMD_SESSION_GET) < 0) { l2tp_session_dec_refcount(session); + l2tp_tunnel_dec_refcount(tunnel); break; } l2tp_session_dec_refcount(session); From 0e0c3fee3a59a387aeecc4fca6f3a2e9615a5443 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 12 Apr 2018 20:50:34 +0200 Subject: [PATCH 094/660] l2tp: hold reference on tunnels printed in pppol2tp proc file Use l2tp_tunnel_get_nth() instead of l2tp_tunnel_find_nth(), to be safe against concurrent tunnel deletion. Unlike sessions, we can't drop the reference held on tunnels in pppol2tp_seq_show(). Tunnels are reused across several calls to pppol2tp_seq_start() when iterating over sessions. These iterations need the tunnel for accessing the next session. Therefore the only safe moment for dropping the reference is just before searching for the next tunnel. Normally, the last invocation of pppol2tp_next_tunnel() doesn't find any new tunnel, so it drops the last tunnel without taking any new reference. However, in case of error, pppol2tp_seq_stop() is called directly, so we have to drop the reference there. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 896bbca9bdaa..7d0c963680e6 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1551,16 +1551,19 @@ struct pppol2tp_seq_data { static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) { + /* Drop reference taken during previous invocation */ + if (pd->tunnel) + l2tp_tunnel_dec_refcount(pd->tunnel); + for (;;) { - pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx); + pd->tunnel = l2tp_tunnel_get_nth(net, pd->tunnel_idx); pd->tunnel_idx++; - if (pd->tunnel == NULL) - break; + /* Only accept L2TPv2 tunnels */ + if (!pd->tunnel || pd->tunnel->version == 2) + return; - /* Ignore L2TPv3 tunnels */ - if (pd->tunnel->version < 3) - break; + l2tp_tunnel_dec_refcount(pd->tunnel); } } @@ -1609,7 +1612,14 @@ static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos) static void pppol2tp_seq_stop(struct seq_file *p, void *v) { - /* nothing to do */ + struct pppol2tp_seq_data *pd = v; + + if (!pd || pd == SEQ_START_TOKEN) + return; + + /* Drop reference taken by last invocation of pppol2tp_next_tunnel() */ + if (pd->tunnel) + l2tp_tunnel_dec_refcount(pd->tunnel); } static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) From f726214d9b23e5fce8c11937577a289a3202498f Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 12 Apr 2018 20:50:35 +0200 Subject: [PATCH 095/660] l2tp: hold reference on tunnels printed in l2tp/tunnels debugfs file Use l2tp_tunnel_get_nth() instead of l2tp_tunnel_find_nth(), to be safe against concurrent tunnel deletion. Use the same mechanism as in l2tp_ppp.c for dropping the reference taken by l2tp_tunnel_get_nth(). That is, drop the reference just before looking up the next tunnel. In case of error, drop the last accessed tunnel in l2tp_dfs_seq_stop(). That was the last use of l2tp_tunnel_find_nth(). Fixes: 0ad6614048cf ("l2tp: Add debugfs files for dumping l2tp debug info") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 20 -------------------- net/l2tp/l2tp_core.h | 1 - net/l2tp/l2tp_debugfs.c | 15 +++++++++++++-- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c8c4183f0f37..40261cb68e83 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -355,26 +355,6 @@ err_tlock: } EXPORT_SYMBOL_GPL(l2tp_session_register); -struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth) -{ - struct l2tp_net *pn = l2tp_pernet(net); - struct l2tp_tunnel *tunnel; - int count = 0; - - rcu_read_lock_bh(); - list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth) { - rcu_read_unlock_bh(); - return tunnel; - } - } - - rcu_read_unlock_bh(); - - return NULL; -} -EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth); - /***************************************************************************** * Receive data handling *****************************************************************************/ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index e4896413b2b6..c199020f8a8a 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -222,7 +222,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); -struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth); int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 72e713da4733..b8f9d45bfeb1 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -47,7 +47,11 @@ struct l2tp_dfs_seq_data { static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) { - pd->tunnel = l2tp_tunnel_find_nth(pd->net, pd->tunnel_idx); + /* Drop reference taken during previous invocation */ + if (pd->tunnel) + l2tp_tunnel_dec_refcount(pd->tunnel); + + pd->tunnel = l2tp_tunnel_get_nth(pd->net, pd->tunnel_idx); pd->tunnel_idx++; } @@ -96,7 +100,14 @@ static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos) static void l2tp_dfs_seq_stop(struct seq_file *p, void *v) { - /* nothing to do */ + struct l2tp_dfs_seq_data *pd = v; + + if (!pd || pd == SEQ_START_TOKEN) + return; + + /* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */ + if (pd->tunnel) + l2tp_tunnel_dec_refcount(pd->tunnel); } static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) From a1cc7034e33d12dc17d13fbcd7d597d552889097 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 12 Apr 2018 22:30:44 -0400 Subject: [PATCH 096/660] MIPS: io: Add barrier after register read in readX() While a barrier is present in the writeX() functions before the register write, a similar barrier is missing in the readX() functions after the register read. This could allow memory accesses following readX() to observe stale data. Signed-off-by: Sinan Kaya Reported-by: Arnd Bergmann Cc: Ralf Baechle Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/19069/ [jhogan@kernel.org: Tidy commit message] Signed-off-by: James Hogan --- arch/mips/include/asm/io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index fd00ddafb425..a7d0b836f2f7 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -377,6 +377,8 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem) \ BUG(); \ } \ \ + /* prevent prefetching of coherent DMA data prematurely */ \ + rmb(); \ return pfx##ioswab##bwlq(__mem, __val); \ } From c7cd882469fc5042a5c84122b4062d7f53076db7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 13 Apr 2018 21:54:37 +0200 Subject: [PATCH 097/660] parisc: Fix missing binfmt_elf32.o build error Commit 71d577db01a5 ("parisc: Switch to generic COMPAT_BINFMT_ELF") removed the binfmt_elf32.c source file, but missed to drop the object file from the list of object files the Makefile, which then results in a build error. Fixes: 71d577db01a5 ("parisc: Switch to generic COMPAT_BINFMT_ELF") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Helge Deller --- arch/parisc/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index eafd06ab59ef..e5de34d00b1a 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PA11) += pci-dma.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_64BIT) += binfmt_elf32.o sys_parisc32.o signal32.o +obj-$(CONFIG_64BIT) += sys_parisc32.o signal32.o obj-$(CONFIG_STACKTRACE)+= stacktrace.o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o From 43838a23a05fbd13e47d750d3dfd77001536dd33 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 11 Apr 2018 13:27:52 -0400 Subject: [PATCH 098/660] random: fix crng_ready() test The crng_init variable has three states: 0: The CRNG is not initialized at all 1: The CRNG has a small amount of entropy, hopefully good enough for early-boot, non-cryptographical use cases 2: The CRNG is fully initialized and we are sure it is safe for cryptographic use cases. The crng_ready() function should only return true once we are in the last state. This addresses CVE-2018-1108. Reported-by: Jann Horn Fixes: e192be9d9a30 ("random: replace non-blocking pool...") Cc: stable@kernel.org # 4.8+ Signed-off-by: Theodore Ts'o Reviewed-by: Jann Horn --- drivers/char/random.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e027e7fa1472..c8ec1e70abde 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -427,7 +427,7 @@ struct crng_state primary_crng = { * its value (from 0->1->2). */ static int crng_init = 0; -#define crng_ready() (likely(crng_init > 0)) +#define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) static void _extract_crng(struct crng_state *crng, @@ -794,7 +794,7 @@ static int crng_fast_load(const char *cp, size_t len) if (!spin_trylock_irqsave(&primary_crng.lock, flags)) return 0; - if (crng_ready()) { + if (crng_init != 0) { spin_unlock_irqrestore(&primary_crng.lock, flags); return 0; } @@ -856,7 +856,7 @@ static void _extract_crng(struct crng_state *crng, { unsigned long v, flags; - if (crng_init > 1 && + if (crng_ready() && time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL)) crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL); spin_lock_irqsave(&crng->lock, flags); @@ -1139,7 +1139,7 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_mix(fast_pool); add_interrupt_bench(cycles); - if (!crng_ready()) { + if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && crng_fast_load((char *) fast_pool->pool, sizeof(fast_pool->pool))) { @@ -2212,7 +2212,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, { struct entropy_store *poolp = &input_pool; - if (!crng_ready()) { + if (unlikely(crng_init == 0)) { crng_fast_load(buffer, count); return; } From dc12baacb95f205948f64dc936a47d89ee110117 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 11 Apr 2018 14:58:27 -0400 Subject: [PATCH 099/660] random: use a different mixing algorithm for add_device_randomness() add_device_randomness() use of crng_fast_load() was highly problematic. Some callers of add_device_randomness() can pass in a large amount of static information. This would immediately promote the crng_init state from 0 to 1, without really doing much to initialize the primary_crng's internal state with something even vaguely unpredictable. Since we don't have the speed constraints of add_interrupt_randomness(), we can do a better job mixing in the what unpredictability a device driver or architecture maintainer might see fit to give us, and do it in a way which does not bump the crng_init_cnt variable. Also, since add_device_randomness() doesn't bump any entropy accounting in crng_init state 0, mix the device randomness into the input_pool entropy pool as well. This is related to CVE-2018-1108. Reported-by: Jann Horn Fixes: ee7998c50c26 ("random: do not ignore early device randomness") Cc: stable@kernel.org # 4.13+ Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 55 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index c8ec1e70abde..6baa828c0493 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -787,6 +787,10 @@ static void crng_initialize(struct crng_state *crng) crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } +/* + * crng_fast_load() can be called by code in the interrupt service + * path. So we can't afford to dilly-dally. + */ static int crng_fast_load(const char *cp, size_t len) { unsigned long flags; @@ -813,6 +817,51 @@ static int crng_fast_load(const char *cp, size_t len) return 1; } +/* + * crng_slow_load() is called by add_device_randomness, which has two + * attributes. (1) We can't trust the buffer passed to it is + * guaranteed to be unpredictable (so it might not have any entropy at + * all), and (2) it doesn't have the performance constraints of + * crng_fast_load(). + * + * So we do something more comprehensive which is guaranteed to touch + * all of the primary_crng's state, and which uses a LFSR with a + * period of 255 as part of the mixing algorithm. Finally, we do + * *not* advance crng_init_cnt since buffer we may get may be something + * like a fixed DMI table (for example), which might very well be + * unique to the machine, but is otherwise unvarying. + */ +static int crng_slow_load(const char *cp, size_t len) +{ + unsigned long flags; + static unsigned char lfsr = 1; + unsigned char tmp; + unsigned i, max = CHACHA20_KEY_SIZE; + const char * src_buf = cp; + char * dest_buf = (char *) &primary_crng.state[4]; + + if (!spin_trylock_irqsave(&primary_crng.lock, flags)) + return 0; + if (crng_init != 0) { + spin_unlock_irqrestore(&primary_crng.lock, flags); + return 0; + } + if (len > max) + max = len; + + for (i = 0; i < max ; i++) { + tmp = lfsr; + lfsr >>= 1; + if (tmp & 1) + lfsr ^= 0xE1; + tmp = dest_buf[i % CHACHA20_KEY_SIZE]; + dest_buf[i % CHACHA20_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; + lfsr += (tmp << 3) | (tmp >> 5); + } + spin_unlock_irqrestore(&primary_crng.lock, flags); + return 1; +} + static void crng_reseed(struct crng_state *crng, struct entropy_store *r) { unsigned long flags; @@ -981,10 +1030,8 @@ void add_device_randomness(const void *buf, unsigned int size) unsigned long time = random_get_entropy() ^ jiffies; unsigned long flags; - if (!crng_ready()) { - crng_fast_load(buf, size); - return; - } + if (!crng_ready() && size) + crng_slow_load(buf, size); trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); From 8ef35c866f8862df074a49a93b0309725812dea8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 11 Apr 2018 15:23:56 -0400 Subject: [PATCH 100/660] random: set up the NUMA crng instances after the CRNG is fully initialized Until the primary_crng is fully initialized, don't initialize the NUMA crng nodes. Otherwise users of /dev/urandom on NUMA systems before the CRNG is fully initialized can get very bad quality randomness. Of course everyone should move to getrandom(2) where this won't be an issue, but there's a lot of legacy code out there. This related to CVE-2018-1108. Reported-by: Jann Horn Fixes: 1e7f583af67b ("random: make /dev/urandom scalable for silly...") Cc: stable@kernel.org # 4.8+ Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 46 +++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6baa828c0493..02d792f7933f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -787,6 +787,32 @@ static void crng_initialize(struct crng_state *crng) crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } +#ifdef CONFIG_NUMA +static void numa_crng_init(void) +{ + int i; + struct crng_state *crng; + struct crng_state **pool; + + pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL); + for_each_online_node(i) { + crng = kmalloc_node(sizeof(struct crng_state), + GFP_KERNEL | __GFP_NOFAIL, i); + spin_lock_init(&crng->lock); + crng_initialize(crng); + pool[i] = crng; + } + mb(); + if (cmpxchg(&crng_node_pool, NULL, pool)) { + for_each_node(i) + kfree(pool[i]); + kfree(pool); + } +} +#else +static void numa_crng_init(void) {} +#endif + /* * crng_fast_load() can be called by code in the interrupt service * path. So we can't afford to dilly-dally. @@ -893,6 +919,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng == &primary_crng && crng_init < 2) { invalidate_batched_entropy(); + numa_crng_init(); crng_init = 2; process_random_ready_list(); wake_up_interruptible(&crng_init_wait); @@ -1727,28 +1754,9 @@ static void init_std_data(struct entropy_store *r) */ static int rand_initialize(void) { -#ifdef CONFIG_NUMA - int i; - struct crng_state *crng; - struct crng_state **pool; -#endif - init_std_data(&input_pool); init_std_data(&blocking_pool); crng_initialize(&primary_crng); - -#ifdef CONFIG_NUMA - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL); - for_each_online_node(i) { - crng = kmalloc_node(sizeof(struct crng_state), - GFP_KERNEL | __GFP_NOFAIL, i); - spin_lock_init(&crng->lock); - crng_initialize(crng); - pool[i] = crng; - } - mb(); - crng_node_pool = pool; -#endif return 0; } early_initcall(rand_initialize); From 0bb29a849a6433b72e249eea7695477b02056e94 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 12 Apr 2018 00:50:45 -0400 Subject: [PATCH 101/660] random: crng_reseed() should lock the crng instance that it is modifying Reported-by: Jann Horn Fixes: 1e7f583af67b ("random: make /dev/urandom scalable for silly...") Cc: stable@kernel.org # 4.8+ Signed-off-by: Theodore Ts'o Reviewed-by: Jann Horn --- drivers/char/random.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 02d792f7933f..898233f594b4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -906,7 +906,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) _crng_backtrack_protect(&primary_crng, buf.block, CHACHA20_KEY_SIZE); } - spin_lock_irqsave(&primary_crng.lock, flags); + spin_lock_irqsave(&crng->lock, flags); for (i = 0; i < 8; i++) { unsigned long rv; if (!arch_get_random_seed_long(&rv) && @@ -916,7 +916,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } memzero_explicit(&buf, sizeof(buf)); crng->init_time = jiffies; - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&crng->lock, flags); if (crng == &primary_crng && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); From d848e5f8e1ebdb227d045db55fe4f825e82965fa Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 11 Apr 2018 16:32:17 -0400 Subject: [PATCH 102/660] random: add new ioctl RNDRESEEDCRNG Add a new ioctl which forces the the crng to be reseeded. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- drivers/char/random.c | 13 ++++++++++++- include/uapi/linux/random.h | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 898233f594b4..3cd3aae24d6d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -429,6 +429,7 @@ struct crng_state primary_crng = { static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; +static unsigned long crng_global_init_time = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) static void _extract_crng(struct crng_state *crng, __u32 out[CHACHA20_BLOCK_WORDS]); @@ -933,7 +934,8 @@ static void _extract_crng(struct crng_state *crng, unsigned long v, flags; if (crng_ready() && - time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL)) + (time_after(crng_global_init_time, crng->init_time) || + time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))) crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL); spin_lock_irqsave(&crng->lock, flags); if (arch_get_random_long(&v)) @@ -1757,6 +1759,7 @@ static int rand_initialize(void) init_std_data(&input_pool); init_std_data(&blocking_pool); crng_initialize(&primary_crng); + crng_global_init_time = jiffies; return 0; } early_initcall(rand_initialize); @@ -1930,6 +1933,14 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) input_pool.entropy_count = 0; blocking_pool.entropy_count = 0; return 0; + case RNDRESEEDCRNG: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (crng_init < 2) + return -ENODATA; + crng_reseed(&primary_crng, NULL); + crng_global_init_time = jiffies - 1; + return 0; default: return -EINVAL; } diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index c34f4490d025..26ee91300e3e 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -35,6 +35,9 @@ /* Clear the entropy pool and associated counters. (Superuser only.) */ #define RNDCLEARPOOL _IO( 'R', 0x06 ) +/* Reseed CRNG. (Superuser only.) */ +#define RNDRESEEDCRNG _IO( 'R', 0x07 ) + struct rand_pool_info { int entropy_count; int buf_size; From 494bef4c2a087876e75f3e95f7f63b06d6a65921 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 13 Apr 2018 19:17:22 +0100 Subject: [PATCH 103/660] sfc: insert ARFS filters with replace_equal=true Necessary to allow redirecting a flow when the application moves. Fixes: 3af0f34290f6 ("sfc: replace asynchronous filter operations") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 95682831484e..13b0eb71dbf3 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -851,7 +851,7 @@ static void efx_filter_rfs_work(struct work_struct *data) struct efx_channel *channel = efx_get_channel(efx, req->rxq_index); int rc; - rc = efx->type->filter_insert(efx, &req->spec, false); + rc = efx->type->filter_insert(efx, &req->spec, true); if (rc >= 0) { /* Remember this so we can check whether to expire the filter * later. From a7f80189e41c96c0c6210e9198a31859c91eb3e5 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 13 Apr 2018 19:17:49 +0100 Subject: [PATCH 104/660] sfc: pass the correctly bogus filter_id to rps_may_expire_flow() When we inserted an ARFS filter for ndo_rx_flow_steer(), we didn't know what the filter ID would be, so we just returned 0. Thus, we must also pass 0 as the filter ID when calling rps_may_expire_flow() for it, and rely on the flow_id to identify what we're talking about. Fixes: 3af0f34290f6 ("sfc: replace asynchronous filter operations") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 3 +-- drivers/net/ethernet/sfc/farch.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 50daad0a1482..36f24c7e553a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4776,8 +4776,7 @@ static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, goto out_unlock; } - if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, - flow_id, filter_idx)) { + if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, 0)) { ret = false; goto out_unlock; } diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 4a19c7efdf8d..7174ef5e5c5e 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -2912,7 +2912,7 @@ bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, if (test_bit(index, table->used_bitmap) && table->spec[index].priority == EFX_FILTER_PRI_HINT && rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id, - flow_id, index)) { + flow_id, 0)) { efx_farch_filter_table_clear_entry(efx, table, index); ret = true; } From f993740ee05821307eca03d23d468895740450f8 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 13 Apr 2018 19:18:09 +0100 Subject: [PATCH 105/660] sfc: limit ARFS workitems in flight per channel A misconfigured system (e.g. with all interrupts affinitised to all CPUs) may produce a storm of ARFS steering events. With the existing sfc ARFS implementation, that could create a backlog of workitems that grinds the system to a halt. To prevent this, limit the number of workitems that may be in flight for a given SFC device to 8 (EFX_RPS_MAX_IN_FLIGHT), and return EBUSY from our ndo_rx_flow_steer method if the limit is reached. Given this limit, also store the workitems in an array of slots within the struct efx_nic, rather than dynamically allocating for each request. The limit should not negatively impact performance, because it is only likely to be hit in cases where ARFS will be ineffective anyway. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/net_driver.h | 25 ++++++++++++ drivers/net/ethernet/sfc/rx.c | 58 ++++++++++++++------------- 2 files changed, 55 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 5e379a83c729..eea3808b3f25 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -733,6 +733,27 @@ struct efx_rss_context { u32 rx_indir_table[128]; }; +#ifdef CONFIG_RFS_ACCEL +/** + * struct efx_async_filter_insertion - Request to asynchronously insert a filter + * @net_dev: Reference to the netdevice + * @spec: The filter to insert + * @work: Workitem for this request + * @rxq_index: Identifies the channel for which this request was made + * @flow_id: Identifies the kernel-side flow for which this request was made + */ +struct efx_async_filter_insertion { + struct net_device *net_dev; + struct efx_filter_spec spec; + struct work_struct work; + u16 rxq_index; + u32 flow_id; +}; + +/* Maximum number of ARFS workitems that may be in flight on an efx_nic */ +#define EFX_RPS_MAX_IN_FLIGHT 8 +#endif /* CONFIG_RFS_ACCEL */ + /** * struct efx_nic - an Efx NIC * @name: Device name (net device name or bus id before net device registered) @@ -850,6 +871,8 @@ struct efx_rss_context { * @rps_expire_channel: Next channel to check for expiry * @rps_expire_index: Next index to check for expiry in * @rps_expire_channel's @rps_flow_id + * @rps_slot_map: bitmap of in-flight entries in @rps_slot + * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work() * @active_queues: Count of RX and TX queues that haven't been flushed and drained. * @rxq_flush_pending: Count of number of receive queues that need to be flushed. * Decremented when the efx_flush_rx_queue() is called. @@ -1004,6 +1027,8 @@ struct efx_nic { struct mutex rps_mutex; unsigned int rps_expire_channel; unsigned int rps_expire_index; + unsigned long rps_slot_map; + struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT]; #endif atomic_t active_queues; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 13b0eb71dbf3..9c593c661cbf 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -827,28 +827,13 @@ MODULE_PARM_DESC(rx_refill_threshold, #ifdef CONFIG_RFS_ACCEL -/** - * struct efx_async_filter_insertion - Request to asynchronously insert a filter - * @net_dev: Reference to the netdevice - * @spec: The filter to insert - * @work: Workitem for this request - * @rxq_index: Identifies the channel for which this request was made - * @flow_id: Identifies the kernel-side flow for which this request was made - */ -struct efx_async_filter_insertion { - struct net_device *net_dev; - struct efx_filter_spec spec; - struct work_struct work; - u16 rxq_index; - u32 flow_id; -}; - static void efx_filter_rfs_work(struct work_struct *data) { struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion, work); struct efx_nic *efx = netdev_priv(req->net_dev); struct efx_channel *channel = efx_get_channel(efx, req->rxq_index); + int slot_idx = req - efx->rps_slot; int rc; rc = efx->type->filter_insert(efx, &req->spec, true); @@ -878,8 +863,8 @@ static void efx_filter_rfs_work(struct work_struct *data) } /* Release references */ + clear_bit(slot_idx, &efx->rps_slot_map); dev_put(req->net_dev); - kfree(req); } int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, @@ -888,22 +873,36 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, struct efx_nic *efx = netdev_priv(net_dev); struct efx_async_filter_insertion *req; struct flow_keys fk; + int slot_idx; + int rc; - if (flow_id == RPS_FLOW_ID_INVALID) - return -EINVAL; + /* find a free slot */ + for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++) + if (!test_and_set_bit(slot_idx, &efx->rps_slot_map)) + break; + if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT) + return -EBUSY; - if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) - return -EPROTONOSUPPORT; + if (flow_id == RPS_FLOW_ID_INVALID) { + rc = -EINVAL; + goto out_clear; + } - if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) - return -EPROTONOSUPPORT; - if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) - return -EPROTONOSUPPORT; + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) { + rc = -EPROTONOSUPPORT; + goto out_clear; + } - req = kmalloc(sizeof(*req), GFP_ATOMIC); - if (!req) - return -ENOMEM; + if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) { + rc = -EPROTONOSUPPORT; + goto out_clear; + } + if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) { + rc = -EPROTONOSUPPORT; + goto out_clear; + } + req = efx->rps_slot + slot_idx; efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT, efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, rxq_index); @@ -933,6 +932,9 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, req->flow_id = flow_id; schedule_work(&req->work); return 0; +out_clear: + clear_bit(slot_idx, &efx->rps_slot_map); + return rc; } bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) From 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 12 Apr 2018 19:11:58 +0100 Subject: [PATCH 106/660] block: do not use interruptible wait anywhere When blk_queue_enter() waits for a queue to unfreeze, or unset the PREEMPT_ONLY flag, do not allow it to be interrupted by a signal. The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably"). Note the SCSI device is resumed asynchronously, i.e. after un-freezing userspace tasks. So that commit exposed the bug as a regression in v4.15. A mysterious SIGBUS (or -EIO) sometimes happened during the time the device was being resumed. Most frequently, there was no kernel log message, and we saw Xorg or Xwayland killed by SIGBUS.[1] [1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979 Without this fix, I get an IO error in this test: # dd if=/dev/sda of=/dev/null iflag=direct & \ while killall -SIGUSR1 dd; do sleep 0.1; done & \ echo mem > /sys/power/state ; \ sleep 5; killall dd # stop after 5 seconds The interruptible wait was added to blk_queue_enter in commit 3ef28e83ab15 ("block: generic request_queue reference counting"). Before then, the interruptible wait was only in blk-mq, but I don't think it could ever have been correct. Reviewed-by: Bart Van Assche Cc: stable@vger.kernel.org Signed-off-by: Alan Jenkins Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 806ce2442819..3e77409e5a84 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -915,7 +915,6 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) while (true) { bool success = false; - int ret; rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { @@ -947,14 +946,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) */ smp_rmb(); - ret = wait_event_interruptible(q->mq_freeze_wq, - (atomic_read(&q->mq_freeze_depth) == 0 && - (preempt || !blk_queue_preempt_only(q))) || - blk_queue_dying(q)); + wait_event(q->mq_freeze_wq, + (atomic_read(&q->mq_freeze_depth) == 0 && + (preempt || !blk_queue_preempt_only(q))) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; - if (ret) - return ret; } } From 1894e916546df0efec9890a5c9954f4ad281494c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Apr 2018 16:24:29 -0600 Subject: [PATCH 107/660] loop: remove cmd->rq member We can always get at the request from the payload, no need to store a pointer to it. Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 36 +++++++++++++++++++----------------- drivers/block/loop.h | 1 - 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c9d04497a415..8b2fde2109fc 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -452,9 +452,9 @@ static void lo_complete_rq(struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio && - cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) { - struct bio *bio = cmd->rq->bio; + if (unlikely(req_op(rq) == REQ_OP_READ && cmd->use_aio && + cmd->ret >= 0 && cmd->ret < blk_rq_bytes(rq))) { + struct bio *bio = rq->bio; bio_advance(bio, cmd->ret); zero_fill_bio(bio); @@ -465,11 +465,13 @@ static void lo_complete_rq(struct request *rq) static void lo_rw_aio_do_completion(struct loop_cmd *cmd) { + struct request *rq = blk_mq_rq_from_pdu(cmd); + if (!atomic_dec_and_test(&cmd->ref)) return; kfree(cmd->bvec); cmd->bvec = NULL; - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(rq); } static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) @@ -487,7 +489,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, { struct iov_iter iter; struct bio_vec *bvec; - struct request *rq = cmd->rq; + struct request *rq = blk_mq_rq_from_pdu(cmd); struct bio *bio = rq->bio; struct file *file = lo->lo_backing_file; unsigned int offset; @@ -1702,15 +1704,16 @@ EXPORT_SYMBOL(loop_unregister_transfer); static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { - struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); - struct loop_device *lo = cmd->rq->q->queuedata; + struct request *rq = bd->rq; + struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct loop_device *lo = rq->q->queuedata; - blk_mq_start_request(bd->rq); + blk_mq_start_request(rq); if (lo->lo_state != Lo_bound) return BLK_STS_IOERR; - switch (req_op(cmd->rq)) { + switch (req_op(rq)) { case REQ_OP_FLUSH: case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: @@ -1723,8 +1726,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, /* always use the first bio's css */ #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && cmd->rq->bio && cmd->rq->bio->bi_css) { - cmd->css = cmd->rq->bio->bi_css; + if (cmd->use_aio && rq->bio && rq->bio->bi_css) { + cmd->css = rq->bio->bi_css; css_get(cmd->css); } else #endif @@ -1736,8 +1739,9 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, static void loop_handle_cmd(struct loop_cmd *cmd) { - const bool write = op_is_write(req_op(cmd->rq)); - struct loop_device *lo = cmd->rq->q->queuedata; + struct request *rq = blk_mq_rq_from_pdu(cmd); + const bool write = op_is_write(req_op(rq)); + struct loop_device *lo = rq->q->queuedata; int ret = 0; if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { @@ -1745,12 +1749,12 @@ static void loop_handle_cmd(struct loop_cmd *cmd) goto failed; } - ret = do_req_filebacked(lo, cmd->rq); + ret = do_req_filebacked(lo, rq); failed: /* complete non-aio request */ if (!cmd->use_aio || ret) { cmd->ret = ret ? -EIO : 0; - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(rq); } } @@ -1767,9 +1771,7 @@ static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq, { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->rq = rq; kthread_init_work(&cmd->work, loop_queue_work); - return 0; } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 0f45416e4fcf..b78de9879f4f 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -66,7 +66,6 @@ struct loop_device { struct loop_cmd { struct kthread_work work; - struct request *rq; bool use_aio; /* use AIO interface to handle I/O */ atomic_t ref; /* only for aio */ long ret; From f9de14bc7e7aac77fd44bb2f62206eb9e494f0d2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Apr 2018 16:25:57 -0600 Subject: [PATCH 108/660] loop: handle short DIO reads We ran into an issue with loop and btrfs, where btrfs would complain about checksum errors. It turns out that is because we don't handle short reads at all, we just zero fill the remainder. Worse than that, we don't handle the filling properly, which results in loop trying to advance a single bio by much more than its size, since it doesn't take chaining into account. Handle short reads appropriately, by simply retrying at the new correct offset. End the remainder of the request with EIO, if we get a 0 read. Fixes: bc07c10a3603 ("block: loop: support DIO & AIO") Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8b2fde2109fc..5d4e31655d96 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -451,16 +451,36 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq) static void lo_complete_rq(struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); + blk_status_t ret = BLK_STS_OK; - if (unlikely(req_op(rq) == REQ_OP_READ && cmd->use_aio && - cmd->ret >= 0 && cmd->ret < blk_rq_bytes(rq))) { - struct bio *bio = rq->bio; - - bio_advance(bio, cmd->ret); - zero_fill_bio(bio); + if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || + req_op(rq) != REQ_OP_READ) { + if (cmd->ret < 0) + ret = BLK_STS_IOERR; + goto end_io; } - blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK); + /* + * Short READ - if we got some data, advance our request and + * retry it. If we got no data, end the rest with EIO. + */ + if (cmd->ret) { + blk_update_request(rq, BLK_STS_OK, cmd->ret); + cmd->ret = 0; + blk_mq_requeue_request(rq, true); + } else { + if (cmd->use_aio) { + struct bio *bio = rq->bio; + + while (bio) { + zero_fill_bio(bio); + bio = bio->bi_next; + } + } + ret = BLK_STS_IOERR; +end_io: + blk_mq_end_request(rq, ret); + } } static void lo_rw_aio_do_completion(struct loop_cmd *cmd) From c246fd333f84e6a0a8572f991637aa102f5e1865 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Sun, 15 Apr 2018 16:07:12 +0800 Subject: [PATCH 109/660] filter.txt: update 'tools/net/' to 'tools/bpf/' The tools are located at tootls/bpf/ instead of tools/net/. Update the filter.txt doc. Signed-off-by: Wang Sheng-Hui Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index a4508ec1816b..fd55c7de9991 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -169,7 +169,7 @@ access to BPF code as well. BPF engine and instruction set ------------------------------ -Under tools/net/ there's a small helper tool called bpf_asm which can +Under tools/bpf/ there's a small helper tool called bpf_asm which can be used to write low-level filters for example scenarios mentioned in the previous section. Asm-like syntax mentioned here has been implemented in bpf_asm and will be used for further explanations (instead of dealing with @@ -359,7 +359,7 @@ $ ./bpf_asm -c foo In particular, as usage with xt_bpf or cls_bpf can result in more complex BPF filters that might not be obvious at first, it's good to test filters before attaching to a live system. For that purpose, there's a small tool called -bpf_dbg under tools/net/ in the kernel source directory. This debugger allows +bpf_dbg under tools/bpf/ in the kernel source directory. This debugger allows for testing BPF filters against given pcap files, single stepping through the BPF code on the pcap's packets and to do BPF machine register dumps. @@ -483,7 +483,7 @@ Example output from dmesg: [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00 [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3 -In the kernel source tree under tools/net/, there's bpf_jit_disasm for +In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for generating disassembly out of the kernel log's hexdump: # ./bpf_jit_disasm From a24cd490739586a7d2da3549a1844e1d7c4f4fc4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2018 23:50:31 -0400 Subject: [PATCH 110/660] hypfs_kill_super(): deal with failed allocations hypfs_fill_super() might fail to allocate sbi; hypfs_kill_super() should not oops on that. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/s390/hypfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 43bbe63e2992..06b513d192b9 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -320,7 +320,7 @@ static void hypfs_kill_super(struct super_block *sb) if (sb->s_root) hypfs_delete_tree(sb->s_root); - if (sb_info->update_file) + if (sb_info && sb_info->update_file) hypfs_remove(sb_info->update_file); kfree(sb->s_fs_info); sb->s_fs_info = NULL; From c66b23c2840446a82c389e4cb1a12eb2a71fa2e4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2018 23:56:44 -0400 Subject: [PATCH 111/660] jffs2_kill_sb(): deal with failed allocations jffs2_fill_super() might fail to allocate jffs2_sb_info; jffs2_kill_sb() must survive that. Cc: stable@kernel.org Signed-off-by: Al Viro --- fs/jffs2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f60dee7faf03..87bdf0f4cba1 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -342,7 +342,7 @@ static void jffs2_put_super (struct super_block *sb) static void jffs2_kill_sb(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - if (!sb_rdonly(sb)) + if (c && !sb_rdonly(sb)) jffs2_stop_garbage_collect_thread(c); kill_mtd_super(sb); kfree(c); From 659038428cb43a66e3eff71e2c845c9de3611a98 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 3 Apr 2018 00:13:17 -0400 Subject: [PATCH 112/660] orangefs_kill_sb(): deal with allocation failures orangefs_fill_sb() might've failed to allocate ORANGEFS_SB(s); don't oops in that case. Cc: stable@kernel.org Signed-off-by: Al Viro --- fs/orangefs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 3ae5fdba0225..10796d3fe27d 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -579,6 +579,11 @@ void orangefs_kill_sb(struct super_block *sb) /* provided sb cleanup */ kill_anon_super(sb); + if (!ORANGEFS_SB(sb)) { + mutex_lock(&orangefs_request_mutex); + mutex_unlock(&orangefs_request_mutex); + return; + } /* * issue the unmount to userspace to tell it to remove the * dynamic mount info it has for this superblock From 4a3877c4cedd95543f8726b0a98743ed8db0c0fb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 3 Apr 2018 01:15:46 -0400 Subject: [PATCH 113/660] rpc_pipefs: fix double-dput() if we ever hit rpc_gssd_dummy_depopulate() dentry passed to it has refcount equal to 1. __rpc_rmpipe() drops it and dput() done after that hits an already freed dentry. Cc: stable@kernel.org Signed-off-by: Al Viro --- net/sunrpc/rpc_pipe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 0f08934b2cea..c81ef5e6c981 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1375,6 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) struct dentry *clnt_dir = pipe_dentry->d_parent; struct dentry *gssd_dir = clnt_dir->d_parent; + dget(pipe_dentry); __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry); __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); From 8e04944f0ea8b838399049bdcda920ab36ae3b04 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 4 Apr 2018 19:53:07 +0900 Subject: [PATCH 114/660] mm,vmscan: Allow preallocating memory for register_shrinker(). syzbot is catching so many bugs triggered by commit 9ee332d99e4d5a97 ("sget(): handle failures of register_shrinker()"). That commit expected that calling kill_sb() from deactivate_locked_super() without successful fill_super() is safe, but the reality was different; some callers assign attributes which are needed for kill_sb() after sget() succeeds. For example, [1] is a report where sb->s_mode (which seems to be either FMODE_READ | FMODE_EXCL | FMODE_WRITE or FMODE_READ | FMODE_EXCL) is not assigned unless sget() succeeds. But it does not worth complicate sget() so that register_shrinker() failure path can safely call kill_block_super() via kill_sb(). Making alloc_super() fail if memory allocation for register_shrinker() failed is much simpler. Let's avoid calling deactivate_locked_super() from sget_userns() by preallocating memory for the shrinker and making register_shrinker() in sget_userns() never fail. [1] https://syzkaller.appspot.com/bug?id=588996a25a2587be2e3a54e8646728fb9cae44e7 Signed-off-by: Tetsuo Handa Reported-by: syzbot Cc: Al Viro Cc: Michal Hocko Signed-off-by: Al Viro --- fs/super.c | 9 ++++----- include/linux/shrinker.h | 7 +++++-- mm/vmscan.c | 21 ++++++++++++++++++++- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/super.c b/fs/super.c index 5fa9a8d8d865..122c402049a2 100644 --- a/fs/super.c +++ b/fs/super.c @@ -167,6 +167,7 @@ static void destroy_unused_super(struct super_block *s) security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); + free_prealloced_shrinker(&s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); } @@ -252,6 +253,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_shrink.count_objects = super_cache_count; s->s_shrink.batch = 1024; s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; + if (prealloc_shrinker(&s->s_shrink)) + goto fail; return s; fail: @@ -518,11 +521,7 @@ retry: hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - err = register_shrinker(&s->s_shrink); - if (err) { - deactivate_locked_super(s); - s = ERR_PTR(err); - } + register_shrinker_prepared(&s->s_shrink); return s; } diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 388ff2936a87..6794490f25b2 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -75,6 +75,9 @@ struct shrinker { #define SHRINKER_NUMA_AWARE (1 << 0) #define SHRINKER_MEMCG_AWARE (1 << 1) -extern int register_shrinker(struct shrinker *); -extern void unregister_shrinker(struct shrinker *); +extern int prealloc_shrinker(struct shrinker *shrinker); +extern void register_shrinker_prepared(struct shrinker *shrinker); +extern int register_shrinker(struct shrinker *shrinker); +extern void unregister_shrinker(struct shrinker *shrinker); +extern void free_prealloced_shrinker(struct shrinker *shrinker); #endif diff --git a/mm/vmscan.c b/mm/vmscan.c index 8b920ce3ae02..9b697323a88c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -303,7 +303,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone /* * Add a shrinker callback to be called from the vm. */ -int register_shrinker(struct shrinker *shrinker) +int prealloc_shrinker(struct shrinker *shrinker) { size_t size = sizeof(*shrinker->nr_deferred); @@ -313,10 +313,29 @@ int register_shrinker(struct shrinker *shrinker) shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); if (!shrinker->nr_deferred) return -ENOMEM; + return 0; +} +void free_prealloced_shrinker(struct shrinker *shrinker) +{ + kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; +} + +void register_shrinker_prepared(struct shrinker *shrinker) +{ down_write(&shrinker_rwsem); list_add_tail(&shrinker->list, &shrinker_list); up_write(&shrinker_rwsem); +} + +int register_shrinker(struct shrinker *shrinker) +{ + int err = prealloc_shrinker(shrinker); + + if (err) + return err; + register_shrinker_prepared(shrinker); return 0; } EXPORT_SYMBOL(register_shrinker); From dccccd332d028f57358a8b64ca88e691fc8be5b7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 13 Apr 2018 18:22:14 +0200 Subject: [PATCH 115/660] s390/sclp: avoid potential usage of uninitialized value sclp_early_printk could be used before .bss section is zeroed (i.e. from als.c during the decompressor phase), therefore values used by sclp_early_printk should be located in the .data section. Another reason for that is to avoid potential initrd corruption, if some code in future would use sclp_early_printk before initrd is moved from possibly overlapping with .bss section region to a safe location. Fixes: 0b0d1173d8ae ("s390/sclp: 32 bit event mask compatibility mode") Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_early_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 5f8d9ea69ebd..eceba3858cef 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -18,7 +18,7 @@ int sclp_init_state __section(.data) = sclp_init_state_uninitialized; * Used to keep track of the size of the event masks. Qemu until version 2.11 * only supports 4 and needs a workaround. */ -bool sclp_mask_compat_mode; +bool sclp_mask_compat_mode __section(.data); void sclp_early_wait_irq(void) { From 760dd0eeaec1689430243ead14e5a429613d8c52 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 3 Apr 2018 11:08:52 +0200 Subject: [PATCH 116/660] s390/smsgiucv: disable SMSG on module unload The module exit function of the smsgiucv module uses the incorrect CP command to disable SMSG messages. The correct command is "SET SMSG OFF". Use it. Signed-off-by: Martin Schwidefsky --- drivers/s390/net/smsgiucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index 3b0c8b8a7634..066b5c3aaae6 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -176,7 +176,7 @@ static struct device_driver smsg_driver = { static void __exit smsg_exit(void) { - cpcmd("SET SMSG IUCV", NULL, 0, NULL); + cpcmd("SET SMSG OFF", NULL, 0, NULL); device_unregister(smsg_dev); iucv_unregister(&smsg_handler, 1); driver_unregister(&smsg_driver); From 15ceb8c936d13d940ca9e53996fbd05a26ce96db Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Tue, 27 Jun 2017 12:44:11 +0200 Subject: [PATCH 117/660] s390/kexec_file: Prepare setup.h for kexec_file_load kexec_file_load needs to prepare the new kernels before they are loaded. For that it has to know the offsets in head.S, e.g. to register the new command line. Unfortunately there are no macros right now defining those offsets. Define them now. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 40 +++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 124154fdfc97..9c30ebe046f3 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * S390 version - * Copyright IBM Corp. 1999, 2010 + * Copyright IBM Corp. 1999, 2017 */ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H @@ -37,17 +37,31 @@ #define LPP_MAGIC _BITUL(31) #define LPP_PID_MASK _AC(0xffffffff, UL) +/* Offsets to entry points in kernel/head.S */ + +#define STARTUP_NORMAL_OFFSET 0x10000 +#define STARTUP_KDUMP_OFFSET 0x10010 + +/* Offsets to parameters in kernel/head.S */ + +#define IPL_DEVICE_OFFSET 0x10400 +#define INITRD_START_OFFSET 0x10408 +#define INITRD_SIZE_OFFSET 0x10410 +#define OLDMEM_BASE_OFFSET 0x10418 +#define OLDMEM_SIZE_OFFSET 0x10420 +#define COMMAND_LINE_OFFSET 0x10480 + #ifndef __ASSEMBLY__ #include #include -#define IPL_DEVICE (*(unsigned long *) (0x10400)) -#define INITRD_START (*(unsigned long *) (0x10408)) -#define INITRD_SIZE (*(unsigned long *) (0x10410)) -#define OLDMEM_BASE (*(unsigned long *) (0x10418)) -#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) -#define COMMAND_LINE ((char *) (0x10480)) +#define IPL_DEVICE (*(unsigned long *) (IPL_DEVICE_OFFSET)) +#define INITRD_START (*(unsigned long *) (INITRD_START_OFFSET)) +#define INITRD_SIZE (*(unsigned long *) (INITRD_SIZE_OFFSET)) +#define OLDMEM_BASE (*(unsigned long *) (OLDMEM_BASE_OFFSET)) +#define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET)) +#define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET)) extern int memory_end_set; extern unsigned long memory_end; @@ -121,12 +135,12 @@ extern void (*_machine_power_off)(void); #else /* __ASSEMBLY__ */ -#define IPL_DEVICE 0x10400 -#define INITRD_START 0x10408 -#define INITRD_SIZE 0x10410 -#define OLDMEM_BASE 0x10418 -#define OLDMEM_SIZE 0x10420 -#define COMMAND_LINE 0x10480 +#define IPL_DEVICE (IPL_DEVICE_OFFSET) +#define INITRD_START (INITRD_START_OFFSET) +#define INITRD_SIZE (INITRD_SIZE_OFFSET) +#define OLDMEM_BASE (OLDMEM_BASE_OFFSET) +#define OLDMEM_SIZE (OLDMEM_SIZE_OFFSET) +#define COMMAND_LINE (COMMAND_LINE_OFFSET) #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_SETUP_H */ From 840798a1f52994c172270893bd2ec6013cc92e40 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 28 Aug 2017 15:32:36 +0200 Subject: [PATCH 118/660] s390/kexec_file: Add purgatory The common code expects the architecture to have a purgatory that runs between the two kernels. Add it now. For simplicity first skip crash support. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/Kbuild | 1 + arch/s390/Kconfig | 4 ++ arch/s390/include/asm/purgatory.h | 17 ++++++ arch/s390/kernel/asm-offsets.c | 5 ++ arch/s390/purgatory/Makefile | 37 ++++++++++++ arch/s390/purgatory/head.S | 96 +++++++++++++++++++++++++++++++ arch/s390/purgatory/purgatory.c | 38 ++++++++++++ 7 files changed, 198 insertions(+) create mode 100644 arch/s390/include/asm/purgatory.h create mode 100644 arch/s390/purgatory/Makefile create mode 100644 arch/s390/purgatory/head.S create mode 100644 arch/s390/purgatory/purgatory.c diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index 9fdff3fe1a42..e63940bb57cd 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -8,3 +8,4 @@ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_NUMA) += numa/ +obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 32a0d5b958bf..5b8d0859b317 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -51,6 +51,10 @@ config KEXEC def_bool y select KEXEC_CORE +config ARCH_HAS_KEXEC_PURGATORY + def_bool y + depends on KEXEC_FILE + config AUDIT_ARCH def_bool y diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h new file mode 100644 index 000000000000..e297bcfc476f --- /dev/null +++ b/arch/s390/include/asm/purgatory.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo + */ + +#ifndef _S390_PURGATORY_H_ +#define _S390_PURGATORY_H_ +#ifndef __ASSEMBLY__ + +#include + +int verify_sha256_digest(void); + +#endif /* __ASSEMBLY__ */ +#endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index cfe2c45c5180..eb2a5c0443cd 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -204,5 +205,9 @@ int main(void) OFFSET(__GMAP_ASCE, gmap, asce); OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c); OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20); + /* kexec_sha_region */ + OFFSET(__KEXEC_SHA_REGION_START, kexec_sha_region, start); + OFFSET(__KEXEC_SHA_REGION_LEN, kexec_sha_region, len); + DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region)); return 0; } diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile new file mode 100644 index 000000000000..e9525bc1b4a6 --- /dev/null +++ b/arch/s390/purgatory/Makefile @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: GPL-2.0 + +OBJECT_FILES_NON_STANDARD := y + +purgatory-y := head.o purgatory.o string.o sha256.o mem.o + +targets += $(purgatory-y) purgatory.ro kexec-purgatory.c +PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) + +$(obj)/sha256.o: $(srctree)/lib/sha256.c + $(call if_changed_rule,cc_o_c) + +$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S + $(call if_changed_rule,as_o_S) + +$(obj)/string.o: $(srctree)/arch/s390/lib/string.c + $(call if_changed_rule,cc_o_c) + +LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib +LDFLAGS_purgatory.ro += -z nodefaultlib +KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare +KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding +KBUILD_CFLAGS += -c -MD -Os -m64 +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) + +$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) + +CMD_BIN2C = $(objtree)/scripts/basic/bin2c +quiet_cmd_bin2c = BIN2C $@ + cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@ + +$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE + $(call if_changed,bin2c) + +obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S new file mode 100644 index 000000000000..8735409d0280 --- /dev/null +++ b/arch/s390/purgatory/head.S @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Purgatory setup code + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo + */ + +#include +#include +#include +#include + +/* The purgatory is the code running between two kernels. It's main purpose + * is to verify that the next kernel was not corrupted after load and to + * start it. + */ + +.macro START_NEXT_KERNEL base + lg %r4,kernel_entry-\base(%r13) + lg %r5,load_psw_mask-\base(%r13) + ogr %r4,%r5 + stg %r4,0(%r0) + + xgr %r0,%r0 + diag %r0,%r0,0x308 +.endm + +.text +.align PAGE_SIZE +ENTRY(purgatory_start) + /* The purgatory might be called after a diag308 so better set + * architecture and addressing mode. + */ + lhi %r1,1 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE + sam64 + + larl %r5,gprregs + stmg %r6,%r15,0(%r5) + + basr %r13,0 +.base_crash: + + /* Setup stack */ + larl %r15,purgatory_end + aghi %r15,-160 + +.do_checksum_verification: + brasl %r14,verify_sha256_digest + + cghi %r2,0 /* checksum match */ + jne .disabled_wait + + /* start normal kernel */ + START_NEXT_KERNEL .base_crash + +.disabled_wait: + lpswe disabled_wait_psw-.base_crash(%r13) + + +load_psw_mask: + .long 0x00080000,0x80000000 + + .align 8 +disabled_wait_psw: + .quad 0x0002000180000000 + .quad 0x0000000000000000 + .do_checksum_verification + +gprregs: + .rept 10 + .quad 0 + .endr + +purgatory_sha256_digest: + .global purgatory_sha256_digest + .rept 32 /* SHA256_DIGEST_SIZE */ + .byte 0 + .endr + +purgatory_sha_regions: + .global purgatory_sha_regions + .rept 16 * __KEXEC_SHA_REGION_SIZE /* KEXEC_SEGMENTS_MAX */ + .byte 0 + .endr + +kernel_entry: + .global kernel_entry + .quad 0 + + .align PAGE_SIZE +stack: + .skip PAGE_SIZE + .align PAGE_SIZE +purgatory_end: diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c new file mode 100644 index 000000000000..52b92f2bf0b9 --- /dev/null +++ b/arch/s390/purgatory/purgatory.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Purgatory code running between two kernels. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo + */ + +#include +#include +#include +#include + +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; + +u64 kernel_entry; + +int verify_sha256_digest(void) +{ + struct kexec_sha_region *ptr, *end; + u8 digest[SHA256_DIGEST_SIZE]; + struct sha256_state sctx; + + sha256_init(&sctx); + end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); + + for (ptr = purgatory_sha_regions; ptr < end; ptr++) + sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len); + + sha256_final(&sctx, digest); + + if (memcmp(digest, purgatory_sha256_digest, sizeof(digest))) + return 1; + + return 0; +} From 71406883fd35794d573b3085433c41d0a3bf6c21 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 19 Jun 2017 10:45:33 +0200 Subject: [PATCH 119/660] s390/kexec_file: Add kexec_file_load system call This patch adds the kexec_file_load system call to s390 as well as the arch specific functions common code requires to work. Loaders for the different file types will be added later. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 13 +++ arch/s390/configs/default_defconfig | 1 + arch/s390/kernel/Makefile | 2 + arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/machine_kexec_file.c | 126 ++++++++++++++++++++++++++ arch/s390/kernel/syscalls/syscall.tbl | 1 + 6 files changed, 144 insertions(+) create mode 100644 arch/s390/kernel/machine_kexec_file.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5b8d0859b317..3223ce0680c0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -51,6 +51,19 @@ config KEXEC def_bool y select KEXEC_CORE +config KEXEC_FILE + bool "kexec file based system call" + select KEXEC_CORE + select BUILD_BIN2C + depends on CRYPTO + depends on CRYPTO_SHA256 + depends on CRYPTO_SHA256_S390 + ---help--- + This is new version of kexec system call. This system call is + file based and takes file descriptors as system call argument + for kernel and initramfs as opposed to list of segments as + accepted by previous system call. + config ARCH_HAS_KEXEC_PURGATORY def_bool y depends on KEXEC_FILE diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 5af8458951cf..2310315b0744 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -719,3 +719,4 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_KVM_S390_UCONTROL=y CONFIG_VHOST_NET=m +CONFIG_KEXEC_FILE=y diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index b06a6f79c1ec..35bec1ab84e3 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -82,6 +82,8 @@ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o + obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 11e9d8b5c1b0..607c5e9fba3d 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -182,3 +182,4 @@ COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags); +COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c new file mode 100644 index 000000000000..d9b4f9d23e9f --- /dev/null +++ b/arch/s390/kernel/machine_kexec_file.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * s390 code for kexec_file_load system call + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo + */ + +#include +#include +#include + +const struct kexec_file_ops * const kexec_file_loaders[] = { + NULL, +}; + +/* + * The kernel is loaded to a fixed location. Turn off kexec_locate_mem_hole + * and provide kbuf->mem by hand. + */ +int arch_kexec_walk_mem(struct kexec_buf *kbuf, + int (*func)(struct resource *, void *)) +{ + return 1; +} + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + Elf_Rela *relas; + int i; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_shndx == SHN_UNDEF) + return -ENOEXEC; + + if (sym->st_shndx == SHN_COMMON) + return -ENOEXEC; + + if (sym->st_shndx >= pi->ehdr->e_shnum && + sym->st_shndx != SHN_ABS) + return -ENOEXEC; + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + val = sym->st_value; + if (sym->st_shndx != SHN_ABS) + val += pi->sechdrs[sym->st_shndx].sh_addr; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + switch (ELF64_R_TYPE(relas[i].r_info)) { + case R_390_8: /* Direct 8 bit. */ + *(u8 *)loc = val; + break; + case R_390_12: /* Direct 12 bit. */ + *(u16 *)loc &= 0xf000; + *(u16 *)loc |= val & 0xfff; + break; + case R_390_16: /* Direct 16 bit. */ + *(u16 *)loc = val; + break; + case R_390_20: /* Direct 20 bit. */ + *(u32 *)loc &= 0xf00000ff; + *(u32 *)loc |= (val & 0xfff) << 16; /* DL */ + *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */ + break; + case R_390_32: /* Direct 32 bit. */ + *(u32 *)loc = val; + break; + case R_390_64: /* Direct 64 bit. */ + *(u64 *)loc = val; + break; + case R_390_PC16: /* PC relative 16 bit. */ + *(u16 *)loc = (val - addr); + break; + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + *(u16 *)loc = (val - addr) >> 1; + break; + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + *(u32 *)loc = (val - addr) >> 1; + break; + case R_390_PC32: /* PC relative 32 bit. */ + *(u32 *)loc = (val - addr); + break; + case R_390_PC64: /* PC relative 64 bit. */ + *(u64 *)loc = (val - addr); + break; + default: + break; + } + } + return 0; +} + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + /* A kernel must be at least large enough to contain head.S. During + * load memory in head.S will be accessed, e.g. to register the next + * command line. If the next kernel were smaller the current kernel + * will panic at load. + * + * 0x11000 = sizeof(head.S) + */ + if (buf_len < 0x11000) + return -ENOEXEC; + + return kexec_image_probe_default(image, buf, buf_len); +} diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index b38d48464368..8b210ead7956 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -388,3 +388,4 @@ 378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage 379 common statx sys_statx compat_sys_statx 380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load From e49bb0a27fa3c6ec45cc13e2102a6ec13c4ae697 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 30 Aug 2017 14:03:38 +0200 Subject: [PATCH 120/660] s390/kexec_file: Add image loader Add an image loader for kexec_file_load. For simplicity first skip crash support. The functions defined in machine_kexec_file will later be shared with the ELF loader. Signed-off-by: Philipp Rudo Reviewed-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 22 ++++++++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/kexec_image.c | 78 +++++++++++++++++++++++++++ arch/s390/kernel/machine_kexec_file.c | 75 ++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 arch/s390/kernel/kexec_image.c diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 1d708a419326..4c9da9974cf4 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -46,4 +46,26 @@ static inline void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) { } +struct kimage; +struct s390_load_data { + /* Pointer to the kernel buffer. Used to register cmdline etc.. */ + void *kernel_buf; + + /* Total size of loaded segments in memory. Used as an offset. */ + size_t memsz; + + /* Load address of initrd. Used to register INITRD_START in kernel. */ + unsigned long initrd_load_addr; +}; + +int kexec_file_add_purgatory(struct kimage *image, + struct s390_load_data *data); +int kexec_file_add_initrd(struct kimage *image, + struct s390_load_data *data, + char *initrd, unsigned long initrd_len); +int *kexec_file_update_kernel(struct kimage *iamge, + struct s390_load_data *data); + +extern const struct kexec_file_ops s390_kexec_image_ops; + #endif /*_S390_KEXEC_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 35bec1ab84e3..a84e9611c5c7 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -82,7 +82,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o -obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c new file mode 100644 index 000000000000..7f5021e6c242 --- /dev/null +++ b/arch/s390/kernel/kexec_image.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Image loader for kexec_file_load system call. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo + */ + +#include +#include +#include +#include + +static int kexec_file_add_image_kernel(struct kimage *image, + struct s390_load_data *data, + char *kernel, unsigned long kernel_len) +{ + struct kexec_buf buf; + int ret; + + buf.image = image; + + buf.buffer = kernel + STARTUP_NORMAL_OFFSET; + buf.bufsz = kernel_len - STARTUP_NORMAL_OFFSET; + + buf.mem = STARTUP_NORMAL_OFFSET; + buf.memsz = buf.bufsz; + + ret = kexec_add_buffer(&buf); + + data->kernel_buf = kernel; + data->memsz += buf.memsz + STARTUP_NORMAL_OFFSET; + + return ret; +} + +static void *s390_image_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + struct s390_load_data data = {0}; + int ret; + + /* We don't support crash kernels yet. */ + if (image->type == KEXEC_TYPE_CRASH) + return ERR_PTR(-ENOTSUPP); + + ret = kexec_file_add_image_kernel(image, &data, kernel, kernel_len); + if (ret) + return ERR_PTR(ret); + + if (initrd) { + ret = kexec_file_add_initrd(image, &data, initrd, initrd_len); + if (ret) + return ERR_PTR(ret); + } + + ret = kexec_file_add_purgatory(image, &data); + if (ret) + return ERR_PTR(ret); + + return kexec_file_update_kernel(image, &data); +} + +static int s390_image_probe(const char *buf, unsigned long len) +{ + /* Can't reliably tell if an image is valid. Therefore give the + * user whatever he wants. + */ + return 0; +} + +const struct kexec_file_ops s390_kexec_image_ops = { + .probe = s390_image_probe, + .load = s390_image_load, +}; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index d9b4f9d23e9f..2a2ceece77b0 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,9 +12,84 @@ #include const struct kexec_file_ops * const kexec_file_loaders[] = { + &s390_kexec_image_ops, NULL, }; +int *kexec_file_update_kernel(struct kimage *image, + struct s390_load_data *data) +{ + unsigned long *loc; + + if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) + return ERR_PTR(-EINVAL); + + if (image->cmdline_buf_len) + memcpy(data->kernel_buf + COMMAND_LINE_OFFSET, + image->cmdline_buf, image->cmdline_buf_len); + + if (image->initrd_buf) { + loc = (unsigned long *)(data->kernel_buf + INITRD_START_OFFSET); + *loc = data->initrd_load_addr; + + loc = (unsigned long *)(data->kernel_buf + INITRD_SIZE_OFFSET); + *loc = image->initrd_buf_len; + } + + return NULL; +} + +static int kexec_file_update_purgatory(struct kimage *image) +{ + u64 entry, type; + int ret; + + entry = STARTUP_NORMAL_OFFSET; + ret = kexec_purgatory_get_set_symbol(image, "kernel_entry", &entry, + sizeof(entry), false); + return ret; +} + +int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data) +{ + struct kexec_buf buf; + int ret; + + buf.image = image; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + + ret = kexec_load_purgatory(image, &buf); + if (ret) + return ret; + + ret = kexec_file_update_purgatory(image); + return ret; +} + +int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data, + char *initrd, unsigned long initrd_len) +{ + struct kexec_buf buf; + int ret; + + buf.image = image; + + buf.buffer = initrd; + buf.bufsz = initrd_len; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + buf.memsz = buf.bufsz; + + data->initrd_load_addr = buf.mem; + data->memsz += buf.memsz; + + ret = kexec_add_buffer(&buf); + return ret; +} + /* * The kernel is loaded to a fixed location. Turn off kexec_locate_mem_hole * and provide kbuf->mem by hand. From ee337f5469fd67f22d231e520ec4189ce0589d92 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Tue, 5 Sep 2017 11:55:23 +0200 Subject: [PATCH 121/660] s390/kexec_file: Add crash support to image loader Add support to load a crash kernel to the image loader. This requires extending the purgatory. Signed-off-by: Philipp Rudo Reviewed-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kexec_image.c | 6 +- arch/s390/kernel/machine_kexec_file.c | 45 ++++++- arch/s390/purgatory/head.S | 185 +++++++++++++++++++++++++- arch/s390/purgatory/purgatory.c | 4 + 4 files changed, 234 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index 7f5021e6c242..3800852595e8 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -25,6 +25,8 @@ static int kexec_file_add_image_kernel(struct kimage *image, buf.bufsz = kernel_len - STARTUP_NORMAL_OFFSET; buf.mem = STARTUP_NORMAL_OFFSET; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; buf.memsz = buf.bufsz; ret = kexec_add_buffer(&buf); @@ -43,10 +45,6 @@ static void *s390_image_load(struct kimage *image, struct s390_load_data data = {0}; int ret; - /* We don't support crash kernels yet. */ - if (image->type == KEXEC_TYPE_CRASH) - return ERR_PTR(-ENOTSUPP); - ret = kexec_file_add_image_kernel(image, &data, kernel, kernel_len); if (ret) return ERR_PTR(ret); diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 2a2ceece77b0..75aea2c1d823 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -28,6 +28,14 @@ int *kexec_file_update_kernel(struct kimage *image, memcpy(data->kernel_buf + COMMAND_LINE_OFFSET, image->cmdline_buf, image->cmdline_buf_len); + if (image->type == KEXEC_TYPE_CRASH) { + loc = (unsigned long *)(data->kernel_buf + OLDMEM_BASE_OFFSET); + *loc = crashk_res.start; + + loc = (unsigned long *)(data->kernel_buf + OLDMEM_SIZE_OFFSET); + *loc = crashk_res.end - crashk_res.start + 1; + } + if (image->initrd_buf) { loc = (unsigned long *)(data->kernel_buf + INITRD_START_OFFSET); *loc = data->initrd_load_addr; @@ -44,9 +52,40 @@ static int kexec_file_update_purgatory(struct kimage *image) u64 entry, type; int ret; - entry = STARTUP_NORMAL_OFFSET; + if (image->type == KEXEC_TYPE_CRASH) { + entry = STARTUP_KDUMP_OFFSET; + type = KEXEC_TYPE_CRASH; + } else { + entry = STARTUP_NORMAL_OFFSET; + type = KEXEC_TYPE_DEFAULT; + } + ret = kexec_purgatory_get_set_symbol(image, "kernel_entry", &entry, sizeof(entry), false); + if (ret) + return ret; + + ret = kexec_purgatory_get_set_symbol(image, "kernel_type", &type, + sizeof(type), false); + if (ret) + return ret; + + if (image->type == KEXEC_TYPE_CRASH) { + u64 crash_size; + + ret = kexec_purgatory_get_set_symbol(image, "crash_start", + &crashk_res.start, + sizeof(crashk_res.start), + false); + if (ret) + return ret; + + crash_size = crashk_res.end - crashk_res.start + 1; + ret = kexec_purgatory_get_set_symbol(image, "crash_size", + &crash_size, + sizeof(crash_size), + false); + } return ret; } @@ -59,6 +98,8 @@ int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data) data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; ret = kexec_load_purgatory(image, &buf); if (ret) @@ -81,6 +122,8 @@ int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data, data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; buf.memsz = buf.bufsz; data->initrd_load_addr = buf.mem; diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 8735409d0280..660c96a05a9b 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -15,8 +15,52 @@ /* The purgatory is the code running between two kernels. It's main purpose * is to verify that the next kernel was not corrupted after load and to * start it. + * + * If the next kernel is a crash kernel there are some peculiarities to + * consider: + * + * First the purgatory is called twice. Once only to verify the + * sha digest. So if the crash kernel got corrupted the old kernel can try + * to trigger a stand-alone dumper. And once to actually load the crash kernel. + * + * Second the purgatory also has to swap the crash memory region with its + * destination at address 0. As the purgatory is part of crash memory this + * requires some finesse. The tactic here is that the purgatory first copies + * itself to the end of the destination and then swaps the rest of the + * memory running from there. */ +#define bufsz purgatory_end-stack + +.macro MEMCPY dst,src,len + lgr %r0,\dst + lgr %r1,\len + lgr %r2,\src + lgr %r3,\len + +20: mvcle %r0,%r2,0 + jo 20b +.endm + +.macro MEMSWAP dst,src,buf,len +10: cghi \len,bufsz + jh 11f + lgr %r4,\len + j 12f +11: lghi %r4,bufsz + +12: MEMCPY \buf,\dst,%r4 + MEMCPY \dst,\src,%r4 + MEMCPY \src,\buf,%r4 + + agr \dst,%r4 + agr \src,%r4 + sgr \len,%r4 + + cghi \len,0 + jh 10b +.endm + .macro START_NEXT_KERNEL base lg %r4,kernel_entry-\base(%r13) lg %r5,load_psw_mask-\base(%r13) @@ -47,18 +91,144 @@ ENTRY(purgatory_start) larl %r15,purgatory_end aghi %r15,-160 + /* If the next kernel is KEXEC_TYPE_CRASH the purgatory is called + * directly with a flag passed in %r2 whether the purgatory shall do + * checksum verification only (%r2 = 0 -> verification only). + * + * Check now and preserve over C function call by storing in + * %r10 whith + * 1 -> checksum verification only + * 0 -> load new kernel + */ + lghi %r10,0 + lg %r11,kernel_type-.base_crash(%r13) + cghi %r11,1 /* KEXEC_TYPE_CRASH */ + jne .do_checksum_verification + cghi %r2,0 /* checksum verification only */ + jne .do_checksum_verification + lghi %r10,1 + .do_checksum_verification: brasl %r14,verify_sha256_digest + cghi %r10,1 /* checksum verification only */ + je .return_old_kernel cghi %r2,0 /* checksum match */ jne .disabled_wait + /* If the next kernel is a crash kernel the purgatory has to swap + * the mem regions first. + */ + cghi %r11,1 /* KEXEC_TYPE_CRASH */ + je .start_crash_kernel + /* start normal kernel */ START_NEXT_KERNEL .base_crash +.return_old_kernel: + lmg %r6,%r15,gprregs-.base_crash(%r13) + br %r14 + .disabled_wait: lpswe disabled_wait_psw-.base_crash(%r13) +.start_crash_kernel: + /* Location of purgatory_start in crash memory */ + lgr %r8,%r13 + aghi %r8,-(.base_crash-purgatory_start) + + /* Destination for this code i.e. end of memory to be swapped. */ + lg %r9,crash_size-.base_crash(%r13) + aghi %r9,-(purgatory_end-purgatory_start) + + /* Destination in crash memory, i.e. same as r9 but in crash memory. */ + lg %r10,crash_start-.base_crash(%r13) + agr %r10,%r9 + + /* Buffer location (in crash memory) and size. As the purgatory is + * behind the point of no return it can re-use the stack as buffer. + */ + lghi %r11,bufsz + larl %r12,stack + + MEMCPY %r12,%r9,%r11 /* dst -> (crash) buf */ + MEMCPY %r9,%r8,%r11 /* self -> dst */ + + /* Jump to new location. */ + lgr %r7,%r9 + aghi %r7,.jump_to_dst-purgatory_start + br %r7 + +.jump_to_dst: + basr %r13,0 +.base_dst: + + /* clear buffer */ + MEMCPY %r12,%r10,%r11 /* (crash) buf -> (crash) dst */ + + /* Load new buffer location after jump */ + larl %r7,stack + aghi %r10,stack-purgatory_start + MEMCPY %r10,%r7,%r11 /* (new) buf -> (crash) buf */ + + /* Now the code is set up to run from its designated location. Start + * swapping the rest of crash memory now. + * + * The registers will be used as follow: + * + * %r0-%r4 reserved for macros defined above + * %r5-%r6 tmp registers + * %r7 pointer to current struct sha region + * %r8 index to iterate over all sha regions + * %r9 pointer in crash memory + * %r10 pointer in old kernel + * %r11 total size (still) to be moved + * %r12 pointer to buffer + */ + lgr %r12,%r7 + lgr %r11,%r9 + lghi %r10,0 + lg %r9,crash_start-.base_dst(%r13) + lghi %r8,16 /* KEXEC_SEGMENTS_MAX */ + larl %r7,purgatory_sha_regions + + j .loop_first + + /* Loop over all purgatory_sha_regions. */ +.loop_next: + aghi %r8,-1 + cghi %r8,0 + je .loop_out + + aghi %r7,__KEXEC_SHA_REGION_SIZE + +.loop_first: + lg %r5,__KEXEC_SHA_REGION_START(%r7) + cghi %r5,0 + je .loop_next + + /* Copy [end last sha region, start current sha region) */ + /* Note: kexec_sha_region->start points in crash memory */ + sgr %r5,%r9 + MEMCPY %r9,%r10,%r5 + + agr %r9,%r5 + agr %r10,%r5 + sgr %r11,%r5 + + /* Swap sha region */ + lg %r6,__KEXEC_SHA_REGION_LEN(%r7) + MEMSWAP %r9,%r10,%r12,%r6 + sg %r11,__KEXEC_SHA_REGION_LEN(%r7) + j .loop_next + +.loop_out: + /* Copy rest of crash memory */ + MEMCPY %r9,%r10,%r11 + + /* start crash kernel */ + START_NEXT_KERNEL .base_dst + load_psw_mask: .long 0x00080000,0x80000000 @@ -89,8 +259,21 @@ kernel_entry: .global kernel_entry .quad 0 +kernel_type: + .global kernel_type + .quad 0 + +crash_start: + .global crash_start + .quad 0 + +crash_size: + .global crash_size + .quad 0 + .align PAGE_SIZE stack: - .skip PAGE_SIZE + /* The buffer to move this code must be as big as the code. */ + .skip stack-purgatory_start .align PAGE_SIZE purgatory_end: diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 52b92f2bf0b9..4e2beb3c29b7 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -16,6 +16,10 @@ struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; u64 kernel_entry; +u64 kernel_type; + +u64 crash_start; +u64 crash_size; int verify_sha256_digest(void) { From 8be018827154666d1fe5904cb7a43b6706e01c87 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 11 Sep 2017 15:15:29 +0200 Subject: [PATCH 122/660] s390/kexec_file: Add ELF loader Add an ELF loader for kexec_file. The main task here is to do proper sanity checks on the ELF file. Basically all other functionality was already implemented for the image loader. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 1 + arch/s390/kernel/Makefile | 1 + arch/s390/kernel/kexec_elf.c | 147 ++++++++++++++++++++++++++ arch/s390/kernel/machine_kexec_file.c | 1 + 4 files changed, 150 insertions(+) create mode 100644 arch/s390/kernel/kexec_elf.c diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 4c9da9974cf4..825dd0f7f221 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -67,5 +67,6 @@ int *kexec_file_update_kernel(struct kimage *iamge, struct s390_load_data *data); extern const struct kexec_file_ops s390_kexec_image_ops; +extern const struct kexec_file_ops s390_kexec_elf_ops; #endif /*_S390_KEXEC_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a84e9611c5c7..84ea6225efb4 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o +obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c new file mode 100644 index 000000000000..5a286b012043 --- /dev/null +++ b/arch/s390/kernel/kexec_elf.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ELF loader for kexec_file_load system call. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo + */ + +#include +#include +#include +#include + +static int kexec_file_add_elf_kernel(struct kimage *image, + struct s390_load_data *data, + char *kernel, unsigned long kernel_len) +{ + struct kexec_buf buf; + const Elf_Ehdr *ehdr; + const Elf_Phdr *phdr; + int i, ret; + + ehdr = (Elf_Ehdr *)kernel; + buf.image = image; + + phdr = (void *)ehdr + ehdr->e_phoff; + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type != PT_LOAD) + continue; + + buf.buffer = kernel + phdr->p_offset; + buf.bufsz = phdr->p_filesz; + + buf.mem = ALIGN(phdr->p_paddr, phdr->p_align); + buf.memsz = phdr->p_memsz; + + if (phdr->p_paddr == 0) { + data->kernel_buf = buf.buffer; + data->memsz += STARTUP_NORMAL_OFFSET; + + buf.buffer += STARTUP_NORMAL_OFFSET; + buf.bufsz -= STARTUP_NORMAL_OFFSET; + + buf.mem += STARTUP_NORMAL_OFFSET; + buf.memsz -= STARTUP_NORMAL_OFFSET; + } + + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + + ret = kexec_add_buffer(&buf); + if (ret) + return ret; + + data->memsz += buf.memsz; + } + + return 0; +} + +static void *s390_elf_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + struct s390_load_data data = {0}; + const Elf_Ehdr *ehdr; + const Elf_Phdr *phdr; + size_t size; + int i, ret; + + /* image->fobs->probe already checked for valid ELF magic number. */ + ehdr = (Elf_Ehdr *)kernel; + + if (ehdr->e_type != ET_EXEC || + ehdr->e_ident[EI_CLASS] != ELFCLASS64 || + !elf_check_arch(ehdr)) + return ERR_PTR(-EINVAL); + + if (!ehdr->e_phnum || ehdr->e_phentsize != sizeof(Elf_Phdr)) + return ERR_PTR(-EINVAL); + + size = ehdr->e_ehsize + ehdr->e_phoff; + size += ehdr->e_phentsize * ehdr->e_phnum; + if (size > kernel_len) + return ERR_PTR(-EINVAL); + + phdr = (void *)ehdr + ehdr->e_phoff; + size = ALIGN(size, phdr->p_align); + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type == PT_INTERP) + return ERR_PTR(-EINVAL); + + if (phdr->p_offset > kernel_len) + return ERR_PTR(-EINVAL); + + size += ALIGN(phdr->p_filesz, phdr->p_align); + } + + if (size > kernel_len) + return ERR_PTR(-EINVAL); + + ret = kexec_file_add_elf_kernel(image, &data, kernel, kernel_len); + if (ret) + return ERR_PTR(ret); + + if (!data.memsz) + return ERR_PTR(-EINVAL); + + if (initrd) { + ret = kexec_file_add_initrd(image, &data, initrd, initrd_len); + if (ret) + return ERR_PTR(ret); + } + + ret = kexec_file_add_purgatory(image, &data); + if (ret) + return ERR_PTR(ret); + + return kexec_file_update_kernel(image, &data); +} + +static int s390_elf_probe(const char *buf, unsigned long len) +{ + const Elf_Ehdr *ehdr; + + if (len < sizeof(Elf_Ehdr)) + return -ENOEXEC; + + ehdr = (Elf_Ehdr *)buf; + + /* Only check the ELF magic number here and do proper validity check + * in the loader. Any check here that fails would send the erroneous + * ELF file to the image loader that does not care what it gets. + * (Most likely) causing behavior not intended by the user. + */ + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) + return -ENOEXEC; + + return 0; +} + +const struct kexec_file_ops s390_kexec_elf_ops = { + .probe = s390_elf_probe, + .load = s390_elf_load, +}; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 75aea2c1d823..f413f57f8d20 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,6 +12,7 @@ #include const struct kexec_file_ops * const kexec_file_loaders[] = { + &s390_kexec_elf_ops, &s390_kexec_image_ops, NULL, }; From bdea9f6f7a707301878573a5c35e39e4fe817378 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Tue, 27 Mar 2018 13:14:12 +0200 Subject: [PATCH 123/660] s390/Kconfig: Move kexec config options to "Processor type and features" The config options for kexec are currently not under any menu directory. Up until now this was not a problem as standard kexec is always compiled in and thus does not create a menu entry. This changed when kexec_file_load was enabled. Its config option requires a menu entry which, when added beneath standard kexec option, appears on the main directory above "General Setup". Thus move the whole block further down such that the entry in now in "Processor type and features". While at it also update the help text for kexec file. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3223ce0680c0..beccb58a82e5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -47,27 +47,6 @@ config PGSTE config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config KEXEC - def_bool y - select KEXEC_CORE - -config KEXEC_FILE - bool "kexec file based system call" - select KEXEC_CORE - select BUILD_BIN2C - depends on CRYPTO - depends on CRYPTO_SHA256 - depends on CRYPTO_SHA256_S390 - ---help--- - This is new version of kexec system call. This system call is - file based and takes file descriptors as system call argument - for kernel and initramfs as opposed to list of segments as - accepted by previous system call. - -config ARCH_HAS_KEXEC_PURGATORY - def_bool y - depends on KEXEC_FILE - config AUDIT_ARCH def_bool y @@ -542,6 +521,26 @@ source kernel/Kconfig.preempt source kernel/Kconfig.hz +config KEXEC + def_bool y + select KEXEC_CORE + +config KEXEC_FILE + bool "kexec file based system call" + select KEXEC_CORE + select BUILD_BIN2C + depends on CRYPTO + depends on CRYPTO_SHA256 + depends on CRYPTO_SHA256_S390 + help + Enable the kexec file based system call. In contrast to the normal + kexec system call this system call takes file descriptors for the + kernel and initramfs as arguments. + +config ARCH_HAS_KEXEC_PURGATORY + def_bool y + depends on KEXEC_FILE + config ARCH_RANDOM def_bool y prompt "s390 architectural random number generation API" From de66b2429100c85b72db5c409526351d3ffc5faa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 12 Apr 2018 13:45:52 +0200 Subject: [PATCH 124/660] s390/kexec_file: add generated files to .gitignore Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/purgatory/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 arch/s390/purgatory/.gitignore diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore new file mode 100644 index 000000000000..e9e66f178a6d --- /dev/null +++ b/arch/s390/purgatory/.gitignore @@ -0,0 +1,2 @@ +kexec-purgatory.c +purgatory.ro From 701e188c6560d6abeba508f530c4224b4e830fb5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 12 Apr 2018 08:42:48 +0100 Subject: [PATCH 125/660] s390/decompressor: Ignore file vmlinux.bin.full Commit 81796a3c6a4a ("s390/decompressor: trim uncompressed image head during the build") introduced a new file named vmlinux.bin.full in directory arch/s390/boot/compressed. Add this file to the list of ignored files so it does not show up on git status. Signed-off-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index ae06b9b4c02f..2088cc140629 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,3 +1,4 @@ sizes.h vmlinux vmlinux.lds +vmlinux.bin.full From 232acdff21fb02f0ccd538cd29c9ee7e028b6101 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 10 Apr 2018 12:39:34 +0200 Subject: [PATCH 126/660] s390/nospec: include cpu.h Fix the following sparse warnings: symbol 'cpu_show_spectre_v1' was not declared. Should it be static? symbol 'cpu_show_spectre_v2' was not declared. Should it be static? Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/nospec-branch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index f236ce8757e8..46d49a11663f 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include static int __init nobp_setup_early(char *str) From c65bbb51c6e98a1956c08faab81941ec558ef0ba Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Apr 2018 10:24:29 +0200 Subject: [PATCH 127/660] s390/boot: remove unused COMPILE_VERSION and ccflags-y ccflags-y has no effect (no code is built in that directory, arch/s390/boot/compressed/Makefile defines its own KBUILD_CFLAGS). Removing ccflags-y together with COMPILE_VERSION. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index da9dad35c28e..d1fa37fcce83 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,12 +3,6 @@ # Makefile for the linux s390-specific parts of the memory manager. # -COMPILE_VERSION := __linux_compile_version_id__`hostname | \ - tr -c '[0-9A-Za-z]' '_'`__`date | \ - tr -c '[0-9A-Za-z]' '_'`_t - -ccflags-y := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. - targets := image targets += bzImage subdir- := compressed From f43c426a581f04272a852f0486ae431acff6d87e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 13 Apr 2018 10:57:27 +0200 Subject: [PATCH 128/660] s390: remove couple of duplicate includes Removing couple of duplicate includes, found by "make includecheck". That leaves 1 duplicate include in arch/s390/kernel/entry.S, which is there for a reason (it includes generated asm/syscall_table.h twice). Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_diag.c | 1 - drivers/s390/net/qeth_l2_main.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index f035c2f25d35..131f1989f6f3 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "dasd_int.h" #include "dasd_diag.h" diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 50a313806dde..2ad6f12f3d49 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include "qeth_core.h" #include "qeth_l2.h" From 451239eb3d397bd197a79cc3aab943da41ba0905 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 13 Apr 2018 14:04:24 +0200 Subject: [PATCH 129/660] s390: add support for IBM z14 Model ZR1 Just add the new machine type number to the two places that matter. Cc: # v4.14+ Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 8 ++++---- arch/s390/kernel/perf_cpum_cf_events.c | 1 + arch/s390/kernel/setup.c | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index beccb58a82e5..199ac3e4da1d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -286,12 +286,12 @@ config MARCH_Z13 older machines. config MARCH_Z14 - bool "IBM z14" + bool "IBM z14 ZR1 and z14" select HAVE_MARCH_Z14_FEATURES help - Select this to enable optimizations for IBM z14 (3906 series). - The kernel will be slightly faster but will not work on older - machines. + Select this to enable optimizations for IBM z14 ZR1 and z14 (3907 + and 3906 series). The kernel will be slightly faster but will not + work on older machines. endchoice diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index c5bc3f209652..5ee27dc9a10c 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -583,6 +583,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) model = cpumcf_z13_pmu_event_attr; break; case 0x3906: + case 0x3907: model = cpumcf_z14_pmu_event_attr; break; default: diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index fc3b4aa185cc..d82a9ec64ea9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -821,6 +821,7 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; case 0x3906: + case 0x3907: strcpy(elf_platform, "z14"); break; } From fe710508b6ba9d28730f3021fed70e7043433b2e Mon Sep 17 00:00:00 2001 From: Collin May Date: Sat, 7 Apr 2018 14:32:48 -0700 Subject: [PATCH 130/660] USB: serial: simple: add libtransistor console Add simple driver for libtransistor USB console. This device is implemented in software: https://github.com/reswitched/libtransistor/blob/development/lib/usb_serial.c Signed-off-by: Collin May Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/Kconfig | 1 + drivers/usb/serial/usb-serial-simple.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index a646820f5a78..533f127c30ad 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -62,6 +62,7 @@ config USB_SERIAL_SIMPLE - Fundamental Software dongle. - Google USB serial devices - HP4x calculators + - Libtransistor USB console - a number of Motorola phones - Motorola Tetra devices - Novatel Wireless GPS receivers diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 4ef79e29cb26..40864c2bd9dc 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -63,6 +63,11 @@ DEVICE(flashloader, FLASHLOADER_IDS); 0x01) } DEVICE(google, GOOGLE_IDS); +/* Libtransistor USB console */ +#define LIBTRANSISTOR_IDS() \ + { USB_DEVICE(0x1209, 0x8b00) } +DEVICE(libtransistor, LIBTRANSISTOR_IDS); + /* ViVOpay USB Serial Driver */ #define VIVOPAY_IDS() \ { USB_DEVICE(0x1d5f, 0x1004) } /* ViVOpay 8800 */ @@ -110,6 +115,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &funsoft_device, &flashloader_device, &google_device, + &libtransistor_device, &vivopay_device, &moto_modem_device, &motorola_tetra_device, @@ -126,6 +132,7 @@ static const struct usb_device_id id_table[] = { FUNSOFT_IDS(), FLASHLOADER_IDS(), GOOGLE_IDS(), + LIBTRANSISTOR_IDS(), VIVOPAY_IDS(), MOTO_IDS(), MOTOROLA_TETRA_IDS(), From 2f18d46683cb3047c41229d57cf7c6e2ee48676f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 4 Apr 2018 10:15:38 +0200 Subject: [PATCH 131/660] rbd: refactor rbd_wait_state_locked() In preparation for lock_timeout option, make rbd_wait_state_locked() return error codes. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 07dc5419bd63..f4b1b91e6d4d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3533,9 +3533,21 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, /* * lock_rwsem must be held for read */ -static void rbd_wait_state_locked(struct rbd_device *rbd_dev) +static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire) { DEFINE_WAIT(wait); + int ret = 0; + + if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) + return -EBLACKLISTED; + + if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) + return 0; + + if (!may_acquire) { + rbd_warn(rbd_dev, "exclusive lock required"); + return -EROFS; + } do { /* @@ -3549,10 +3561,14 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev) up_read(&rbd_dev->lock_rwsem); schedule(); down_read(&rbd_dev->lock_rwsem); - } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && - !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)); + if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { + ret = -EBLACKLISTED; + break; + } + } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED); finish_wait(&rbd_dev->lock_waitq, &wait); + return ret; } static void rbd_queue_workfn(struct work_struct *work) @@ -3638,19 +3654,10 @@ static void rbd_queue_workfn(struct work_struct *work) (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read); if (must_be_locked) { down_read(&rbd_dev->lock_rwsem); - if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && - !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { - if (rbd_dev->opts->exclusive) { - rbd_warn(rbd_dev, "exclusive lock required"); - result = -EROFS; - goto err_unlock; - } - rbd_wait_state_locked(rbd_dev); - } - if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { - result = -EBLACKLISTED; + result = rbd_wait_state_locked(rbd_dev, + !rbd_dev->opts->exclusive); + if (result) goto err_unlock; - } } img_request = rbd_img_request_create(rbd_dev, op_type, snapc); @@ -5216,6 +5223,8 @@ static void rbd_dev_image_unlock(struct rbd_device *rbd_dev) static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) { + int ret; + if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); return -EINVAL; @@ -5223,9 +5232,9 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) /* FIXME: "rbd map --exclusive" should be in interruptible */ down_read(&rbd_dev->lock_rwsem); - rbd_wait_state_locked(rbd_dev); + ret = rbd_wait_state_locked(rbd_dev, true); up_read(&rbd_dev->lock_rwsem); - if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { + if (ret) { rbd_warn(rbd_dev, "failed to acquire exclusive lock"); return -EROFS; } From 34f55d0b3a0a39c95134c0c89173893b846d4c80 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 26 Mar 2018 10:22:55 -0400 Subject: [PATCH 132/660] rbd: support timeout in rbd_wait_state_locked() currently, the rbd_wait_state_locked() will wait forever if we can't get our state locked. Example: rbd map --exclusive test1 --> /dev/rbd0 rbd map test1 --> /dev/rbd1 dd if=/dev/zero of=/dev/rbd1 bs=1M count=1 --> IO blocked To avoid this problem, this patch introduce a timeout design in rbd_wait_state_locked(). Then rbd_wait_state_locked() will return error when we reach a timeout. This patch allow user to set the lock_timeout in rbd mapping. Signed-off-by: Dongsheng Yang Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f4b1b91e6d4d..d5a51493e8b5 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -732,6 +732,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) */ enum { Opt_queue_depth, + Opt_lock_timeout, Opt_last_int, /* int args above */ Opt_last_string, @@ -745,6 +746,7 @@ enum { static match_table_t rbd_opts_tokens = { {Opt_queue_depth, "queue_depth=%d"}, + {Opt_lock_timeout, "lock_timeout=%d"}, /* int args above */ /* string args above */ {Opt_read_only, "read_only"}, @@ -758,12 +760,14 @@ static match_table_t rbd_opts_tokens = { struct rbd_options { int queue_depth; + unsigned long lock_timeout; bool read_only; bool lock_on_read; bool exclusive; }; #define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ +#define RBD_LOCK_TIMEOUT_DEFAULT 0 /* no timeout */ #define RBD_READ_ONLY_DEFAULT false #define RBD_LOCK_ON_READ_DEFAULT false #define RBD_EXCLUSIVE_DEFAULT false @@ -796,6 +800,14 @@ static int parse_rbd_opts_token(char *c, void *private) } rbd_opts->queue_depth = intval; break; + case Opt_lock_timeout: + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (intval < 0 || intval > INT_MAX / 1000) { + pr_err("lock_timeout out of range\n"); + return -EINVAL; + } + rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000); + break; case Opt_read_only: rbd_opts->read_only = true; break; @@ -3536,6 +3548,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire) { DEFINE_WAIT(wait); + unsigned long timeout; int ret = 0; if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) @@ -3559,12 +3572,18 @@ static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire) prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait, TASK_UNINTERRUPTIBLE); up_read(&rbd_dev->lock_rwsem); - schedule(); + timeout = schedule_timeout(ceph_timeout_jiffies( + rbd_dev->opts->lock_timeout)); down_read(&rbd_dev->lock_rwsem); if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { ret = -EBLACKLISTED; break; } + if (!timeout) { + rbd_warn(rbd_dev, "timed out waiting for lock"); + ret = -ETIMEDOUT; + break; + } } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED); finish_wait(&rbd_dev->lock_waitq, &wait); @@ -5186,6 +5205,7 @@ static int rbd_add_parse_args(const char *buf, rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; + rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT; From ffdeec7aa41aa61ca4ee68fddf4669df9ce661d1 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 26 Mar 2018 16:46:39 +0800 Subject: [PATCH 133/660] ceph: always update atime/mtime/ctime for new inode For new inode, atime/mtime/ctime are uninitialized. Don't compare against them. Cc: stable@kernel.org Signed-off-by: "Yan, Zheng" Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8bf60250309e..ae056927080d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -669,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_FILE_BUFFER| CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { - if (timespec_compare(ctime, &inode->i_ctime) > 0) { + if (ci->i_version == 0 || + timespec_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode->i_ctime = *ctime; } - if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { + if (ci->i_version == 0 || + ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ dout("mtime %ld.%09ld -> %ld.%09ld " "tw %d -> %d\n", @@ -795,7 +797,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ - ci->i_version = le64_to_cpu(info->version); inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; @@ -868,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, xattr_blob = NULL; } + /* finally update i_version */ + ci->i_version = le64_to_cpu(info->version); + inode->i_mapping->a_ops = &ceph_aops; switch (inode->i_mode & S_IFMT) { From c6244b3b23771b258656445dcd212be759265b84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Apr 2018 14:53:39 +0200 Subject: [PATCH 134/660] rbd: avoid Wreturn-type warnings In some configurations gcc cannot see that rbd_assert(0) leads to an unreachable code path: drivers/block/rbd.c: In function 'rbd_img_is_write': drivers/block/rbd.c:1397:1: error: control reaches end of non-void function [-Werror=return-type] drivers/block/rbd.c: In function '__rbd_obj_handle_request': drivers/block/rbd.c:2499:1: error: control reaches end of non-void function [-Werror=return-type] drivers/block/rbd.c: In function 'rbd_obj_handle_write': drivers/block/rbd.c:2471:1: error: control reaches end of non-void function [-Werror=return-type] As the rbd_assert() here shows has no extra information beyond the verbose BUG(), we can simply use BUG() directly in its place. This is reliably detected as not returning on any architecture, since it doesn't depend on the unlikely() comparison that confused gcc. Fixes: 3da691bf4366 ("rbd: new request handling code") Signed-off-by: Arnd Bergmann Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d5a51493e8b5..e40e490ff967 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1404,7 +1404,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req) case OBJ_OP_DISCARD: return true; default: - rbd_assert(0); + BUG(); } } @@ -2478,7 +2478,7 @@ again: } return false; default: - rbd_assert(0); + BUG(); } } @@ -2506,7 +2506,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req) } return false; default: - rbd_assert(0); + BUG(); } } From 420efbdf4d2358dc12913298ad44d041c6ac0ed6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Apr 2018 09:32:18 +0200 Subject: [PATCH 135/660] rbd: adjust queue limits for "fancy" striping In order to take full advantage of merging in ceph_file_to_extents(), allow object set sized I/Os. If the layout is not "fancy", an object set consists of just one object. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e40e490ff967..6a1805858b79 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3928,7 +3928,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) { struct gendisk *disk; struct request_queue *q; - u64 segment_size; + unsigned int objset_bytes = + rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; int err; /* create gendisk info */ @@ -3968,20 +3969,18 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ - /* set io sizes to object size */ - segment_size = rbd_obj_bytes(&rbd_dev->header); - blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT); q->limits.max_sectors = queue_max_hw_sectors(q); blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, UINT_MAX); - blk_queue_io_min(q, segment_size); - blk_queue_io_opt(q, segment_size); + blk_queue_io_min(q, objset_bytes); + blk_queue_io_opt(q, objset_bytes); /* enable the discard support */ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = segment_size; - blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); - blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); + q->limits.discard_granularity = objset_bytes; + blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); + blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; From d93605407af34eb0b7eb8aff6b1eae2cde3cdd22 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 23 Mar 2018 06:14:47 +0100 Subject: [PATCH 136/660] rbd: notrim map option Add an option to turn off discard and write zeroes offload support to avoid deprovisioning a fully provisioned image. When enabled, discard requests will fail with -EOPNOTSUPP, write zeroes requests will fall back to manually zeroing. Signed-off-by: Ilya Dryomov Tested-by: Hitoshi Kamei --- drivers/block/rbd.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6a1805858b79..8e8b04cc569a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -741,6 +741,7 @@ enum { Opt_read_write, Opt_lock_on_read, Opt_exclusive, + Opt_notrim, Opt_err }; @@ -755,6 +756,7 @@ static match_table_t rbd_opts_tokens = { {Opt_read_write, "rw"}, /* Alternate spelling */ {Opt_lock_on_read, "lock_on_read"}, {Opt_exclusive, "exclusive"}, + {Opt_notrim, "notrim"}, {Opt_err, NULL} }; @@ -764,6 +766,7 @@ struct rbd_options { bool read_only; bool lock_on_read; bool exclusive; + bool trim; }; #define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ @@ -771,6 +774,7 @@ struct rbd_options { #define RBD_READ_ONLY_DEFAULT false #define RBD_LOCK_ON_READ_DEFAULT false #define RBD_EXCLUSIVE_DEFAULT false +#define RBD_TRIM_DEFAULT true static int parse_rbd_opts_token(char *c, void *private) { @@ -820,6 +824,9 @@ static int parse_rbd_opts_token(char *c, void *private) case Opt_exclusive: rbd_opts->exclusive = true; break; + case Opt_notrim: + rbd_opts->trim = false; + break; default: /* libceph prints "bad option" msg */ return -EINVAL; @@ -3976,11 +3983,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_io_min(q, objset_bytes); blk_queue_io_opt(q, objset_bytes); - /* enable the discard support */ - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = objset_bytes; - blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); - blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); + if (rbd_dev->opts->trim) { + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); + q->limits.discard_granularity = objset_bytes; + blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); + blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); + } if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; @@ -5207,6 +5215,7 @@ static int rbd_add_parse_args(const char *buf, rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT; + rbd_opts->trim = RBD_TRIM_DEFAULT; copts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, From 4f34a5130a471f32f2fe7750769ab4057dc3eaa0 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 14 Apr 2018 20:16:06 +0800 Subject: [PATCH 137/660] isofs: fix potential memory leak in mount option parsing When specifying string type mount option (e.g., iocharset) several times in a mount, current option parsing may cause memory leak. Hence, call kfree for previous one in this case. Meanwhile, check memory allocation result for it. Signed-off-by: Chengguang Xu Signed-off-by: Jan Kara --- fs/isofs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index bc258a4402f6..ec3fba7d492f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -394,7 +394,10 @@ static int parse_options(char *options, struct iso9660_options *popt) break; #ifdef CONFIG_JOLIET case Opt_iocharset: + kfree(popt->iocharset); popt->iocharset = match_strdup(&args[0]); + if (!popt->iocharset) + return 0; break; #endif case Opt_map_a: From 06856938112b84ff3c6b0594d017f59cfda2a43d Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sun, 15 Apr 2018 01:03:42 +0530 Subject: [PATCH 138/660] fs: ext2: Adding new return type vm_fault_t Use new return type vm_fault_t for page_mkwrite, pfn_mkwrite and fault handler. Signed-off-by: Souptick Joarder Reviewed-by: Matthew Wilcox Signed-off-by: Jan Kara --- fs/ext2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 09640220fda8..047c327a6b23 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -88,11 +88,11 @@ out_unlock: * The default page_lock and i_size verification done by non-DAX fault paths * is sufficient because ext2 doesn't support hole punching. */ -static int ext2_dax_fault(struct vm_fault *vmf) +static vm_fault_t ext2_dax_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); struct ext2_inode_info *ei = EXT2_I(inode); - int ret; + vm_fault_t ret; if (vmf->flags & FAULT_FLAG_WRITE) { sb_start_pagefault(inode->i_sb); From 5f20407a2415181ce3af06fc84a86280a2708dd4 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Mon, 9 Oct 2017 12:56:56 -0600 Subject: [PATCH 139/660] watchdog: hpwdt: change maintainer. Signed-off-by: Jerry Hoemann Reviewed-by: Guenter Roeck Acked-by: Jimmy Vance Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0a1410d5a621..2a8a542a2cdb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6256,7 +6256,7 @@ S: Odd Fixes F: drivers/media/usb/hdpvr/ HEWLETT PACKARD ENTERPRISE ILO NMI WATCHDOG DRIVER -M: Jimmy Vance +M: Jerry Hoemann S: Supported F: Documentation/watchdog/hpwdt.txt F: drivers/watchdog/hpwdt.c From a03cc689e5c9d89d500f4a4dae1a81ea512dbb25 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 27 Mar 2018 14:25:40 -0500 Subject: [PATCH 140/660] watchdog: sch311x_wdt: Mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case I replaced "Fall" with a proper "Fall through" comment, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sch311x_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index 43d0cbb7ba0b..814cdf539b0f 100644 --- a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -299,7 +299,7 @@ static long sch311x_wdt_ioctl(struct file *file, unsigned int cmd, if (sch311x_wdt_set_heartbeat(new_timeout)) return -EINVAL; sch311x_wdt_keepalive(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: From 3ff0751fa04208459b74cfbe492df79468b8425c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 27 Mar 2018 14:30:41 -0500 Subject: [PATCH 141/660] watchdog: w83977f_wdt: Mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case I replaced "Fall" with a proper "Fall through" comment, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/w83977f_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c index 20e2bba10400..672b61a7f9a3 100644 --- a/drivers/watchdog/w83977f_wdt.c +++ b/drivers/watchdog/w83977f_wdt.c @@ -427,7 +427,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; wdt_keepalive(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(timeout, uarg.i); From e30d69df78fb2667dc58e906cabd0f70ed0af95d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 27 Mar 2018 14:33:49 -0500 Subject: [PATCH 142/660] watchdog: wafer5823wdt: Mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case I replaced "Fall" with a proper "Fall through" comment, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/wafer5823wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/wafer5823wdt.c b/drivers/watchdog/wafer5823wdt.c index db0da7ea4fd8..93c5b610e264 100644 --- a/drivers/watchdog/wafer5823wdt.c +++ b/drivers/watchdog/wafer5823wdt.c @@ -178,7 +178,7 @@ static long wafwdt_ioctl(struct file *file, unsigned int cmd, timeout = new_timeout; wafwdt_stop(); wafwdt_start(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(timeout, p); From fdac6a90d2d151abdbb7e5ec14bb9ab64e2931ec Mon Sep 17 00:00:00 2001 From: Veeraiyan Chidambaram Date: Fri, 13 Apr 2018 16:19:24 +0200 Subject: [PATCH 143/660] watchdog: renesas-wdt: Add support for WDIOF_CARDRESET This patch adds the WDIOF_CARDRESET support for the Renesas platform watchdog, to know if the board reboot is due to a watchdog reset. This is done via the WOVF bit (bit 4) of the RWTCSRA register, which indicates if RWTCNT overflowed, triggering the reset in last boot. Signed-off-by: Veeraiyan Chidambaram [takeshi.kihara.df: changed to read the RWTCSRA register while clock is enabled] Signed-off-by: Takeshi Kihara Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Reviewed-by: Vladimir Zapolskiy Reviewed-by: Geert Uytterhoeven Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/renesas_wdt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/renesas_wdt.c b/drivers/watchdog/renesas_wdt.c index 6b8c6ddfe30b..514db5cc1595 100644 --- a/drivers/watchdog/renesas_wdt.c +++ b/drivers/watchdog/renesas_wdt.c @@ -121,7 +121,8 @@ static int rwdt_restart(struct watchdog_device *wdev, unsigned long action, } static const struct watchdog_info rwdt_ident = { - .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT, + .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | + WDIOF_CARDRESET, .identity = "Renesas WDT Watchdog", }; @@ -197,9 +198,10 @@ static int rwdt_probe(struct platform_device *pdev) return PTR_ERR(clk); pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); priv->clk_rate = clk_get_rate(clk); + priv->wdev.bootstatus = (readb_relaxed(priv->base + RWTCSRA) & + RWTCSRA_WOVF) ? WDIOF_CARDRESET : 0; pm_runtime_put(&pdev->dev); if (!priv->clk_rate) { From 49d4d277ca54e04170d39484c8758a0ea9bca37d Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 27 Mar 2018 15:09:27 -0500 Subject: [PATCH 144/660] aspeed: watchdog: Set bootstatus during probe Check the aspeed timeout status register to see if the system has booted from the secondary boot source. If so, set the watchdog device bootstatus flag for "Card previously reset the CPU." Signed-off-by: Eddie James Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/aspeed_wdt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index a5b8eb21201f..1abe4d021fd2 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -55,6 +55,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); #define WDT_CTRL_WDT_INTR BIT(2) #define WDT_CTRL_RESET_SYSTEM BIT(1) #define WDT_CTRL_ENABLE BIT(0) +#define WDT_TIMEOUT_STATUS 0x10 +#define WDT_TIMEOUT_STATUS_BOOT_SECONDARY BIT(1) /* * WDT_RESET_WIDTH controls the characteristics of the external pulse (if @@ -192,6 +194,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev) struct device_node *np; const char *reset_type; u32 duration; + u32 status; int ret; wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL); @@ -307,6 +310,10 @@ static int aspeed_wdt_probe(struct platform_device *pdev) writel(duration - 1, wdt->base + WDT_RESET_WIDTH); } + status = readl(wdt->base + WDT_TIMEOUT_STATUS); + if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) + wdt->wdd.bootstatus = WDIOF_CARDRESET; + ret = devm_watchdog_register_device(&pdev->dev, &wdt->wdd); if (ret) { dev_err(&pdev->dev, "failed to register\n"); From de2011197d15746307e709687401397fe52bea83 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 20 Nov 2017 08:48:02 +0100 Subject: [PATCH 145/660] s390: update defconfig Signed-off-by: Martin Schwidefsky --- arch/s390/configs/default_defconfig | 31 ++++++++++++++++--------- arch/s390/configs/performance_defconfig | 20 ++++++++++++---- arch/s390/defconfig | 13 +++++++++-- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 2310315b0744..6176fe9795ca 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -24,13 +24,13 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y -CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -59,10 +59,11 @@ CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y CONFIG_LIVEPATCH=y CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=256 +CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_PREEMPT=y CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -305,7 +306,6 @@ CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -364,11 +364,11 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_OPENVSWITCH=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -CONFIG_NET_TCPPROBE=m CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 @@ -380,9 +380,9 @@ CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_RAM_DAX=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_RBD=m +CONFIG_BLK_DEV_NVME=m CONFIG_ENCLOSURE_SERVICES=m CONFIG_GENWQE=m CONFIG_RAID_ATTRS=m @@ -461,6 +461,7 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m +CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -474,6 +475,9 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m @@ -482,7 +486,9 @@ CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_INPUT=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -641,6 +647,8 @@ CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_S390_PTDUMP=y +CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y @@ -649,17 +657,20 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_INTEGRITY_SIGNATURE=y +CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y +CONFIG_IMA_DEFAULT_HASH_SHA256=y +CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -707,9 +718,8 @@ CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m -CONFIG_X509_CERTIFICATE_PARSER=m +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_CRC7=m CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y @@ -719,4 +729,3 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_KVM_S390_UCONTROL=y CONFIG_VHOST_NET=m -CONFIG_KEXEC_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 20ed149e1137..c105bcc6d7a6 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -25,13 +25,13 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y -CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -45,6 +45,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y @@ -62,6 +64,7 @@ CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -301,7 +304,6 @@ CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -359,11 +361,11 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_OPENVSWITCH=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -CONFIG_NET_TCPPROBE=m CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 @@ -375,8 +377,9 @@ CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_RAM_DAX=y CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_RBD=m +CONFIG_BLK_DEV_NVME=m CONFIG_ENCLOSURE_SERVICES=m CONFIG_GENWQE=m CONFIG_RAID_ATTRS=m @@ -455,6 +458,7 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m +CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -468,6 +472,9 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m @@ -476,7 +483,9 @@ CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_INPUT=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -507,7 +516,6 @@ CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y @@ -592,8 +600,10 @@ CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y +CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_USER=m diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 46a3178d8bc6..f40600eb1762 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -8,6 +8,7 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_CPU_ISOLATION is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y @@ -23,12 +24,12 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y -CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -47,6 +48,7 @@ CONFIG_LIVEPATCH=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -129,10 +131,13 @@ CONFIG_EQUALIZER=m CONFIG_TUN=m CONFIG_VIRTIO_NET=y # CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_CORTINA is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set +# CONFIG_NET_VENDOR_SOCIONEXT is not set # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set +# CONFIG_VT is not set CONFIG_DEVKMEM=y CONFIG_RAW_DRIVER=m CONFIG_VIRTIO_BALLOON=y @@ -177,13 +182,15 @@ CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_FUNCTION_PROFILER=y -CONFIG_KPROBES_SANITY_TEST=y +# CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_S390_PTDUMP=y CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -213,6 +220,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m From cd7cf57f18be4196306997d4325b8ebf895ab318 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 12 Apr 2018 11:00:31 +0200 Subject: [PATCH 146/660] s390: remove gcov defconfig This config is not needed anymore. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/configs/gcov_defconfig | 661 ------------------------------- 1 file changed, 661 deletions(-) delete mode 100644 arch/s390/configs/gcov_defconfig diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig deleted file mode 100644 index d52eafe57ae8..000000000000 --- a/arch/s390/configs/gcov_defconfig +++ /dev/null @@ -1,661 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_AUDIT=y -CONFIG_NO_HZ_IDLE=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_NUMA_BALANCING=y -# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_BLK_CGROUP=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_NAMESPACES=y -CONFIG_USER_NS=y -CONFIG_SCHED_AUTOGROUP=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_BPF_SYSCALL=y -CONFIG_USERFAULTFD=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_OPROFILE=m -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -CONFIG_GCOV_KERNEL=y -CONFIG_GCOV_PROFILE_ALL=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_WBT=y -CONFIG_BLK_WBT_SQ=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_CFQ_GROUP_IOSCHED=y -CONFIG_DEFAULT_DEADLINE=y -CONFIG_LIVEPATCH=y -CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=512 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZBUD=m -CONFIG_ZSMALLOC=m -CONFIG_ZSMALLOC_STAT=y -CONFIG_DEFERRED_STRUCT_PAGE_INIT=y -CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_PCI=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_S390=y -CONFIG_CHSC_SCH=y -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_HIBERNATION=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=m -CONFIG_UNIX=y -CONFIG_UNIX_DIAG=m -CONFIG_XFRM_USER=m -CONFIG_NET_KEY=m -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_INET_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_TABLES=m -CONFIG_NFT_EXTHDR=m -CONFIG_NFT_META=m -CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_NAT=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NETFILTER_XT_SET=m -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_IP_SET=m -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NET_SCTPPROBE=m -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -CONFIG_L2TP=m -CONFIG_L2TP_DEBUGFS=m -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=y -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_DNS_RESOLVER=y -CONFIG_NETLINK_DIAG=m -CONFIG_CGROUP_NET_PRIO=y -CONFIG_BPF_JIT=y -CONFIG_NET_PKTGEN=m -CONFIG_NET_TCPPROBE=m -CONFIG_DEVTMPFS=y -CONFIG_DMA_CMA=y -CONFIG_CMA_SIZE_MBYTES=0 -CONFIG_CONNECTOR=y -CONFIG_ZRAM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_RAM_DAX=y -CONFIG_VIRTIO_BLK=y -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_GENWQE=m -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=y -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SRP_ATTRS=m -CONFIG_ISCSI_TCP=m -CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_DELAY=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_SWITCH=m -CONFIG_NETDEVICES=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_EQUALIZER=m -CONFIG_IFB=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_VXLAN=m -CONFIG_TUN=m -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -# CONFIG_NET_VENDOR_ARC is not set -# CONFIG_NET_VENDOR_CHELSIO is not set -# CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MARVELL is not set -CONFIG_MLX4_EN=m -CONFIG_MLX5_CORE=m -CONFIG_MLX5_CORE_EN=y -# CONFIG_NET_VENDOR_NATSEMI is not set -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_MPPE=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_RAW_DRIVER=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_TN3270_FS=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_SOFT_WATCHDOG=m -CONFIG_DIAG288_WATCHDOG=m -# CONFIG_HID is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_VFIO=m -CONFIG_VFIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y -CONFIG_JBD2_DEBUG=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=y -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_BTRFS_FS=y -CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=y -CONFIG_CUSE=m -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -CONFIG_FSCACHE=m -CONFIG_CACHEFILES=m -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_NTFS_FS=m -CONFIG_NTFS_RW=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m -CONFIG_ECRYPT_FS=m -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_ROMFS_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFSD=m -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_CIFS=m -CONFIG_CIFS_STATS=y -CONFIG_CIFS_STATS2=y -CONFIG_CIFS_WEAK_PW_HASH=y -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -# CONFIG_CIFS_DEBUG is not set -CONFIG_CIFS_DFS_UPCALL=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_GDB_SCRIPTS=y -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_MEMORY_INIT=y -CONFIG_PANIC_ON_OOPS=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_HIST_TRIGGERS=y -CONFIG_LKDTM=m -CONFIG_PERCPU_TEST=m -CONFIG_ATOMIC64_SELFTEST=y -CONFIG_TEST_BPF=m -CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_S390_PTDUMP=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y -CONFIG_ENCRYPTED_KEYS=m -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 -CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_INTEGRITY_SIGNATURE=y -CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y -CONFIG_IMA=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_APPRAISE=y -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_MCRYPTD=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=m -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_ZCRYPT=m -CONFIG_PKEY=m -CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRC7=m -CONFIG_CRC8=m -CONFIG_CORDIC=m -CONFIG_CMM=m -CONFIG_APPLDATA_BASE=y -CONFIG_KVM=m -CONFIG_KVM_S390_UCONTROL=y -CONFIG_VHOST_NET=m From 49d23a851d62c03daebae2d245dcc9b07dbfa89f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 12 Apr 2018 11:01:07 +0200 Subject: [PATCH 147/660] s390: rename default_defconfig to debug_defconfig The name debug_defconfig reflects what the config is actually good for and should be less confusing. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/configs/{default_defconfig => debug_defconfig} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename arch/s390/configs/{default_defconfig => debug_defconfig} (100%) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/debug_defconfig similarity index 100% rename from arch/s390/configs/default_defconfig rename to arch/s390/configs/debug_defconfig From c326599b29f0dac8d73147309ffd31cce2cfdc05 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 9 Apr 2018 11:24:03 +0100 Subject: [PATCH 148/660] arm64: dts: juno: drop unnecessary address-cells and size-cells properties /smb@8000000/motherboard/gpio_keys node doesn't have "ranges" or "reg" property in child nodes. So it's unnecessary to have address-cells as well as size-cells properties which results in below warning. Warning (avoid_unnecessary_addr_size): /smb@8000000/motherboard/gpio_keys: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property This patch drops the unnecessary address+size-cell properties. Cc: Lorenzo Pieralisi Reviewed-by: Liviu Dudau Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/juno-motherboard.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi index 2ac43221ddb6..69804c5f1197 100644 --- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi @@ -56,8 +56,6 @@ gpio_keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; power-button { debounce_interval = <50>; From d27a3c3436a7f68f3affd7991cb4a68e91dd747e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Mar 2018 18:01:58 +0000 Subject: [PATCH 149/660] firmware: arm_scmi: remove redundant null check on array The null check on clk->name is redundant since name is a char array and can never be null, so the check is always true. Remove it. Detected by CoverityScan, CID#1466117 ("Array compared against 0") Signed-off-by: Colin Ian King Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index e6f17825db79..2b90606452a2 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -284,7 +284,7 @@ scmi_clock_info_get(const struct scmi_handle *handle, u32 clk_id) struct clock_info *ci = handle->clk_priv; struct scmi_clock_info *clk = ci->clk + clk_id; - if (!clk->name || !clk->name[0]) + if (!clk->name[0]) return NULL; return clk; From 1e23aace21515a8f7615a1de016c0ea8d4e0cc6e Mon Sep 17 00:00:00 2001 From: Kyle Roeschley Date: Mon, 9 Apr 2018 10:23:55 -0500 Subject: [PATCH 150/660] USB: serial: cp210x: add ID for NI USB serial console Added the USB VID and PID for the USB serial console on some National Instruments devices. Signed-off-by: Kyle Roeschley Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index de1e759dd512..eb6c26cbe579 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -214,6 +214,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */ + { USB_DEVICE(0x3923, 0x7A0B) }, /* National Instruments USB Serial Console */ { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */ { } /* Terminating Entry */ }; From 470b5d6f0cf4674be2d1ec94e54283a1770b6a1a Mon Sep 17 00:00:00 2001 From: Vasyl Vavrychuk Date: Wed, 11 Apr 2018 17:05:13 +0300 Subject: [PATCH 151/660] USB: serial: ftdi_sio: use jtag quirk for Arrow USB Blaster Arrow USB Blaster integrated on MAX1000 board uses the same vendor ID (0x0403) and product ID (0x6010) as the "original" FTDI device. This patch avoids picking up by ftdi_sio of the first interface of this USB device. After that this device can be used by Arrow user-space JTAG driver. Signed-off-by: Vasyl Vavrychuk Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 87202ad5a50d..7ea221d42dba 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1898,7 +1898,8 @@ static int ftdi_8u2232c_probe(struct usb_serial *serial) return ftdi_jtag_probe(serial); if (udev->product && - (!strcmp(udev->product, "BeagleBone/XDS100V2") || + (!strcmp(udev->product, "Arrow USB Blaster") || + !strcmp(udev->product, "BeagleBone/XDS100V2") || !strcmp(udev->product, "SNAP Connect E10"))) return ftdi_jtag_probe(serial); From edf5c17d866eada03b8750368a12dc3def77d608 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 5 Apr 2018 16:15:55 +0200 Subject: [PATCH 152/660] staging: irda: remove remaining remants of irda code removal There were some documentation locations that irda was mentioned, as well as an old MAINTAINERS entry and the networking sysctl entries. Clean these all out as this stuff really is finally gone. Reported-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Documentation/ioctl/ioctl-number.txt | 2 -- Documentation/networking/ip-sysctl.txt | 15 --------------- Documentation/process/magic-number.rst | 3 --- MAINTAINERS | 10 ---------- include/uapi/linux/sysctl.h | 18 ------------------ kernel/sysctl_binary.c | 20 +------------------- 6 files changed, 1 insertion(+), 67 deletions(-) diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 84bb74dcae12..7f7413e597f3 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -217,7 +217,6 @@ Code Seq#(hex) Include File Comments 'd' 02-40 pcmcia/ds.h conflict! 'd' F0-FF linux/digi1.h 'e' all linux/digi1.h conflict! -'e' 00-1F drivers/net/irda/irtty-sir.h conflict! 'f' 00-1F linux/ext2_fs.h conflict! 'f' 00-1F linux/ext3_fs.h conflict! 'f' 00-0F fs/jfs/jfs_dinode.h conflict! @@ -247,7 +246,6 @@ Code Seq#(hex) Include File Comments 'm' all linux/synclink.h conflict! 'm' 00-19 drivers/message/fusion/mptctl.h conflict! 'm' 00 drivers/scsi/megaraid/megaraid_ioctl.h conflict! -'m' 00-1F net/irda/irmod.h conflict! 'n' 00-7F linux/ncp_fs.h and fs/ncpfs/ioctl.c 'n' 80-8F uapi/linux/nilfs2_api.h NILFS2 'n' E0-FF linux/matroxfb.h matroxfb diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5dc1a040a2f1..8725aa718b35 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -2126,18 +2126,3 @@ max_dgram_qlen - INTEGER Default: 10 - -UNDOCUMENTED: - -/proc/sys/net/irda/* - fast_poll_increase FIXME - warn_noreply_time FIXME - discovery_slots FIXME - slot_timeout FIXME - max_baud_rate FIXME - discovery_timeout FIXME - lap_keepalive_time FIXME - max_noreply_time FIXME - max_tx_data_size FIXME - max_tx_window FIXME - min_tx_turn_time FIXME diff --git a/Documentation/process/magic-number.rst b/Documentation/process/magic-number.rst index 00cecf1fcba9..633be1043690 100644 --- a/Documentation/process/magic-number.rst +++ b/Documentation/process/magic-number.rst @@ -157,8 +157,5 @@ memory management. See ``include/sound/sndmagic.h`` for complete list of them. M OSS sound drivers have their magic numbers constructed from the soundcard PCI ID - these are not listed here as well. -IrDA subsystem also uses large number of own magic numbers, see -``include/net/irda/irda.h`` for a complete list of them. - HFS is another larger user of magic numbers - you can find them in ``fs/hfs/hfs.h``. diff --git a/MAINTAINERS b/MAINTAINERS index 0a1410d5a621..c74c80e44879 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7396,16 +7396,6 @@ S: Obsolete F: include/uapi/linux/ipx.h F: drivers/staging/ipx/ -IRDA SUBSYSTEM -M: Samuel Ortiz -L: irda-users@lists.sourceforge.net (subscribers-only) -L: netdev@vger.kernel.org -W: http://irda.sourceforge.net/ -S: Obsolete -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sameo/irda-2.6.git -F: Documentation/networking/irda.txt -F: drivers/staging/irda/ - IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) M: Marc Zyngier S: Maintained diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 0f272818a4d2..6b58371b1f0d 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -780,24 +780,6 @@ enum { NET_BRIDGE_NF_FILTER_PPPOE_TAGGED = 5, }; -/* proc/sys/net/irda */ -enum { - NET_IRDA_DISCOVERY=1, - NET_IRDA_DEVNAME=2, - NET_IRDA_DEBUG=3, - NET_IRDA_FAST_POLL=4, - NET_IRDA_DISCOVERY_SLOTS=5, - NET_IRDA_DISCOVERY_TIMEOUT=6, - NET_IRDA_SLOT_TIMEOUT=7, - NET_IRDA_MAX_BAUD_RATE=8, - NET_IRDA_MIN_TX_TURN_TIME=9, - NET_IRDA_MAX_TX_DATA_SIZE=10, - NET_IRDA_MAX_TX_WINDOW=11, - NET_IRDA_MAX_NOREPLY_TIME=12, - NET_IRDA_WARN_NOREPLY_TIME=13, - NET_IRDA_LAP_KEEPALIVE_TIME=14, -}; - /* CTL_FS names: */ enum diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index e8c0dab4fd65..07148b497451 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -704,24 +704,6 @@ static const struct bin_table bin_net_netfilter_table[] = { {} }; -static const struct bin_table bin_net_irda_table[] = { - { CTL_INT, NET_IRDA_DISCOVERY, "discovery" }, - { CTL_STR, NET_IRDA_DEVNAME, "devname" }, - { CTL_INT, NET_IRDA_DEBUG, "debug" }, - { CTL_INT, NET_IRDA_FAST_POLL, "fast_poll_increase" }, - { CTL_INT, NET_IRDA_DISCOVERY_SLOTS, "discovery_slots" }, - { CTL_INT, NET_IRDA_DISCOVERY_TIMEOUT, "discovery_timeout" }, - { CTL_INT, NET_IRDA_SLOT_TIMEOUT, "slot_timeout" }, - { CTL_INT, NET_IRDA_MAX_BAUD_RATE, "max_baud_rate" }, - { CTL_INT, NET_IRDA_MIN_TX_TURN_TIME, "min_tx_turn_time" }, - { CTL_INT, NET_IRDA_MAX_TX_DATA_SIZE, "max_tx_data_size" }, - { CTL_INT, NET_IRDA_MAX_TX_WINDOW, "max_tx_window" }, - { CTL_INT, NET_IRDA_MAX_NOREPLY_TIME, "max_noreply_time" }, - { CTL_INT, NET_IRDA_WARN_NOREPLY_TIME, "warn_noreply_time" }, - { CTL_INT, NET_IRDA_LAP_KEEPALIVE_TIME, "lap_keepalive_time" }, - {} -}; - static const struct bin_table bin_net_table[] = { { CTL_DIR, NET_CORE, "core", bin_net_core_table }, /* NET_ETHER not used */ @@ -743,7 +725,7 @@ static const struct bin_table bin_net_table[] = { { CTL_DIR, NET_LLC, "llc", bin_net_llc_table }, { CTL_DIR, NET_NETFILTER, "netfilter", bin_net_netfilter_table }, /* NET_DCCP "dccp" no longer used */ - { CTL_DIR, NET_IRDA, "irda", bin_net_irda_table }, + /* NET_IRDA "irda" no longer used */ { CTL_INT, 2089, "nf_conntrack_max" }, {} }; From 2c2bf522ed8cbfaac666f7dc65cfd38de2b89f0f Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Fri, 13 Apr 2018 09:50:44 +0100 Subject: [PATCH 153/660] MIPS: dts: Boston: Fix PCI bus dtc warnings: dtc recently (v1.4.4-8-g756ffc4f52f6) added PCI bus checks. Fix the warnings now emitted: arch/mips/boot/dts/img/boston.dtb: Warning (pci_bridge): /pci@10000000: missing bus-range for PCI bridge arch/mips/boot/dts/img/boston.dtb: Warning (pci_bridge): /pci@12000000: missing bus-range for PCI bridge arch/mips/boot/dts/img/boston.dtb: Warning (pci_bridge): /pci@14000000: missing bus-range for PCI bridge Signed-off-by: Matt Redfearn Cc: Ralf Baechle Cc: Paul Burton Cc: Rob Herring Cc: Mark Rutland Cc: linux-mips@linux-mips.org Cc: devicetree@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/19070/ Signed-off-by: James Hogan --- arch/mips/boot/dts/img/boston.dts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/boot/dts/img/boston.dts b/arch/mips/boot/dts/img/boston.dts index 1bd105428f61..65af3f6ba81c 100644 --- a/arch/mips/boot/dts/img/boston.dts +++ b/arch/mips/boot/dts/img/boston.dts @@ -51,6 +51,8 @@ ranges = <0x02000000 0 0x40000000 0x40000000 0 0x40000000>; + bus-range = <0x00 0xff>; + interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pci0_intc 1>, <0 0 0 2 &pci0_intc 2>, @@ -79,6 +81,8 @@ ranges = <0x02000000 0 0x20000000 0x20000000 0 0x20000000>; + bus-range = <0x00 0xff>; + interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pci1_intc 1>, <0 0 0 2 &pci1_intc 2>, @@ -107,6 +111,8 @@ ranges = <0x02000000 0 0x16000000 0x16000000 0 0x100000>; + bus-range = <0x00 0xff>; + interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pci2_intc 1>, <0 0 0 2 &pci2_intc 2>, From 660661afcd40ed7f515ef3369721ed58e80c0fc5 Mon Sep 17 00:00:00 2001 From: Victor Gu Date: Fri, 6 Apr 2018 16:55:31 +0200 Subject: [PATCH 154/660] PCI: aardvark: Fix logic in advk_pcie_{rd,wr}_conf() The PCI configuration space read/write functions were special casing the situation where PCI_SLOT(devfn) != 0, and returned PCIBIOS_DEVICE_NOT_FOUND in this case. However, while this is what is intended for the root bus, it is not intended for the child busses, as it prevents discovering devices with PCI_SLOT(x) != 0. Therefore, we return PCIBIOS_DEVICE_NOT_FOUND only if we're on the root bus. Fixes: 8c39d710363c1 ("PCI: aardvark: Add Aardvark PCI host controller driver") Cc: Signed-off-by: Victor Gu Reviewed-by: Wilson Ding Reviewed-by: Nadav Haklai [Thomas: tweak commit log.] Signed-off-by: Thomas Petazzoni Signed-off-by: Lorenzo Pieralisi --- drivers/pci/host/pci-aardvark.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index b04d37b3c5de..b72f15c99793 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -437,7 +437,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, u32 reg; int ret; - if (PCI_SLOT(devfn) != 0) { + if ((bus->number == pcie->root_bus_nr) && PCI_SLOT(devfn) != 0) { *val = 0xffffffff; return PCIBIOS_DEVICE_NOT_FOUND; } @@ -491,7 +491,7 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, int offset; int ret; - if (PCI_SLOT(devfn) != 0) + if ((bus->number == pcie->root_bus_nr) && PCI_SLOT(devfn) != 0) return PCIBIOS_DEVICE_NOT_FOUND; if (where % size) From 4fa3999ee672c54a5498ce98e20fe3fdf9c1cbb4 Mon Sep 17 00:00:00 2001 From: Victor Gu Date: Fri, 6 Apr 2018 16:55:32 +0200 Subject: [PATCH 155/660] PCI: aardvark: Set PIO_ADDR_LS correctly in advk_pcie_rd_conf() When setting the PIO_ADDR_LS register during a configuration read, we were properly passing the device number, function number and register number, but not the bus number, causing issues when reading the configuration of PCIe devices. Fixes: 8c39d710363c1 ("PCI: aardvark: Add Aardvark PCI host controller driver") Cc: Signed-off-by: Victor Gu Reviewed-by: Wilson Ding Reviewed-by: Nadav Haklai [Thomas: tweak commit log.] Signed-off-by: Thomas Petazzoni Signed-off-by: Lorenzo Pieralisi --- drivers/pci/host/pci-aardvark.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index b72f15c99793..5b8201aaf34d 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -172,8 +172,6 @@ #define PCIE_CONFIG_WR_TYPE0 0xa #define PCIE_CONFIG_WR_TYPE1 0xb -/* PCI_BDF shifts 8bit, so we need extra 4bit shift */ -#define PCIE_BDF(dev) (dev << 4) #define PCIE_CONF_BUS(bus) (((bus) & 0xff) << 20) #define PCIE_CONF_DEV(dev) (((dev) & 0x1f) << 15) #define PCIE_CONF_FUNC(fun) (((fun) & 0x7) << 12) @@ -456,7 +454,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, advk_writel(pcie, reg, PIO_CTRL); /* Program the address registers */ - reg = PCIE_BDF(devfn) | PCIE_CONF_REG(where); + reg = PCIE_CONF_ADDR(bus->number, devfn, where); advk_writel(pcie, reg, PIO_ADDR_LS); advk_writel(pcie, 0, PIO_ADDR_MS); From 3430f924a62905891c8fa9a3b97ea52007795bc3 Mon Sep 17 00:00:00 2001 From: Victor Gu Date: Fri, 6 Apr 2018 16:55:33 +0200 Subject: [PATCH 156/660] PCI: aardvark: Use ISR1 instead of ISR0 interrupt in legacy irq mode The Aardvark has two interrupts sets: - first set is bit[23:16] of PCIe ISR 0 register(RD0074840h) - second set is bit[11:8] of PCIe ISR 1 register(RD0074848h) Only one set should be used, while another set should be masked. The second set, ISR1, is more advanced, the Legacy INT_X status bit is asserted once Assert_INTX message is received, and de-asserted after Deassert_INTX message is received which matches what the driver is currently doing in the ->irq_mask() and ->irq_unmask() functions. The ISR0 requires additional work to deassert the interrupt, which the driver does not currently implement, therefore it needs fixing. Update the driver to use ISR1 register set, fixing current implementation. Fixes: 8c39d710363c1 ("PCI: aardvark: Add Aardvark PCI host controller driver") Link: https://bugzilla.kernel.org/show_bug.cgi?id=196339 Signed-off-by: Victor Gu [Thomas: tweak commit log.] Signed-off-by: Thomas Petazzoni [lorenzo.pieralisi@arm.com: updated the commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Evan Wang Reviewed-by: Nadav Haklai Cc: --- drivers/pci/host/pci-aardvark.c | 41 +++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index 5b8201aaf34d..7a0ddb709052 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -100,7 +100,8 @@ #define PCIE_ISR1_MASK_REG (CONTROL_BASE_ADDR + 0x4C) #define PCIE_ISR1_POWER_STATE_CHANGE BIT(4) #define PCIE_ISR1_FLUSH BIT(5) -#define PCIE_ISR1_ALL_MASK GENMASK(5, 4) +#define PCIE_ISR1_INTX_ASSERT(val) BIT(8 + (val)) +#define PCIE_ISR1_ALL_MASK GENMASK(11, 4) #define PCIE_MSI_ADDR_LOW_REG (CONTROL_BASE_ADDR + 0x50) #define PCIE_MSI_ADDR_HIGH_REG (CONTROL_BASE_ADDR + 0x54) #define PCIE_MSI_STATUS_REG (CONTROL_BASE_ADDR + 0x58) @@ -607,9 +608,9 @@ static void advk_pcie_irq_mask(struct irq_data *d) irq_hw_number_t hwirq = irqd_to_hwirq(d); u32 mask; - mask = advk_readl(pcie, PCIE_ISR0_MASK_REG); - mask |= PCIE_ISR0_INTX_ASSERT(hwirq); - advk_writel(pcie, mask, PCIE_ISR0_MASK_REG); + mask = advk_readl(pcie, PCIE_ISR1_MASK_REG); + mask |= PCIE_ISR1_INTX_ASSERT(hwirq); + advk_writel(pcie, mask, PCIE_ISR1_MASK_REG); } static void advk_pcie_irq_unmask(struct irq_data *d) @@ -618,9 +619,9 @@ static void advk_pcie_irq_unmask(struct irq_data *d) irq_hw_number_t hwirq = irqd_to_hwirq(d); u32 mask; - mask = advk_readl(pcie, PCIE_ISR0_MASK_REG); - mask &= ~PCIE_ISR0_INTX_ASSERT(hwirq); - advk_writel(pcie, mask, PCIE_ISR0_MASK_REG); + mask = advk_readl(pcie, PCIE_ISR1_MASK_REG); + mask &= ~PCIE_ISR1_INTX_ASSERT(hwirq); + advk_writel(pcie, mask, PCIE_ISR1_MASK_REG); } static int advk_pcie_irq_map(struct irq_domain *h, @@ -763,29 +764,35 @@ static void advk_pcie_handle_msi(struct advk_pcie *pcie) static void advk_pcie_handle_int(struct advk_pcie *pcie) { - u32 val, mask, status; + u32 isr0_val, isr0_mask, isr0_status; + u32 isr1_val, isr1_mask, isr1_status; int i, virq; - val = advk_readl(pcie, PCIE_ISR0_REG); - mask = advk_readl(pcie, PCIE_ISR0_MASK_REG); - status = val & ((~mask) & PCIE_ISR0_ALL_MASK); + isr0_val = advk_readl(pcie, PCIE_ISR0_REG); + isr0_mask = advk_readl(pcie, PCIE_ISR0_MASK_REG); + isr0_status = isr0_val & ((~isr0_mask) & PCIE_ISR0_ALL_MASK); - if (!status) { - advk_writel(pcie, val, PCIE_ISR0_REG); + isr1_val = advk_readl(pcie, PCIE_ISR1_REG); + isr1_mask = advk_readl(pcie, PCIE_ISR1_MASK_REG); + isr1_status = isr1_val & ((~isr1_mask) & PCIE_ISR1_ALL_MASK); + + if (!isr0_status && !isr1_status) { + advk_writel(pcie, isr0_val, PCIE_ISR0_REG); + advk_writel(pcie, isr1_val, PCIE_ISR1_REG); return; } /* Process MSI interrupts */ - if (status & PCIE_ISR0_MSI_INT_PENDING) + if (isr0_status & PCIE_ISR0_MSI_INT_PENDING) advk_pcie_handle_msi(pcie); /* Process legacy interrupts */ for (i = 0; i < PCI_NUM_INTX; i++) { - if (!(status & PCIE_ISR0_INTX_ASSERT(i))) + if (!(isr1_status & PCIE_ISR1_INTX_ASSERT(i))) continue; - advk_writel(pcie, PCIE_ISR0_INTX_ASSERT(i), - PCIE_ISR0_REG); + advk_writel(pcie, PCIE_ISR1_INTX_ASSERT(i), + PCIE_ISR1_REG); virq = irq_find_mapping(pcie->irq_domain, i); generic_handle_irq(virq); From fc31c4e347c9dad50544d01d5ee98b22c7df88bb Mon Sep 17 00:00:00 2001 From: Evan Wang Date: Fri, 6 Apr 2018 16:55:34 +0200 Subject: [PATCH 157/660] PCI: aardvark: Fix PCIe Max Read Request Size setting There is an obvious typo issue in the definition of the PCIe maximum read request size: a bit shift is directly used as a value, while it should be used to shift the correct value. Fixes: 8c39d710363c1 ("PCI: aardvark: Add Aardvark PCI host controller driver") Cc: Signed-off-by: Evan Wang Reviewed-by: Victor Gu Reviewed-by: Nadav Haklai [Thomas: tweak commit log.] Signed-off-by: Thomas Petazzoni Signed-off-by: Lorenzo Pieralisi --- drivers/pci/host/pci-aardvark.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index 7a0ddb709052..9abf549631b4 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -29,6 +29,7 @@ #define PCIE_CORE_DEV_CTRL_STATS_MAX_PAYLOAD_SZ_SHIFT 5 #define PCIE_CORE_DEV_CTRL_STATS_SNOOP_DISABLE (0 << 11) #define PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT 12 +#define PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SZ 0x2 #define PCIE_CORE_LINK_CTRL_STAT_REG 0xd0 #define PCIE_CORE_LINK_L0S_ENTRY BIT(0) #define PCIE_CORE_LINK_TRAINING BIT(5) @@ -295,7 +296,8 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) reg = PCIE_CORE_DEV_CTRL_STATS_RELAX_ORDER_DISABLE | (7 << PCIE_CORE_DEV_CTRL_STATS_MAX_PAYLOAD_SZ_SHIFT) | PCIE_CORE_DEV_CTRL_STATS_SNOOP_DISABLE | - PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT; + (PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SZ << + PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT); advk_writel(pcie, reg, PCIE_CORE_DEV_CTRL_STATS_REG); /* Program PCIe Control 2 to disable strict ordering */ From af52f9982e410edac21ca4b49563053ffc9da1eb Mon Sep 17 00:00:00 2001 From: David Wang Date: Mon, 16 Apr 2018 17:48:09 +0800 Subject: [PATCH 158/660] ALSA: hda - New VIA controller suppor no-snoop path This patch is used to tell kernel that new VIA HDAC controller also support no-snoop path. [ minor coding style fix by tiwai ] Signed-off-by: David Wang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index ccffce068634..fece779f6260 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1645,7 +1645,8 @@ static void azx_check_snoop_available(struct azx *chip) */ u8 val; pci_read_config_byte(chip->pci, 0x42, &val); - if (!(val & 0x80) && chip->pci->revision == 0x30) + if (!(val & 0x80) && (chip->pci->revision == 0x30 || + chip->pci->revision == 0x20)) snoop = false; } From bd28899dd34f9283c567f7eeb31bb546f10820b5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 Apr 2018 13:17:50 +0300 Subject: [PATCH 159/660] Revert "macsec: missing dev_put() on error in macsec_newlink()" This patch is just wrong, sorry. I was trying to fix a static checker warning and misread the code. The reference taken in macsec_newlink() is released in macsec_free_netdev() when the netdevice is destroyed. This reverts commit 5dcd8400884cc4a043a6d4617e042489e5d566a9. Reported-by: Laura Abbott Fixes: 5dcd8400884c ("macsec: missing dev_put() on error in macsec_newlink()") Signed-off-by: Dan Carpenter Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9cbb0c8a896a..7de88b33d5b9 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3277,7 +3277,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev, err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) - goto put_dev; + goto unregister; /* need to be already registered so that ->init has run and * the MAC addr is set @@ -3316,8 +3316,7 @@ del_dev: macsec_del_dev(macsec); unlink: netdev_upper_dev_unlink(real_dev, dev); -put_dev: - dev_put(real_dev); +unregister: unregister_netdevice(dev); return err; } From 982e05001c472066ab288e4269ad6cab48889f0d Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 16 Apr 2018 10:07:23 +0200 Subject: [PATCH 160/660] net: mvpp2: Fix TCAM filter reserved range Marvell's PPv2 controller has a Packet Header parser, which uses a fixed-size TCAM array of filter entries. The mvpp2 driver reserves some ranges among the 256 TCAM entries to perform MAC and VID filtering. The rest of the TCAM ids are freely usable for other features, such as IPv4 proto matching. This commit fixes the MVPP2_PE_LAST_FREE_TID define that sets the end of the "free range", which included the MAC range. This could therefore allow some other features to use entries dedicated to MAC filtering, lowering the number of unicast/multicast addresses that could be allowed before switching to promiscuous mode. Fixes: 10fea26ce2aa ("net: mvpp2: Add support for unicast filtering") Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 54a038943c06..9deb79b6dcc8 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -663,7 +663,7 @@ enum mvpp2_tag_type { #define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31) #define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \ MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1) -#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1) +#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_MAC_RANGE_START - 1) #define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30) #define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 29) #define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28) From 6e8fe39989720b87439fee7817a5ca362b16d931 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 26 Mar 2018 14:50:00 -0500 Subject: [PATCH 161/660] ARM: socfpga_defconfig: Remove QSPI Sector 4K size force Remove QSPI Sector 4K size force which is causing QSPI boot problems with the JFFS2 root filesystem. Fixes the following error: "Magic bitmask 0x1985 not found at ..." Cc: stable@vger.kernel.org Signed-off-by: Thor Thayer Signed-off-by: Dinh Nguyen --- arch/arm/configs/socfpga_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig index 2620ce790db0..371fca4e1ab7 100644 --- a/arch/arm/configs/socfpga_defconfig +++ b/arch/arm/configs/socfpga_defconfig @@ -57,6 +57,7 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_DENALI_DT=y CONFIG_MTD_SPI_NOR=y +# CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set CONFIG_SPI_CADENCE_QUADSPI=y CONFIG_OF_OVERLAY=y CONFIG_OF_CONFIGFS=y From cc5cd5079699c7831fdc58e74352736706c3df3c Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 9 Apr 2018 23:03:36 +0800 Subject: [PATCH 162/660] xen: xen-pciback: Replace GFP_ATOMIC with GFP_KERNEL in pcistub_probe pcistub_probe() is never called in atomic context. This function is only set as ".probe" in struct pci_driver. Despite never getting called from atomic context, pcistub_probe() calls kmalloc() with GFP_ATOMIC, which does not sleep for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, which can sleep and improve the possibility of sucessful allocation. This is found by a static analysis tool named DCNS written by myself. And I also manually check it. Signed-off-by: Jia-Ju Bai Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-pciback/pci_stub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 9e480fdebe1f..95e6ddd7c402 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -577,7 +577,7 @@ static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id) } if (!match) { - pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_ATOMIC); + pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); if (!pci_dev_id) { err = -ENOMEM; goto out; From bb52e3169cb7dd5a9deea39b94342fce36235a5b Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 9 Apr 2018 23:03:53 +0800 Subject: [PATCH 163/660] xen: xen-pciback: Replace GFP_ATOMIC with GFP_KERNEL in pcistub_init_device pcistub_init_device() is never called in atomic context. The call chain ending up at pcistub_init_device() is: [1] pcistub_init_device() <- pcistub_seize() <- pcistub_probe() [2] pcistub_init_device() <- pcistub_init_devices_late() <- xen_pcibk_init() pcistub_probe() is only set as ".probe" in struct pci_driver. xen_pcibk_init() is is only set as a parameter of module_init(). These functions are not called in atomic context. Despite never getting called from atomic context, pcistub_init_device() calls kzalloc() with GFP_ATOMIC, which does not sleep for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, which can sleep and improve the possibility of sucessful allocation. This is found by a static analysis tool named DCNS written by myself. And I also manually check it. Signed-off-by: Jia-Ju Bai Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-pciback/pci_stub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 95e6ddd7c402..b36fa76a69c7 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -364,7 +364,7 @@ static int pcistub_init_device(struct pci_dev *dev) * here and then to call kfree(pci_get_drvdata(psdev->dev)). */ dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]") - + strlen(pci_name(dev)) + 1, GFP_ATOMIC); + + strlen(pci_name(dev)) + 1, GFP_KERNEL); if (!dev_data) { err = -ENOMEM; goto out; From 9eb5f15b47b69847bfceb94350bd68fbdbf829e3 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 9 Apr 2018 23:04:12 +0800 Subject: [PATCH 164/660] xen: xen-pciback: Replace GFP_ATOMIC with GFP_KERNEL in pcistub_device_alloc pcistub_device_alloc() is never called in atomic context. The call chain ending up at pcistub_device_alloc() is: [1] pcistub_device_alloc() <- pcistub_seize() <- pcistub_probe() pcistub_probe() is only set as ".probe" in struct pci_driver. This function is not called in atomic context. Despite never getting called from atomic context, pcistub_device_alloc() calls kzalloc() with GFP_ATOMIC, which does not sleep for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, which can sleep and improve the possibility of sucessful allocation. Signed-off-by: Jia-Ju Bai Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-pciback/pci_stub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index b36fa76a69c7..210b93f41385 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -71,7 +71,7 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) dev_dbg(&dev->dev, "pcistub_device_alloc\n"); - psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC); + psdev = kzalloc(sizeof(*psdev), GFP_KERNEL); if (!psdev) return NULL; From 230d211472d2779253e5a8383353fc44783dd038 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 9 Apr 2018 23:04:25 +0800 Subject: [PATCH 165/660] xen: xen-pciback: Replace GFP_ATOMIC with GFP_KERNEL in xen_pcibk_config_quirks_init xen_pcibk_config_quirks_init() is never called in atomic context. The call chains ending up at xen_pcibk_config_quirks_init() are: [1] xen_pcibk_config_quirks_init() <- xen_pcibk_config_init_dev() <- pcistub_init_device() <- pcistub_seize() <- pcistub_probe() [2] xen_pcibk_config_quirks_init() <- xen_pcibk_config_init_dev() <- pcistub_init_device() <- pcistub_init_devices_late() <- xen_pcibk_init() pcistub_probe() is only set as ".probe" in struct pci_driver. xen_pcibk_init() is is only set as a parameter of module_init(). These functions are not called in atomic context. Despite never getting called from atomic context, xen_pcibk_config_quirks_init() calls kzalloc() with GFP_ATOMIC, which does not sleep for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, which can sleep and improve the possibility of sucessful allocation. Signed-off-by: Jia-Ju Bai Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-pciback/conf_space_quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c index 89d9744ece61..ed593d1042a6 100644 --- a/drivers/xen/xen-pciback/conf_space_quirks.c +++ b/drivers/xen/xen-pciback/conf_space_quirks.c @@ -95,7 +95,7 @@ int xen_pcibk_config_quirks_init(struct pci_dev *dev) struct xen_pcibk_config_quirk *quirk; int ret = 0; - quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC); + quirk = kzalloc(sizeof(*quirk), GFP_KERNEL); if (!quirk) { ret = -ENOMEM; goto out; From de3d01fd8549ec0444fc917aab711b3f884930c5 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 11 Apr 2018 09:15:31 +0800 Subject: [PATCH 166/660] xen: xen-pciback: Replace GFP_ATOMIC with GFP_KERNEL in pcistub_reg_add pcistub_reg_add() is never called in atomic context. pcistub_reg_add() is only called by pcistub_quirk_add, which is only set in DRIVER_ATTR(). Despite never getting called from atomic context, pcistub_reg_add() calls kzalloc() with GFP_ATOMIC, which does not sleep for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, which can sleep and improve the possibility of sucessful allocation. This is found by a static analysis tool named DCNS written by myself. And I also manually check it. Signed-off-by: Jia-Ju Bai Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-pciback/pci_stub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 210b93f41385..59661db144e5 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -1149,7 +1149,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, } dev = psdev->dev; - field = kzalloc(sizeof(*field), GFP_ATOMIC); + field = kzalloc(sizeof(*field), GFP_KERNEL); if (!field) { err = -ENOMEM; goto out; From b8858581febb050688e276b956796bc4a78299ed Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Apr 2018 23:25:19 +1000 Subject: [PATCH 167/660] powerpc/lib: Fix off-by-one in alternate feature patching When we patch an alternate feature section, we have to adjust any relative branches that branch out of the alternate section. But currently we have a bug if we have a branch that points to past the last instruction of the alternate section, eg: FTR_SECTION_ELSE 1: b 2f or 6,6,6 2: ALT_FTR_SECTION_END(...) nop This will result in a relative branch at 1 with a target that equals the end of the alternate section. That branch does not need adjusting when it's moved to the non-else location. Currently we do adjust it, resulting in a branch that goes off into the link-time location of the else section, which is junk. The fix is to not patch branches that have a target == end of the alternate section. Fixes: d20fe50a7b3c ("KVM: PPC: Book3S HV: Branch inside feature section") Fixes: 9b1a735de64c ("powerpc: Add logic to patch alternative feature sections") Cc: stable@vger.kernel.org # v2.6.27+ Signed-off-by: Michael Ellerman --- arch/powerpc/lib/feature-fixups.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 35f80ab7cbd8..288fe4f0db4e 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -55,7 +55,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, unsigned int *target = (unsigned int *)branch_target(src); /* Branch within the section doesn't need translating */ - if (target < alt_start || target >= alt_end) { + if (target < alt_start || target > alt_end) { instr = translate_branch(dest, src); if (!instr) return 1; From 4fb0534fb7bbc2346ba7d3a072b538007f4135a5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 13 Apr 2018 13:59:25 +0200 Subject: [PATCH 168/660] team: avoid adding twice the same option to the event list When parsing the options provided by the user space, team_nl_cmd_options_set() insert them in a temporary list to send multiple events with a single message. While each option's attribute is correctly validated, the code does not check for duplicate entries before inserting into the event list. Exploiting the above, the syzbot was able to trigger the following splat: kernel BUG at lib/list_debug.c:31! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4466 Comm: syzkaller556835 Not tainted 4.16.0+ #17 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__list_add_valid+0xaa/0xb0 lib/list_debug.c:29 RSP: 0018:ffff8801b04bf248 EFLAGS: 00010286 RAX: 0000000000000058 RBX: ffff8801c8fc7a90 RCX: 0000000000000000 RDX: 0000000000000058 RSI: ffffffff815fbf41 RDI: ffffed0036097e3f RBP: ffff8801b04bf260 R08: ffff8801b0b2a700 R09: ffffed003b604f90 R10: ffffed003b604f90 R11: ffff8801db027c87 R12: ffff8801c8fc7a90 R13: ffff8801c8fc7a90 R14: dffffc0000000000 R15: 0000000000000000 FS: 0000000000b98880(0000) GS:ffff8801db000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000043fc30 CR3: 00000001afe8e000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_add include/linux/list.h:60 [inline] list_add include/linux/list.h:79 [inline] team_nl_cmd_options_set+0x9ff/0x12b0 drivers/net/team/team.c:2571 genl_family_rcv_msg+0x889/0x1120 net/netlink/genetlink.c:599 genl_rcv_msg+0xc6/0x170 net/netlink/genetlink.c:624 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2448 genl_rcv+0x28/0x40 net/netlink/genetlink.c:635 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x58b/0x740 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x9f0/0xfa0 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:639 ___sys_sendmsg+0x805/0x940 net/socket.c:2117 __sys_sendmsg+0x115/0x270 net/socket.c:2155 SYSC_sendmsg net/socket.c:2164 [inline] SyS_sendmsg+0x29/0x30 net/socket.c:2162 do_syscall_64+0x29e/0x9d0 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x4458b9 RSP: 002b:00007ffd1d4a7278 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000000001b RCX: 00000000004458b9 RDX: 0000000000000010 RSI: 0000000020000d00 RDI: 0000000000000004 RBP: 00000000004a74ed R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000213 R12: 00007ffd1d4a7348 R13: 0000000000402a60 R14: 0000000000000000 R15: 0000000000000000 Code: 75 e8 eb a9 48 89 f7 48 89 75 e8 e8 d1 85 7b fe 48 8b 75 e8 eb bb 48 89 f2 48 89 d9 4c 89 e6 48 c7 c7 a0 84 d8 87 e8 ea 67 28 fe <0f> 0b 0f 1f 40 00 48 b8 00 00 00 00 00 fc ff df 55 48 89 e5 41 RIP: __list_add_valid+0xaa/0xb0 lib/list_debug.c:29 RSP: ffff8801b04bf248 This changeset addresses the avoiding list_add() if the current option is already present in the event list. Reported-and-tested-by: syzbot+4d4af685432dc0e56c91@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Fixes: 2fcdb2c9e659 ("team: allow to send multiple set events in one message") Signed-off-by: David S. Miller --- drivers/net/team/team.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a6c6ce19eeee..acbe84967834 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -261,6 +261,17 @@ static void __team_option_inst_mark_removed_port(struct team *team, } } +static bool __team_option_inst_tmp_find(const struct list_head *opts, + const struct team_option_inst *needle) +{ + struct team_option_inst *opt_inst; + + list_for_each_entry(opt_inst, opts, tmp_list) + if (opt_inst == needle) + return true; + return false; +} + static int __team_options_register(struct team *team, const struct team_option *option, size_t option_count) @@ -2568,6 +2579,14 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) if (err) goto team_put; opt_inst->changed = true; + + /* dumb/evil user-space can send us duplicate opt, + * keep only the last one + */ + if (__team_option_inst_tmp_find(&opt_inst_list, + opt_inst)) + continue; + list_add(&opt_inst->tmp_list, &opt_inst_list); } if (!opt_found) { From e7c5a571a8d6a266aee9ca3f3f26e5afe3717eca Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Apr 2018 12:34:24 -0700 Subject: [PATCH 169/660] libnvdimm, dimm: handle EACCES failures from label reads The new support for the standard _LSR and _LSW methods neglected to also update the nvdimm_init_config_data() and nvdimm_set_config_data() to return the translated error code from failed commands. This precision is necessary because the locked status that was previously returned on ND_CMD_GET_CONFIG_SIZE commands is now returned on ND_CMD_{GET,SET}_CONFIG_DATA commands. If the kernel misses this indication it can inadvertently fall back to label-less mode when it should otherwise avoid all access to locked regions. Cc: Fixes: 4b27db7e26cd ("acpi, nfit: add support for the _LSI, _LSR, and...") Signed-off-by: Dan Williams --- drivers/nvdimm/dimm_devs.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index e00d45522b80..8d348b22ba45 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -88,9 +88,9 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) { struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + int rc = validate_dimm(ndd), cmd_rc = 0; struct nd_cmd_get_config_data_hdr *cmd; struct nvdimm_bus_descriptor *nd_desc; - int rc = validate_dimm(ndd); u32 max_cmd_size, config_size; size_t offset; @@ -124,9 +124,11 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) cmd->in_offset = offset; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd), NULL); - if (rc || cmd->status) { - rc = -ENXIO; + cmd->in_length + sizeof(*cmd), &cmd_rc); + if (rc < 0) + break; + if (cmd_rc < 0) { + rc = cmd_rc; break; } memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); @@ -140,9 +142,9 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len) { - int rc = validate_dimm(ndd); size_t max_cmd_size, buf_offset; struct nd_cmd_set_config_hdr *cmd; + int rc = validate_dimm(ndd), cmd_rc = 0; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; @@ -164,7 +166,6 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, for (buf_offset = 0; len; len -= cmd->in_length, buf_offset += cmd->in_length) { size_t cmd_size; - u32 *status; cmd->in_offset = offset + buf_offset; cmd->in_length = min(max_cmd_size, len); @@ -172,12 +173,13 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, /* status is output in the last 4-bytes of the command buffer */ cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32); - status = ((void *) cmd) + cmd_size - sizeof(u32); rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL); - if (rc || *status) { - rc = rc ? rc : -ENXIO; + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, &cmd_rc); + if (rc < 0) + break; + if (cmd_rc < 0) { + rc = cmd_rc; break; } } From 55c72ab62e47fc584131901baddb2752e949ebcd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Apr 2018 13:56:43 -0700 Subject: [PATCH 170/660] tools/testing/nvdimm: allow custom error code injection Given that libnvdimm driver stack takes specific actions on DIMM command error codes like -EACCES, provide a facility to inject custom failures. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 38 +++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cb166be4918d..dc6cf5630280 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -138,6 +138,7 @@ static u32 handle[] = { }; static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +static int dimm_fail_cmd_code[NUM_DCR]; struct nfit_test_fw { enum intel_fw_update_state state; @@ -892,8 +893,11 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) if (i >= ARRAY_SIZE(handle)) return -ENXIO; - if ((1 << func) & dimm_fail_cmd_flags[i]) + if ((1 << func) & dimm_fail_cmd_flags[i]) { + if (dimm_fail_cmd_code[i]) + return dimm_fail_cmd_code[i]; return -EIO; + } return i; } @@ -1225,8 +1229,40 @@ static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(fail_cmd); +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int dimm = dimm_name_to_id(dev); + + if (dimm < 0) + return dimm; + + return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int dimm = dimm_name_to_id(dev); + unsigned long val; + ssize_t rc; + + if (dimm < 0) + return dimm; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm_fail_cmd_code[dimm] = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + + static struct attribute *nfit_test_dimm_attributes[] = { &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, &dev_attr_handle.attr, NULL, }; From 718fda67d2f69cc6074b4b6a740a6e4aacd44eff Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Apr 2018 16:38:01 -0700 Subject: [PATCH 171/660] tools/testing/nvdimm: support nfit_test_dimm attributes under nfit_test.1 The nfit_test.1 bus provides a pmem topology without blk-aperture enabling, so it presents different failure modes for label space handling. Allow custom DSM command error injection. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 43 +++++++++++++++++++------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index dc6cf5630280..c8c88363311b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1166,12 +1166,12 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state) static void put_dimms(void *data) { - struct device **dimm_dev = data; + struct nfit_test *t = data; int i; - for (i = 0; i < NUM_DCR; i++) - if (dimm_dev[i]) - device_unregister(dimm_dev[i]); + for (i = 0; i < t->num_dcr; i++) + if (t->dimm_dev[i]) + device_unregister(t->dimm_dev[i]); } static struct class *nfit_test_dimm; @@ -1180,13 +1180,11 @@ static int dimm_name_to_id(struct device *dev) { int dimm; - if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1 - || dimm >= NUM_DCR || dimm < 0) + if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1) return -ENXIO; return dimm; } - static ssize_t handle_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1259,7 +1257,6 @@ static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute * } static DEVICE_ATTR_RW(fail_cmd_code); - static struct attribute *nfit_test_dimm_attributes[] = { &dev_attr_fail_cmd.attr, &dev_attr_fail_cmd_code.attr, @@ -1276,6 +1273,23 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static int nfit_test_dimm_init(struct nfit_test *t) +{ + int i; + + if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t)) + return -ENOMEM; + for (i = 0; i < t->num_dcr; i++) { + t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, + &t->pdev.dev, 0, NULL, + nfit_test_dimm_attribute_groups, + "test_dimm%d", i + t->dcr_idx); + if (!t->dimm_dev[i]) + return -ENOMEM; + } + return 0; +} + static void smart_init(struct nfit_test *t) { int i; @@ -1371,17 +1385,8 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->_fit) return -ENOMEM; - if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev)) + if (nfit_test_dimm_init(t)) return -ENOMEM; - for (i = 0; i < NUM_DCR; i++) { - t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, - &t->pdev.dev, 0, NULL, - nfit_test_dimm_attribute_groups, - "test_dimm%d", i); - if (!t->dimm_dev[i]) - return -ENOMEM; - } - smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1413,6 +1418,8 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + if (nfit_test_dimm_init(t)) + return -ENOMEM; smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } From 19357a685e870eeb825cbb7fd3104082ab041987 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 10 Apr 2018 14:23:47 -0700 Subject: [PATCH 172/660] tools/testing/nvdimm: fix missing newline in nfit_test_dimm 'handle' attribute Sysfs userspace tooling generally expects the kernel to emit a newlines when reading sysfs attributes. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index c8c88363311b..51fc41c24a77 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1193,7 +1193,7 @@ static ssize_t handle_show(struct device *dev, struct device_attribute *attr, if (dimm < 0) return dimm; - return sprintf(buf, "%#x", handle[dimm]); + return sprintf(buf, "%#x\n", handle[dimm]); } DEVICE_ATTR_RO(handle); From 9484e12d7999a3c82d8732b60c0149f038bdd985 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Apr 2018 14:29:33 -0700 Subject: [PATCH 173/660] tools/testing/nvdimm: enable labels for nfit_test.1 dimms Enable test cases for the kernel's fallback to label-less mode. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 51fc41c24a77..4ea385be528f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -2265,6 +2265,9 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, From bffd168c3fc5cc7d2bad4c668fa90e7a9010db4b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 14 Apr 2018 20:44:46 -0400 Subject: [PATCH 174/660] tcp: clear tp->packets_out when purging write queue Clear tp->packets_out when purging the write queue, otherwise tcp_rearm_rto() mistakenly assumes TCP write queue is not empty. This results in NULL pointer dereference. Also, remove the redundant `tp->packets_out = 0` from tcp_disconnect(), since tcp_disconnect() calls tcp_write_queue_purge(). Fixes: a27fd7a8ed38 (tcp: purge write queue upon RST) Reported-by: Subash Abhinov Kasiviswanathan Reported-by: Sami Farin Tested-by: Sami Farin Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4fa3f812b9ff..9ce1c726185e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2368,6 +2368,7 @@ void tcp_write_queue_purge(struct sock *sk) INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); + tcp_sk(sk)->packets_out = 0; } int tcp_disconnect(struct sock *sk, int flags) @@ -2417,7 +2418,6 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; - tp->packets_out = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; From f23e0643cd0b53e68e283b6f26194d56c28a2eb1 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Sun, 15 Apr 2018 18:53:36 -0500 Subject: [PATCH 175/660] ibmvnic: Clear pending interrupt after device reset Due to a firmware bug, the hypervisor can send an interrupt to a transmit or receive queue just prior to a partition migration, not allowing the device enough time to handle it and send an EOI. When the partition migrates, the interrupt is lost but an "EOI-pending" flag for the interrupt line is still set in firmware. No further interrupts will be sent until that flag is cleared, effectively freezing that queue. To workaround this, the driver will disable the hardware interrupt and send an H_EOI signal prior to re-enabling it. This will flush the pending EOI and allow the driver to continue operation. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index f4a56a3c07ad..2df01ad98df7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1049,16 +1049,14 @@ static int __ibmvnic_open(struct net_device *netdev) netdev_dbg(netdev, "Enabling rx_scrq[%d] irq\n", i); if (prev_state == VNIC_CLOSED) enable_irq(adapter->rx_scrq[i]->irq); - else - enable_scrq_irq(adapter, adapter->rx_scrq[i]); + enable_scrq_irq(adapter, adapter->rx_scrq[i]); } for (i = 0; i < adapter->req_tx_queues; i++) { netdev_dbg(netdev, "Enabling tx_scrq[%d] irq\n", i); if (prev_state == VNIC_CLOSED) enable_irq(adapter->tx_scrq[i]->irq); - else - enable_scrq_irq(adapter, adapter->tx_scrq[i]); + enable_scrq_irq(adapter, adapter->tx_scrq[i]); } rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); @@ -1199,6 +1197,7 @@ static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter) if (adapter->tx_scrq[i]->irq) { netdev_dbg(netdev, "Disabling tx_scrq[%d] irq\n", i); + disable_scrq_irq(adapter, adapter->tx_scrq[i]); disable_irq(adapter->tx_scrq[i]->irq); } } @@ -1208,6 +1207,7 @@ static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter) if (adapter->rx_scrq[i]->irq) { netdev_dbg(netdev, "Disabling rx_scrq[%d] irq\n", i); + disable_scrq_irq(adapter, adapter->rx_scrq[i]); disable_irq(adapter->rx_scrq[i]->irq); } } @@ -2617,12 +2617,19 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, { struct device *dev = &adapter->vdev->dev; unsigned long rc; + u64 val; if (scrq->hw_irq > 0x100000000ULL) { dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq); return 1; } + val = (0xff000000) | scrq->hw_irq; + rc = plpar_hcall_norets(H_EOI, val); + if (rc) + dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", + val, rc); + rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); if (rc) From 5171b37d959641bbc619781caf62e61f7b940871 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Apr 2018 17:52:04 -0700 Subject: [PATCH 176/660] net: af_packet: fix race in PACKET_{R|T}X_RING In order to remove the race caught by syzbot [1], we need to lock the socket before using po->tp_version as this could change under us otherwise. This means lock_sock() and release_sock() must be done by packet_set_ring() callers. [1] : BUG: KMSAN: uninit-value in packet_set_ring+0x1254/0x3870 net/packet/af_packet.c:4249 CPU: 0 PID: 20195 Comm: syzkaller707632 Not tainted 4.16.0+ #83 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 packet_set_ring+0x1254/0x3870 net/packet/af_packet.c:4249 packet_setsockopt+0x12c6/0x5a90 net/packet/af_packet.c:3662 SYSC_setsockopt+0x4b8/0x570 net/socket.c:1849 SyS_setsockopt+0x76/0xa0 net/socket.c:1828 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x449099 RSP: 002b:00007f42b5307ce8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 000000000070003c RCX: 0000000000449099 RDX: 0000000000000005 RSI: 0000000000000107 RDI: 0000000000000003 RBP: 0000000000700038 R08: 000000000000001c R09: 0000000000000000 R10: 00000000200000c0 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000080eecf R14: 00007f42b53089c0 R15: 0000000000000001 Local variable description: ----req_u@packet_setsockopt Variable was created at: packet_setsockopt+0x13f/0x5a90 net/packet/af_packet.c:3612 SYSC_setsockopt+0x4b8/0x570 net/socket.c:1849 Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/packet/af_packet.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 616cb9c18f88..c31b0687396a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3008,6 +3008,7 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); + lock_sock(sk); if (po->rx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); @@ -3017,6 +3018,7 @@ static int packet_release(struct socket *sock) memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); } + release_sock(sk); f = fanout_release(sk); @@ -3643,6 +3645,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv union tpacket_req_u req_u; int len; + lock_sock(sk); switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: @@ -3653,12 +3656,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv len = sizeof(req_u.req3); break; } - if (optlen < len) - return -EINVAL; - if (copy_from_user(&req_u.req, optval, len)) - return -EFAULT; - return packet_set_ring(sk, &req_u, 0, - optname == PACKET_TX_RING); + if (optlen < len) { + ret = -EINVAL; + } else { + if (copy_from_user(&req_u.req, optval, len)) + ret = -EFAULT; + else + ret = packet_set_ring(sk, &req_u, 0, + optname == PACKET_TX_RING); + } + release_sock(sk); + return ret; } case PACKET_COPY_THRESH: { @@ -4208,8 +4216,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; - lock_sock(sk); - rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; @@ -4347,7 +4353,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: - release_sock(sk); return err; } From 114aa35d06d4920c537b72f9fa935de5dd205260 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 30 Mar 2018 13:22:06 -0700 Subject: [PATCH 177/660] netfilter: conntrack: silent a memory leak warning The following memory leak is false postive: unreferenced object 0xffff8f37f156fb38 (size 128): comm "softirq", pid 0, jiffies 4294899665 (age 11.292s) hex dump (first 32 bytes): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk 00 00 00 00 30 00 20 00 48 6b 6b 6b 6b 6b 6b 6b ....0. .Hkkkkkkk backtrace: [<000000004fda266a>] __kmalloc_track_caller+0x10d/0x141 [<000000007b0a7e3c>] __krealloc+0x45/0x62 [<00000000d08e0bfb>] nf_ct_ext_add+0xdc/0x133 [<0000000099b47fd8>] init_conntrack+0x1b1/0x392 [<0000000086dc36ec>] nf_conntrack_in+0x1ee/0x34b [<00000000940592de>] nf_hook_slow+0x36/0x95 [<00000000d1bd4da7>] nf_hook.constprop.43+0x1c3/0x1dd [<00000000c3673266>] __ip_local_out+0xae/0xb4 [<000000003e4192a6>] ip_local_out+0x17/0x33 [<00000000b64356de>] igmp_ifc_timer_expire+0x23e/0x26f [<000000006a8f3032>] call_timer_fn+0x14c/0x2a5 [<00000000650c1725>] __run_timers.part.34+0x150/0x182 [<0000000090e6946e>] run_timer_softirq+0x2a/0x4c [<000000004d1e7293>] __do_softirq+0x1d1/0x3c2 [<000000004643557d>] irq_exit+0x53/0xa2 [<0000000029ddee8f>] smp_apic_timer_interrupt+0x22a/0x235 because __krealloc() is not supposed to release the old memory and it is released later via kfree_rcu(). Since this is the only external user of __krealloc(), just mark it as not leak here. Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_extend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 9fe0ddc333fb..bd71a828ebde 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -71,6 +71,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) rcu_read_unlock(); alloc = max(newlen, NF_CT_EXT_PREALLOC); + kmemleak_not_leak(old); new = __krealloc(old, alloc, gfp); if (!new) return NULL; From a6615743704fdc179e227f84b7903edd1f0b4241 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Apr 2018 12:53:12 +0200 Subject: [PATCH 178/660] netfilter: fix CONFIG_NF_REJECT_IPV6=m link error We get a new link error with CONFIG_NFT_REJECT_INET=y and CONFIG_NF_REJECT_IPV6=m after larger parts of the nftables modules are linked together: net/netfilter/nft_reject_inet.o: In function `nft_reject_inet_eval': nft_reject_inet.c:(.text+0x17c): undefined reference to `nf_send_unreach6' nft_reject_inet.c:(.text+0x190): undefined reference to `nf_send_reset6' The problem is that with NF_TABLES_INET set, we implicitly try to use the ipv6 version as well for NFT_REJECT, but when CONFIG_IPV6 is set to a loadable module, it's impossible to reach that. The best workaround I found is to express the above as a Kconfig dependency, forcing NFT_REJECT itself to be 'm' in that particular configuration. Fixes: 02c7b25e5f54 ("netfilter: nf_tables: build-in filter chain type") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 704b3832dbad..44d8a55e9721 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -594,6 +594,7 @@ config NFT_QUOTA config NFT_REJECT default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" + depends on !NF_TABLES_INET || (IPV6!=m || m) help This option adds the "reject" expression that you can use to explicitly deny and notify via TCP reset/ICMP informational errors From 569ccae68b38654f04b6842b034aa33857f605fe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Apr 2018 09:30:27 +0200 Subject: [PATCH 179/660] netfilter: nf_tables: can't fail after linking rule into active rule list rules in nftables a free'd using kfree, but protected by rcu, i.e. we must wait for a grace period to elapse. Normal removal patch does this, but nf_tables_newrule() doesn't obey this rule during error handling. It calls nft_trans_rule_add() *after* linking rule, and, if that fails to allocate memory, it unlinks the rule and then kfree() it -- this is unsafe. Switch order -- first add rule to transaction list, THEN link it to public list. Note: nft_trans_rule_add() uses GFP_KERNEL; it will not fail so this is not a problem in practice (spotted only during code review). Fixes: 0628b123c96d12 ("netfilter: nfnetlink: add batch support and use it from nf_tables") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 59 +++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9134cc429ad4..b1984f8f7253 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2361,41 +2361,46 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (nft_is_active_next(net, old_rule)) { - trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, - old_rule); - if (trans == NULL) { - err = -ENOMEM; - goto err2; - } - nft_deactivate_next(net, old_rule); - chain->use--; - list_add_tail_rcu(&rule->list, &old_rule->list); - } else { + if (!nft_is_active_next(net, old_rule)) { err = -ENOENT; goto err2; } - } else if (nlh->nlmsg_flags & NLM_F_APPEND) - if (old_rule) - list_add_rcu(&rule->list, &old_rule->list); - else - list_add_tail_rcu(&rule->list, &chain->rules); - else { - if (old_rule) - list_add_tail_rcu(&rule->list, &old_rule->list); - else - list_add_rcu(&rule->list, &chain->rules); - } + trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, + old_rule); + if (trans == NULL) { + err = -ENOMEM; + goto err2; + } + nft_deactivate_next(net, old_rule); + chain->use--; - if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { - err = -ENOMEM; - goto err3; + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { + err = -ENOMEM; + goto err2; + } + + list_add_tail_rcu(&rule->list, &old_rule->list); + } else { + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { + err = -ENOMEM; + goto err2; + } + + if (nlh->nlmsg_flags & NLM_F_APPEND) { + if (old_rule) + list_add_rcu(&rule->list, &old_rule->list); + else + list_add_tail_rcu(&rule->list, &chain->rules); + } else { + if (old_rule) + list_add_tail_rcu(&rule->list, &old_rule->list); + else + list_add_rcu(&rule->list, &chain->rules); + } } chain->use++; return 0; -err3: - list_del_rcu(&rule->list); err2: nf_tables_rule_destroy(&ctx, rule); err1: From 2f6adf481527c8ab8033c601f55bfb5b3712b2ac Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Apr 2018 09:00:24 +0200 Subject: [PATCH 180/660] netfilter: nf_tables: free set name in error path set->name must be free'd here in case ops->init fails. Fixes: 387454901bd6 ("netfilter: nf_tables: Allow set names of up to 255 chars") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b1984f8f7253..102ad873acb4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3212,18 +3212,20 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err = ops->init(set, &desc, nla); if (err < 0) - goto err2; + goto err3; err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err3; + goto err4; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; -err3: +err4: ops->destroy(set); +err3: + kfree(set->name); err2: kvfree(set); err1: From e79f245ddec17bbd89d73cd0169dba4be46c9b55 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Sat, 14 Apr 2018 05:10:52 +0200 Subject: [PATCH 181/660] X86/KVM: Properly update 'tsc_offset' to represent the running guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update 'tsc_offset' on vmentry/vmexit of L2 guests to ensure that it always captures the TSC_OFFSET of the running guest whether it is the L1 or L2 guest. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Jim Mattson Suggested-by: Paolo Bonzini Signed-off-by: KarimAllah Ahmed [AMD changes, fix update_ia32_tsc_adjust_msr. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 17 ++++++++++- arch/x86/kvm/vmx.c | 54 +++++++++++++++++++++------------ arch/x86/kvm/x86.c | 6 ++-- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 949c977bc4c9..c25775fad4ed 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1013,6 +1013,7 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); + u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3ebc8ad6891..e77a536d0b7c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1424,12 +1424,23 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } +static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (is_guest_mode(vcpu)) + return svm->nested.hsave->control.tsc_offset; + + return vcpu->arch.tsc_offset; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); u64 g_tsc_offset = 0; if (is_guest_mode(vcpu)) { + /* Write L1's TSC offset. */ g_tsc_offset = svm->vmcb->control.tsc_offset - svm->nested.hsave->control.tsc_offset; svm->nested.hsave->control.tsc_offset = offset; @@ -3323,6 +3334,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) /* Restore the original control entries */ copy_vmcb_control_area(vmcb, hsave); + svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset; kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); @@ -3483,10 +3495,12 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, /* We don't want to see VMMCALLs from a nested guest */ clr_intercept(svm, INTERCEPT_VMMCALL); + svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset; + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; + svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; - svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; @@ -7102,6 +7116,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .has_wbinvd_exit = svm_has_wbinvd_exit, + .read_l1_tsc_offset = svm_read_l1_tsc_offset, .write_tsc_offset = svm_write_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a13c603bdefb..7207e6cc07c1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2874,6 +2874,17 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx_update_msr_bitmap(&vmx->vcpu); } +static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (is_guest_mode(vcpu) && + (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) + return vcpu->arch.tsc_offset - vmcs12->tsc_offset; + + return vcpu->arch.tsc_offset; +} + /* * reads and returns guest's timestamp counter "register" * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset @@ -11175,11 +11186,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); } - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) - vmcs_write64(TSC_OFFSET, - vcpu->arch.tsc_offset + vmcs12->tsc_offset); - else - vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); + if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -11427,6 +11435,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); u32 msr_entry_idx; u32 exit_qual; + int r; enter_guest_mode(vcpu); @@ -11436,26 +11445,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); - if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { - leave_guest_mode(vcpu); - vmx_switch_vmcs(vcpu, &vmx->vmcs01); - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, exit_qual); - return 1; - } + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + vcpu->arch.tsc_offset += vmcs12->tsc_offset; + + r = EXIT_REASON_INVALID_STATE; + if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) + goto fail; nested_get_vmcs12_pages(vcpu, vmcs12); + r = EXIT_REASON_MSR_LOAD_FAIL; msr_entry_idx = nested_vmx_load_msr(vcpu, vmcs12->vm_entry_msr_load_addr, vmcs12->vm_entry_msr_load_count); - if (msr_entry_idx) { - leave_guest_mode(vcpu); - vmx_switch_vmcs(vcpu, &vmx->vmcs01); - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); - return 1; - } + if (msr_entry_idx) + goto fail; /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point @@ -11464,6 +11468,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) * the success flag) when L2 exits (see nested_vmx_vmexit()). */ return 0; + +fail: + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + vcpu->arch.tsc_offset -= vmcs12->tsc_offset; + leave_guest_mode(vcpu); + vmx_switch_vmcs(vcpu, &vmx->vmcs01); + nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual); + return 1; } /* @@ -12035,6 +12047,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, leave_guest_mode(vcpu); + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + vcpu->arch.tsc_offset -= vmcs12->tsc_offset; + if (likely(!vmx->fail)) { if (exit_reason == -1) sync_vmcs12(vcpu, vmcs12); @@ -12725,6 +12740,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + .read_l1_tsc_offset = vmx_read_l1_tsc_offset, .write_tsc_offset = vmx_write_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0334b250e102..3f3fba58c960 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1490,7 +1490,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) { - u64 curr_offset = vcpu->arch.tsc_offset; + u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } @@ -1532,7 +1532,9 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { - return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc); + u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); + + return tsc_offset + kvm_scale_tsc(vcpu, host_tsc); } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); From dd259935e4eec844dc3e5b8a7cd951cd658b4fb6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Apr 2018 11:38:35 +0200 Subject: [PATCH 182/660] kvm: x86: move MSR_IA32_TSC handling to x86.c This is not specific to Intel/AMD anymore. The TSC offset is available in vcpu->arch.tsc_offset. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 9 --------- arch/x86/kvm/vmx.c | 20 -------------------- arch/x86/kvm/x86.c | 6 ++++++ 3 files changed, 6 insertions(+), 29 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e77a536d0b7c..80ea26274a54 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4050,12 +4050,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct vcpu_svm *svm = to_svm(vcpu); switch (msr_info->index) { - case MSR_IA32_TSC: { - msr_info->data = svm->vmcb->control.tsc_offset + - kvm_scale_tsc(vcpu, rdtsc()); - - break; - } case MSR_STAR: msr_info->data = svm->vmcb->save.star; break; @@ -4208,9 +4202,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->vmcb->save.g_pat = data; mark_dirty(svm->vmcb, VMCB_NPT); break; - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr); - break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7207e6cc07c1..5b49ad448ad6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2885,20 +2885,6 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) return vcpu->arch.tsc_offset; } -/* - * reads and returns guest's timestamp counter "register" - * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset - * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3 - */ -static u64 guest_read_tsc(struct kvm_vcpu *vcpu) -{ - u64 host_tsc, tsc_offset; - - host_tsc = rdtsc(); - tsc_offset = vmcs_read64(TSC_OFFSET); - return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset; -} - /* * writes 'offset' into guest's timestamp counter offset register */ @@ -3529,9 +3515,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); - case MSR_IA32_TSC: - msr_info->data = guest_read_tsc(vcpu); - break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && @@ -3651,9 +3634,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr_info); - break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3f3fba58c960..51ecd381793b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2364,6 +2364,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.smbase = data; break; + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr_info); + break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) return 1; @@ -2607,6 +2610,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UCODE_REV: msr_info->data = vcpu->arch.microcode_version; break; + case MSR_IA32_TSC: + msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; + break; case MSR_MTRRcap: case 0x200 ... 0x2ff: return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); From d5edb7f8e7ab9fd5fd54a77d957b1733f117a813 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 27 Mar 2018 22:46:11 +0200 Subject: [PATCH 183/660] kvm: selftests: add vmx_tsc_adjust_test The test checks the behavior of setting MSR_IA32_TSC in a nested guest, and the TSC_OFFSET VMCS field in general. It also introduces the testing infrastructure for Intel nested virtualization. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 3 +- .../testing/selftests/kvm/include/kvm_util.h | 15 +- tools/testing/selftests/kvm/include/vmx.h | 494 ++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 18 +- tools/testing/selftests/kvm/lib/vmx.c | 243 +++++++++ .../selftests/kvm/vmx_tsc_adjust_test.c | 231 ++++++++ 6 files changed, 991 insertions(+), 13 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/vmx.h create mode 100644 tools/testing/selftests/kvm/lib/vmx.c create mode 100644 tools/testing/selftests/kvm/vmx_tsc_adjust_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 1da541e1ab75..2ddcc96ae456 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -4,10 +4,11 @@ top_srcdir = ../../../../ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c -LIBKVM_x86_64 = lib/x86.c +LIBKVM_x86_64 = lib/x86.c lib/vmx.c TEST_GEN_PROGS_x86_64 = set_sregs_test TEST_GEN_PROGS_x86_64 += sync_regs_test +TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 57974ad46373..637b7017b6ee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -112,24 +112,27 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); +struct kvm_cpuid2 *kvm_get_supported_cpuid(void); void vcpu_set_cpuid( struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); -struct kvm_cpuid2 *allocate_kvm_cpuid2(void); struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index); +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); static inline struct kvm_cpuid_entry2 * -find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) +kvm_get_supported_cpuid_entry(uint32_t function) { - return find_cpuid_index_entry(cpuid, function, 0); + return kvm_get_supported_cpuid_index(function, 0); } struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); +typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, + vm_paddr_t vmxon_paddr, + vm_vaddr_t vmcs_vaddr, + vm_paddr_t vmcs_paddr); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h new file mode 100644 index 000000000000..6ed8499807fd --- /dev/null +++ b/tools/testing/selftests/kvm/include/vmx.h @@ -0,0 +1,494 @@ +/* + * tools/testing/selftests/kvm/include/vmx.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef SELFTEST_KVM_VMX_H +#define SELFTEST_KVM_VMX_H + +#include +#include "x86.h" + +#define CPUID_VMX_BIT 5 + +#define CPUID_VMX (1 << 5) + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP 0x08000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_DESC 0x00000004 +#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EPT_VE 0x00040000 +#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + +#define EXIT_REASON_FAILED_VMENTRY 0x80000000 +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_PML_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 +#define LAST_EXIT_REASON 64 + +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +struct vmx_msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t value; +} __attribute__ ((aligned(16))); + +static inline int vmxon(uint64_t phys) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(phys) + : "cc", "memory"); + + return ret; +} + +static inline void vmxoff(void) +{ + __asm__ __volatile__("vmxoff"); +} + +static inline int vmclear(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrld(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmlaunch. + */ +static inline int vmlaunch(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int vmresume(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +static inline int vmread(uint64_t encoding, uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" + : [value]"=rm"(tmp), [ret]"=rm"(ret) + : [encoding]"r"(encoding) + : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmread that ignores errors and returns zero if the + * vmread instruction fails. + */ +static inline uint64_t vmreadz(uint64_t encoding) +{ + uint64_t value = 0; + vmread(encoding, &value); + return value; +} + +static inline int vmwrite(uint64_t encoding, uint64_t value) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" + : [ret]"=rm"(ret) + : [value]"rm"(value), [encoding]"r"(encoding) + : "cc", "memory"); + + return ret; +} + +static inline uint32_t vmcs_revision(void) +{ + return rdmsr(MSR_IA32_VMX_BASIC); +} + +void prepare_for_vmx_operation(void); +void prepare_vmcs(void *guest_rip, void *guest_rsp); +struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid, + vmx_guest_code_t guest_code); + +#endif /* !SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e213d513dc61..2cedfda181d4 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -378,7 +378,7 @@ int kvm_memcmp_hva_gva(void *hva, * complicated. This function uses a reasonable default length for * the array and performs the appropriate allocation. */ -struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) { struct kvm_cpuid2 *cpuid; int nent = 100; @@ -402,17 +402,21 @@ struct kvm_cpuid2 *allocate_kvm_cpuid2(void) * Input Args: None * * Output Args: - * cpuid - The supported KVM CPUID * - * Return: void + * Return: The supported KVM CPUID * * Get the guest CPUID supported by KVM. */ -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { + static struct kvm_cpuid2 *cpuid; int ret; int kvm_fd; + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); kvm_fd = open(KVM_DEV_PATH, O_RDONLY); TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", KVM_DEV_PATH, kvm_fd, errno); @@ -422,6 +426,7 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) ret, errno); close(kvm_fd); + return cpuid; } /* Locate a cpuid entry. @@ -435,12 +440,13 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) * Return: A pointer to the cpuid entry. Never returns NULL. */ struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index) +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) { + struct kvm_cpuid2 *cpuid; struct kvm_cpuid_entry2 *entry = NULL; int i; + cpuid = kvm_get_supported_cpuid(); for (i = 0; i < cpuid->nent; i++) { if (cpuid->entries[i].function == function && cpuid->entries[i].index == index) { diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c new file mode 100644 index 000000000000..0231bc0aae7b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/vmx.c @@ -0,0 +1,243 @@ +/* + * tools/testing/selftests/kvm/lib/x86.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +/* Create a default VM for VMX tests. + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm * +vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_vm *vm; + vm_vaddr_t vmxon_vaddr; + vm_paddr_t vmxon_paddr; + vm_vaddr_t vmcs_vaddr; + vm_paddr_t vmcs_paddr; + + vm = vm_create_default(vcpuid, (void *) guest_code); + + /* Enable nesting in CPUID */ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); + + /* Setup of a region of guest memory for the vmxon region. */ + vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr); + + /* Setup of a region of guest memory for a vmcs. */ + vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr); + + vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr, + vmcs_paddr); + + return vm; +} + +void prepare_for_vmx_operation(void) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. + * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + required |= FEATURE_CONTROL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); +} + +/* + * Initialize the control fields to the most basic settings possible. + */ +static inline void init_vmcs_control_fields(void) +{ + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS)); + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS)); + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt_base()); + vmwrite(HOST_IDTR_BASE, get_idt_base()); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. + */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..8f7f62093add --- /dev/null +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -0,0 +1,231 @@ +/* + * gtests/tests/vmx_tsc_adjust_test.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * IA32_TSC_ADJUST test + * + * According to the SDM, "if an execution of WRMSR to the + * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, + * the logical processor also adds (or subtracts) value X from the + * IA32_TSC_ADJUST MSR. + * + * Note that when L1 doesn't intercept writes to IA32_TSC, a + * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC + * value. + * + * This test verifies that this unusual case is handled correctly. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +#include +#include + +#ifndef MSR_IA32_TSC_ADJUST +#define MSR_IA32_TSC_ADJUST 0x3b +#endif + +#define PAGE_SIZE 4096 +#define VCPU_ID 5 + +#define TSC_ADJUST_VALUE (1ll << 32) +#define TSC_OFFSET_VALUE -(1ll << 48) + +enum { + PORT_ABORT = 0x1000, + PORT_REPORT, + PORT_DONE, +}; + +struct vmx_page { + vm_vaddr_t virt; + vm_paddr_t phys; +}; + +enum { + VMXON_PAGE = 0, + VMCS_PAGE, + MSR_BITMAP_PAGE, + + NUM_VMX_PAGES, +}; + +struct kvm_single_msr { + struct kvm_msrs header; + struct kvm_msr_entry entry; +} __attribute__((packed)); + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +/* Array of vmx_page descriptors that is shared with the guest. */ +struct vmx_page *vmx_pages; + +#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg)) +static void do_exit_to_l0(uint16_t port, unsigned long arg) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg) + : "rax"); +} + + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \ +} while (0) + +static void check_ia32_tsc_adjust(int64_t max) +{ + int64_t adjust; + + adjust = rdmsr(MSR_IA32_TSC_ADJUST); + exit_to_l0(PORT_REPORT, adjust); + GUEST_ASSERT(adjust <= max); +} + +static void l2_guest_code(void) +{ + uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + + wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_page *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + uintptr_t save_cr3; + + GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); + wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + + prepare_for_vmx_operation(); + + /* Enter VMX root operation. */ + *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys)); + + /* Load a VMCS. */ + *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys)); + GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + /* Jump into L2. First, test failure to load guest CR3. */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + vmwrite(GUEST_CR3, save_cr3); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + exit_to_l0(PORT_DONE, 0); +} + +static void allocate_vmx_page(struct vmx_page *page) +{ + vm_vaddr_t virt; + + virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0); + memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE); + + page->virt = virt; + page->phys = addr_gva2gpa(vm, virt); +} + +static vm_vaddr_t allocate_vmx_pages(void) +{ + vm_vaddr_t vmx_pages_vaddr; + int i; + + vmx_pages_vaddr = vm_vaddr_alloc( + vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0); + + vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr); + + for (i = 0; i < NUM_VMX_PAGES; i++) + allocate_vmx_page(&vmx_pages[i]); + + return vmx_pages_vaddr; +} + +void report(int64_t val) +{ + printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_vaddr; + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + printf("nested VMX not enabled, skipping test"); + return 0; + } + + vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vmx_pages_vaddr = allocate_vmx_pages(); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_regs regs; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + + switch (run->io.port) { + case PORT_ABORT: + TEST_ASSERT(false, "%s", (const char *) regs.rdi); + /* NOT REACHED */ + case PORT_REPORT: + report(regs.rdi); + break; + case PORT_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + } + } + + kvm_vm_free(vm); +done: + return 0; +} From b8e47d87be65aec931846ced9a34a22d2021c311 Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Thu, 5 Apr 2018 17:33:22 +0530 Subject: [PATCH 184/660] drm: Fix HDCP downstream dev count read In both HDMI and DP, device count is represented by 6:0 bits of a register(BInfo/Bstatus) So macro for bitmasking the device_count is fixed(0x3F->0x7F). v3: Retained the Rb-ed. v4: %s/drm\/i915/drm [rodrigo] v5: Added "Fixes:" and HDCP keyword in subject [Rodrigo, Sean Paul] Signed-off-by: Ramalingam C Fixes: 495eb7f877ab drm: Add some HDCP related #defines cc: Sean Paul Reviewed-by: Sean Paul Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/1522929802-22850-1-git-send-email-ramalingam.c@intel.com --- include/drm/drm_hdcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h index 562fa7df2637..98e63d870139 100644 --- a/include/drm/drm_hdcp.h +++ b/include/drm/drm_hdcp.h @@ -19,7 +19,7 @@ #define DRM_HDCP_RI_LEN 2 #define DRM_HDCP_V_PRIME_PART_LEN 4 #define DRM_HDCP_V_PRIME_NUM_PARTS 5 -#define DRM_HDCP_NUM_DOWNSTREAM(x) (x & 0x3f) +#define DRM_HDCP_NUM_DOWNSTREAM(x) (x & 0x7f) #define DRM_HDCP_MAX_CASCADE_EXCEEDED(x) (x & BIT(3)) #define DRM_HDCP_MAX_DEVICE_EXCEEDED(x) (x & BIT(7)) From 5ef5ac8de125fe6b4b23293bee026ca7ea1529b9 Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Fri, 30 Mar 2018 15:41:55 +0800 Subject: [PATCH 185/660] ASoC: rt5514: Add the missing register in the readable table The patch adds the missing register in the readable table. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- sound/soc/codecs/rt5514.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index e8a66b03faab..1570b91bf018 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -89,6 +89,7 @@ static const struct reg_default rt5514_reg[] = { {RT5514_PLL3_CALIB_CTRL5, 0x40220012}, {RT5514_DELAY_BUF_CTRL1, 0x7fff006a}, {RT5514_DELAY_BUF_CTRL3, 0x00000000}, + {RT5514_ASRC_IN_CTRL1, 0x00000003}, {RT5514_DOWNFILTER0_CTRL1, 0x00020c2f}, {RT5514_DOWNFILTER0_CTRL2, 0x00020c2f}, {RT5514_DOWNFILTER0_CTRL3, 0x10000362}, @@ -181,6 +182,7 @@ static bool rt5514_readable_register(struct device *dev, unsigned int reg) case RT5514_PLL3_CALIB_CTRL5: case RT5514_DELAY_BUF_CTRL1: case RT5514_DELAY_BUF_CTRL3: + case RT5514_ASRC_IN_CTRL1: case RT5514_DOWNFILTER0_CTRL1: case RT5514_DOWNFILTER0_CTRL2: case RT5514_DOWNFILTER0_CTRL3: @@ -238,6 +240,7 @@ static bool rt5514_i2c_readable_register(struct device *dev, case RT5514_DSP_MAPPING | RT5514_PLL3_CALIB_CTRL5: case RT5514_DSP_MAPPING | RT5514_DELAY_BUF_CTRL1: case RT5514_DSP_MAPPING | RT5514_DELAY_BUF_CTRL3: + case RT5514_DSP_MAPPING | RT5514_ASRC_IN_CTRL1: case RT5514_DSP_MAPPING | RT5514_DOWNFILTER0_CTRL1: case RT5514_DSP_MAPPING | RT5514_DOWNFILTER0_CTRL2: case RT5514_DSP_MAPPING | RT5514_DOWNFILTER0_CTRL3: From 9783ccd0f2507cbe3c5ff1cb84bf6ae3a512d17d Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 16 Apr 2018 10:16:45 +0800 Subject: [PATCH 186/660] net: Fix one possible memleak in ip_setup_cork It would allocate memory in this function when the cork->opt is NULL. But the memory isn't freed if failed in the latter rt check, and return error directly. It causes the memleak if its caller is ip_make_skb which also doesn't free the cork->opt when meet a error. Now move the rt check ahead to avoid the memleak. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4c11b810a447..83c73bab2c3d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1109,6 +1109,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, struct ip_options_rcu *opt; struct rtable *rt; + rt = *rtp; + if (unlikely(!rt)) + return -EFAULT; + /* * setup for corking. */ @@ -1124,9 +1128,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->flags |= IPCORK_OPT; cork->addr = ipc->addr; } - rt = *rtp; - if (unlikely(!rt)) - return -EFAULT; + /* * We steal reference to this route, caller should not release it */ From bc8a3ef1940c9a6dfa316b31e063fdd4fbab0add Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 16 Apr 2018 10:01:04 -0700 Subject: [PATCH 187/660] ARM: dts: Fix cm2 and prm sizes for omap4 The size of these modules is 0x2000, not 0x3000. The extra 0x1000 after 0x2000 is for the interconnect target agent which is a separate device. Fixes: 7415b0b4c645 ("ARM: dts: omap4: add minimal l4 bus layout with control module support") Cc: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 475904894b86..e554b6e039f3 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -163,10 +163,10 @@ cm2: cm2@8000 { compatible = "ti,omap4-cm2", "simple-bus"; - reg = <0x8000 0x3000>; + reg = <0x8000 0x2000>; #address-cells = <1>; #size-cells = <1>; - ranges = <0 0x8000 0x3000>; + ranges = <0 0x8000 0x2000>; cm2_clocks: clocks { #address-cells = <1>; @@ -250,11 +250,11 @@ prm: prm@6000 { compatible = "ti,omap4-prm"; - reg = <0x6000 0x3000>; + reg = <0x6000 0x2000>; interrupts = ; #address-cells = <1>; #size-cells = <1>; - ranges = <0 0x6000 0x3000>; + ranges = <0 0x6000 0x2000>; prm_clocks: clocks { #address-cells = <1>; From dc29f581fa9d5567c3a01ecfdd7f16b2e613c7fb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 29 Mar 2018 02:14:03 +0000 Subject: [PATCH 188/660] ASoC: amd: acp-da7219-max98357: Make symbol da7219_dai_clk static Fixes the following sparse warning: sound/soc/amd/acp-da7219-max98357a.c:46:12: warning: symbol 'da7219_dai_clk' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Mark Brown --- sound/soc/amd/acp-da7219-max98357a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c index b205c782e494..f41560ecbcd1 100644 --- a/sound/soc/amd/acp-da7219-max98357a.c +++ b/sound/soc/amd/acp-da7219-max98357a.c @@ -43,7 +43,7 @@ #define DUAL_CHANNEL 2 static struct snd_soc_jack cz_jack; -struct clk *da7219_dai_clk; +static struct clk *da7219_dai_clk; static int cz_da7219_init(struct snd_soc_pcm_runtime *rtd) { From e6f39e87b6439939a14cb7fdd94086a082b63b87 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 16 Apr 2018 11:43:57 +0200 Subject: [PATCH 189/660] x86/ldt: Fix support_pte_mask filtering in map_ldt_struct() The |= operator will let us end up with an invalid PTE. Use the correct &= instead. [ The bug was also independently reported by Shuah Khan ] Fixes: fb43d6cb91ef ('x86/mm: Do not auto-massage page protections') Acked-by: Andy Lutomirski Acked-by: Dave Hansen Signed-off-by: Joerg Roedel Signed-off-by: Linus Torvalds --- arch/x86/kernel/ldt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index d41d896481b8..c9b14020f4dd 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -166,7 +166,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) */ pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL); /* Filter out unsuppored __PAGE_KERNEL* bits: */ - pgprot_val(pte_prot) |= __supported_pte_mask; + pgprot_val(pte_prot) &= __supported_pte_mask; pte = pfn_pte(pfn, pte_prot); set_pte_at(mm, va, ptep, pte); pte_unmap_unlock(ptep, ptl); From e86281e700cca8a773f9a572fa406adf2784ba5c Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 28 Mar 2018 23:41:52 +0000 Subject: [PATCH 190/660] eCryptfs: don't pass up plaintext names when using filename encryption Both ecryptfs_filldir() and ecryptfs_readlink_lower() use ecryptfs_decode_and_decrypt_filename() to translate lower filenames to upper filenames. The function correctly passes up lower filenames, unchanged, when filename encryption isn't in use. However, it was also passing up lower filenames when the filename wasn't encrypted or when decryption failed. Since 88ae4ab9802e, eCryptfs refuses to lookup lower plaintext names when filename encryption is enabled so this resulted in a situation where userspace would see lower plaintext filenames in calls to getdents(2) but then not be able to lookup those filenames. An example of this can be seen when enabling filename encryption on an eCryptfs mount at the root directory of an Ext4 filesystem: $ ls -1i /lower 12 ECRYPTFS_FNEK_ENCRYPTED.FWYZD8TcW.5FV-TKTEYOHsheiHX9a-w.NURCCYIMjI8pn5BDB9-h3fXwrE-- 11 lost+found $ ls -1i /upper ls: cannot access '/upper/lost+found': No such file or directory ? lost+found 12 test With this change, the lower lost+found dentry is ignored: $ ls -1i /lower 12 ECRYPTFS_FNEK_ENCRYPTED.FWYZD8TcW.5FV-TKTEYOHsheiHX9a-w.NURCCYIMjI8pn5BDB9-h3fXwrE-- 11 lost+found $ ls -1i /upper 12 test Additionally, some potentially noisy error/info messages in the related code paths are turned into debug messages so that the logs can't be easily filled. Fixes: 88ae4ab9802e ("ecryptfs_lookup(): try either only encrypted or plaintext name") Reported-by: Guenter Roeck Cc: Al Viro Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 41 ++++++++++++++++++++++++++++------------- fs/ecryptfs/file.c | 21 ++++++++++++++++----- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 846ca150d52e..4dd842f72846 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1997,6 +1997,16 @@ out: return rc; } +static bool is_dot_dotdot(const char *name, size_t name_size) +{ + if (name_size == 1 && name[0] == '.') + return true; + else if (name_size == 2 && name[0] == '.' && name[1] == '.') + return true; + + return false; +} + /** * ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext * @plaintext_name: The plaintext name @@ -2021,13 +2031,21 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, size_t packet_size; int rc = 0; - if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) - && !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) - && (name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) - && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, - ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) { - const char *orig_name = name; - size_t orig_name_size = name_size; + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && + !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)) { + if (is_dot_dotdot(name, name_size)) { + rc = ecryptfs_copy_filename(plaintext_name, + plaintext_name_size, + name, name_size); + goto out; + } + + if (name_size <= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE || + strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, + ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)) { + rc = -EINVAL; + goto out; + } name += ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; name_size -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; @@ -2047,12 +2065,9 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, decoded_name, decoded_name_size); if (rc) { - printk(KERN_INFO "%s: Could not parse tag 70 packet " - "from filename; copying through filename " - "as-is\n", __func__); - rc = ecryptfs_copy_filename(plaintext_name, - plaintext_name_size, - orig_name, orig_name_size); + ecryptfs_printk(KERN_DEBUG, + "%s: Could not parse tag 70 packet from filename\n", + __func__); goto out_free; } } else { diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index c74ed3ca3372..b76a9853325e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -82,17 +82,28 @@ ecryptfs_filldir(struct dir_context *ctx, const char *lower_name, buf->sb, lower_name, lower_namelen); if (rc) { - printk(KERN_ERR "%s: Error attempting to decode and decrypt " - "filename [%s]; rc = [%d]\n", __func__, lower_name, - rc); - goto out; + if (rc != -EINVAL) { + ecryptfs_printk(KERN_DEBUG, + "%s: Error attempting to decode and decrypt filename [%s]; rc = [%d]\n", + __func__, lower_name, rc); + return rc; + } + + /* Mask -EINVAL errors as these are most likely due a plaintext + * filename present in the lower filesystem despite filename + * encryption being enabled. One unavoidable example would be + * the "lost+found" dentry in the root directory of an Ext4 + * filesystem. + */ + return 0; } + buf->caller->pos = buf->ctx.pos; rc = !dir_emit(buf->caller, name, name_size, ino, d_type); kfree(name); if (!rc) buf->entries_written++; -out: + return rc; } From 8a8158c85e1e774a44fbe81106fa41138580dfd1 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Thu, 29 Mar 2018 10:28:23 +0100 Subject: [PATCH 191/660] MIPS: memset.S: EVA & fault support for small_memset The MIPS kernel memset / bzero implementation includes a small_memset branch which is used when the region to be set is smaller than a long (4 bytes on 32bit, 8 bytes on 64bit). The current small_memset implementation uses a simple store byte loop to write the destination. There are 2 issues with this implementation: 1. When EVA mode is active, user and kernel address spaces may overlap. Currently the use of the sb instruction means kernel mode addressing is always used and an intended write to userspace may actually overwrite some critical kernel data. 2. If the write triggers a page fault, for example by calling __clear_user(NULL, 2), instead of gracefully handling the fault, an OOPS is triggered. Fix these issues by replacing the sb instruction with the EX() macro, which will emit EVA compatible instuctions as required. Additionally implement a fault fixup for small_memset which sets a2 to the number of bytes that could not be cleared (as defined by __clear_user). Reported-by: Chuanhua Lei Signed-off-by: Matt Redfearn Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/18975/ Signed-off-by: James Hogan --- arch/mips/lib/memset.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index a1456664d6c2..90bcdf1224ee 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -219,7 +219,7 @@ 1: PTR_ADDIU a0, 1 /* fill bytewise */ R10KCBARRIER(0(ra)) bne t1, a0, 1b - sb a1, -1(a0) + EX(sb, a1, -1(a0), .Lsmall_fixup\@) 2: jr ra /* done */ move a2, zero @@ -260,6 +260,11 @@ jr ra andi v1, a2, STORMASK +.Lsmall_fixup\@: + PTR_SUBU a2, t1, a0 + jr ra + PTR_ADDIU a2, 1 + .endm /* From ec518f21cb1a1b1f8a516499ea05c60299e04963 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Apr 2018 08:29:42 -0700 Subject: [PATCH 192/660] tipc: add policy for TIPC_NLA_NET_ADDR Before syzbot/KMSAN bites, add the missing policy for TIPC_NLA_NET_ADDR Fixes: 27c21416727a ("tipc: add net set to new netlink api") Signed-off-by: Eric Dumazet Cc: Jon Maloy Cc: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index b76f13f6fea1..d4e0bbeee727 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -79,7 +79,8 @@ const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_NET_ID] = { .type = NLA_U32 } + [TIPC_NLA_NET_ID] = { .type = NLA_U32 }, + [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 }, }; const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { From c6404122cb18f1fbd2a6dc85ab687f6fa2e454cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Apr 2018 08:29:43 -0700 Subject: [PATCH 193/660] tipc: fix possible crash in __tipc_nl_net_set() syzbot reported a crash in __tipc_nl_net_set() caused by NULL dereference. We need to check that both TIPC_NLA_NET_NODEID and TIPC_NLA_NET_NODEID_W1 are present. We also need to make sure userland provided u64 attributes. Fixes: d50ccc2d3909 ("tipc: add 128-bit node identifier") Signed-off-by: Eric Dumazet Cc: Jon Maloy Cc: Ying Xue Reported-by: syzbot Signed-off-by: David S. Miller --- net/tipc/net.c | 2 ++ net/tipc/netlink.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/tipc/net.c b/net/tipc/net.c index 856f9e97ea29..4fbaa0464405 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -252,6 +252,8 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) u64 *w0 = (u64 *)&node_id[0]; u64 *w1 = (u64 *)&node_id[8]; + if (!attrs[TIPC_NLA_NET_NODEID_W1]) + return -EINVAL; *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); tipc_net_init(net, node_id, 0); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index d4e0bbeee727..6ff2254088f6 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -81,6 +81,8 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_NET_ID] = { .type = NLA_U32 }, [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_NET_NODEID] = { .type = NLA_U64 }, + [TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 }, }; const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { From 0a12e80ce4230434c2ed66ad0d65af0b7ccecea8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 1 Apr 2018 09:42:25 -0700 Subject: [PATCH 194/660] soc: bcm: raspberrypi-power: Fix use of __packed Commit a09cd356586d ("ARM: bcm2835: add rpi power domain driver") attempted to annotate the structure rpi_power_domain_packet with __packed but introduced a typo and made it named __packet instead. Just drop the annotation since the structure is naturally aligned already. Fixes: a09cd356586d ("ARM: bcm2835: add rpi power domain driver") Signed-off-by: Florian Fainelli --- drivers/soc/bcm/raspberrypi-power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/bcm/raspberrypi-power.c b/drivers/soc/bcm/raspberrypi-power.c index fe96a8b956fb..f7ed1187518b 100644 --- a/drivers/soc/bcm/raspberrypi-power.c +++ b/drivers/soc/bcm/raspberrypi-power.c @@ -45,7 +45,7 @@ struct rpi_power_domains { struct rpi_power_domain_packet { u32 domain; u32 on; -} __packet; +}; /* * Asks the firmware to enable or disable power on a specific power From 144345a4a8c3b497a3f60d3af9d6071a37660186 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 8 Apr 2018 11:05:15 +0200 Subject: [PATCH 195/660] soc: bcm2835: Make !RASPBERRYPI_FIRMWARE dummies return failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_RASPBERRYPI_FIRMWARE=n: drivers/gpio/gpio-raspberrypi-exp.c: In function ‘rpi_exp_gpio_get_polarity’: drivers/gpio/gpio-raspberrypi-exp.c:71: warning: ‘get.polarity’ is used uninitialized in this function drivers/gpio/gpio-raspberrypi-exp.c: In function ‘rpi_exp_gpio_get_direction’: drivers/gpio/gpio-raspberrypi-exp.c:150: warning: ‘get.direction’ is used uninitialized in this function The dummy firmware interface functions return 0, which means success, causing subsequent code to make use of the never initialized output parameter. Fix this by making the dummy functions return an error code (-ENOSYS) instead. Note that this assumes the firmware always fills in the requested data in the CONFIG_RASPBERRYPI_FIRMWARE=y case. Fixes: d45f1a563b92dac7 ("staging: vc04_services: fix up rpi firmware functions") Signed-off-by: Geert Uytterhoeven Reviewed-by: Eric Anholt Signed-off-by: Florian Fainelli --- include/soc/bcm2835/raspberrypi-firmware.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/soc/bcm2835/raspberrypi-firmware.h b/include/soc/bcm2835/raspberrypi-firmware.h index 50df5b28d2c9..8ee8991aa099 100644 --- a/include/soc/bcm2835/raspberrypi-firmware.h +++ b/include/soc/bcm2835/raspberrypi-firmware.h @@ -143,13 +143,13 @@ struct rpi_firmware *rpi_firmware_get(struct device_node *firmware_node); static inline int rpi_firmware_property(struct rpi_firmware *fw, u32 tag, void *data, size_t len) { - return 0; + return -ENOSYS; } static inline int rpi_firmware_property_list(struct rpi_firmware *fw, void *data, size_t tag_size) { - return 0; + return -ENOSYS; } static inline struct rpi_firmware *rpi_firmware_get(struct device_node *firmware_node) From 5968a70d7af5f2abbd9d9f9c8e86da51f0a6b16d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Apr 2018 12:32:55 -0700 Subject: [PATCH 196/660] textsearch: fix kernel-doc warnings and add kernel-api section Make lib/textsearch.c usable as kernel-doc. Add textsearch() function family to kernel-api documentation. Fix kernel-doc warnings in : ../include/linux/textsearch.h:65: warning: Incorrect use of kernel-doc format: * get_next_block - fetch next block of data ../include/linux/textsearch.h:82: warning: Incorrect use of kernel-doc format: * finish - finalize/clean a series of get_next_block() calls Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- Documentation/core-api/kernel-api.rst | 13 +++++++++ include/linux/textsearch.h | 4 +-- lib/textsearch.c | 40 +++++++++++++++------------ 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index ff335f8aeb39..92f30006adae 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -136,6 +136,19 @@ Sorting .. kernel-doc:: lib/list_sort.c :export: +Text Searching +-------------- + +.. kernel-doc:: lib/textsearch.c + :doc: ts_intro + +.. kernel-doc:: lib/textsearch.c + :export: + +.. kernel-doc:: include/linux/textsearch.h + :functions: textsearch_find textsearch_next \ + textsearch_get_pattern textsearch_get_pattern_len + UUID/GUID --------- diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 0494db3fd9e8..13770cfe33ad 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -62,7 +62,7 @@ struct ts_config int flags; /** - * get_next_block - fetch next block of data + * @get_next_block: fetch next block of data * @consumed: number of bytes consumed by the caller * @dst: destination buffer * @conf: search configuration @@ -79,7 +79,7 @@ struct ts_config struct ts_state *state); /** - * finish - finalize/clean a series of get_next_block() calls + * @finish: finalize/clean a series of get_next_block() calls * @conf: search configuration * @state: search state * diff --git a/lib/textsearch.c b/lib/textsearch.c index 0b79908dfe89..5939549c0e7b 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -10,7 +10,10 @@ * Pablo Neira Ayuso * * ========================================================================== - * + */ + +/** + * DOC: ts_intro * INTRODUCTION * * The textsearch infrastructure provides text searching facilities for @@ -19,7 +22,9 @@ * * ARCHITECTURE * - * User + * .. code-block:: none + * + * User * +----------------+ * | finish()|<--------------(6)-----------------+ * |get_next_block()|<--------------(5)---------------+ | @@ -33,21 +38,21 @@ * | (3)|----->| find()/next() |-----------+ | * | (7)|----->| destroy() |----------------------+ * +----------------+ +---------------+ - * - * (1) User configures a search by calling _prepare() specifying the - * search parameters such as the pattern and algorithm name. + * + * (1) User configures a search by calling textsearch_prepare() specifying + * the search parameters such as the pattern and algorithm name. * (2) Core requests the algorithm to allocate and initialize a search * configuration according to the specified parameters. - * (3) User starts the search(es) by calling _find() or _next() to - * fetch subsequent occurrences. A state variable is provided - * to the algorithm to store persistent variables. + * (3) User starts the search(es) by calling textsearch_find() or + * textsearch_next() to fetch subsequent occurrences. A state variable + * is provided to the algorithm to store persistent variables. * (4) Core eventually resets the search offset and forwards the find() * request to the algorithm. * (5) Algorithm calls get_next_block() provided by the user continuously * to fetch the data to be searched in block by block. * (6) Algorithm invokes finish() after the last call to get_next_block * to clean up any leftovers from get_next_block. (Optional) - * (7) User destroys the configuration by calling _destroy(). + * (7) User destroys the configuration by calling textsearch_destroy(). * (8) Core notifies the algorithm to destroy algorithm specific * allocations. (Optional) * @@ -62,9 +67,10 @@ * amount of times and even in parallel as long as a separate struct * ts_state variable is provided to every instance. * - * The actual search is performed by either calling textsearch_find_- - * continuous() for linear data or by providing an own get_next_block() - * implementation and calling textsearch_find(). Both functions return + * The actual search is performed by either calling + * textsearch_find_continuous() for linear data or by providing + * an own get_next_block() implementation and + * calling textsearch_find(). Both functions return * the position of the first occurrence of the pattern or UINT_MAX if * no match was found. Subsequent occurrences can be found by calling * textsearch_next() regardless of the linearity of the data. @@ -72,7 +78,7 @@ * Once you're done using a configuration it must be given back via * textsearch_destroy. * - * EXAMPLE + * EXAMPLE:: * * int pos; * struct ts_config *conf; @@ -87,13 +93,13 @@ * goto errout; * } * - * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); + * pos = textsearch_find_continuous(conf, \&state, example, strlen(example)); * if (pos != UINT_MAX) - * panic("Oh my god, dancing chickens at %d\n", pos); + * panic("Oh my god, dancing chickens at \%d\n", pos); * * textsearch_destroy(conf); - * ========================================================================== */ +/* ========================================================================== */ #include #include @@ -225,7 +231,7 @@ static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, * * Returns the position of first occurrence of the pattern or * %UINT_MAX if no occurrence was found. - */ + */ unsigned int textsearch_find_continuous(struct ts_config *conf, struct ts_state *state, const void *data, unsigned int len) From b64576cbf36afa5fabf3b31f62a1994c429ef855 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: [PATCH 197/660] m68k/mac: Don't remap SWIM MMIO region For reasons I don't understand, calling ioremap() then iounmap() on the SWIM MMIO region causes a hang on 68030 (but not on 68040). ~# modprobe swim_mod SWIM floppy driver Version 0.2 (2008-10-30) SWIM device not found ! watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [modprobe:285] Modules linked in: swim_mod(+) Format 00 Vector: 0064 PC: 000075aa Status: 2000 Not tainted ORIG_D0: ffffffff D0: d00c0000 A2: 007c2370 A1: 003f810c A0: 00040000 D5: d0096800 D4: d0097e00 D3: 00000001 D2: 00000003 D1: 00000000 Non-Maskable Interrupt Modules linked in: swim_mod(+) PC: [<000075ba>] __iounmap+0x24/0x10e SR: 2000 SP: 007abc48 a2: 007c2370 d0: d00c0000 d1: 000001a0 d2: 00000019 d3: 00000001 d4: d0097e00 d5: d0096800 a0: 00040000 a1: 003f810c Process modprobe (pid: 285, task=007c2370) Frame format=0 Stack from 007abc7c: ffffffed 00000000 006a4060 004712e0 007abca0 000076ea d0080000 00080000 010bb4b8 007abcd8 010ba542 d0096000 00000000 00000000 00000001 010bb59c 00000000 007abf30 010bb4b8 0047760a 0047763c 00477612 00616540 007abcec 0020a91a 00477600 0047760a 010bb4cc 007abd18 002092f2 0047760a 00333b06 007abd5c 00000000 0047760a 010bb4cc 00404f90 004776b8 00000001 007abd38 00209446 010bb4cc 0047760a 010bb4cc 0020938e 0031f8be 00616540 007abd64 Call Trace: [<000076ea>] iounmap+0x46/0x5a [<00080000>] shrink_page_list+0x7f6/0xe06 [<010ba542>] swim_probe+0xe4/0x496 [swim_mod] [<0020a91a>] platform_drv_probe+0x20/0x5e [<002092f2>] driver_probe_device+0x21c/0x2b8 [<00333b06>] mutex_lock+0x0/0x2e [<00209446>] __driver_attach+0xb8/0xce [<0020938e>] __driver_attach+0x0/0xce [<0031f8be>] klist_next+0x0/0xa0 [<00207562>] bus_for_each_dev+0x74/0xba [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<00333b06>] mutex_lock+0x0/0x2e [<00208e44>] driver_attach+0x1a/0x1e [<0020938e>] __driver_attach+0x0/0xce [<00207e26>] bus_add_driver+0x188/0x234 [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<00209894>] driver_register+0x58/0x104 [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<010bd000>] swim_init+0x0/0x2c [swim_mod] [<0020a7be>] __platform_driver_register+0x38/0x3c [<010bd028>] swim_init+0x28/0x2c [swim_mod] [<000020dc>] do_one_initcall+0x38/0x196 [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<00075008>] __free_pages+0x0/0x38 [<000045c0>] mangle_kernel_stack+0x30/0xda [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<0005ced4>] do_init_module+0x42/0x266 [<010bd000>] swim_init+0x0/0x2c [swim_mod] [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<0005eda0>] load_module+0x1a30/0x1e70 [<0000465d>] mangle_kernel_stack+0xcd/0xda [<00331c64>] __generic_copy_from_user+0x0/0x46 [<0033256e>] _cond_resched+0x0/0x32 [<00331b9c>] memset+0x0/0x98 [<0033256e>] _cond_resched+0x0/0x32 [<0005f25c>] SyS_init_module+0x7c/0x112 [<00002000>] _start+0x0/0x8 [<00002000>] _start+0x0/0x8 [<00331c82>] __generic_copy_from_user+0x1e/0x46 [<0005f2b2>] SyS_init_module+0xd2/0x112 [<0000465d>] mangle_kernel_stack+0xcd/0xda [<00002b40>] syscall+0x8/0xc [<0000465d>] mangle_kernel_stack+0xcd/0xda [<0008c00c>] pcpu_balance_workfn+0xb2/0x40e Code: 2200 7419 e4a9 e589 2841 d9fc 0000 1000 <2414> 7203 c282 7602 b681 6600 0096 0242 fe00 0482 0000 0000 e9c0 11c3 ed89 2642 There's no need to call ioremap() for the SWIM address range, as it lies within the usual IO device region at 0x5000 0000, which has already been mapped by head.S. Remove the redundant ioremap() and iounmap() calls to fix the hang. Cc: Laurent Vivier Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 64e066eba72e..92f0cddc597e 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -911,7 +911,7 @@ static int swim_probe(struct platform_device *dev) goto out; } - swim_base = ioremap(res->start, resource_size(res)); + swim_base = (struct swim __iomem *)res->start; if (!swim_base) { ret = -ENOMEM; goto out_release_io; @@ -923,7 +923,7 @@ static int swim_probe(struct platform_device *dev) if (!get_swim_mode(swim_base)) { printk(KERN_INFO "SWIM device not found !\n"); ret = -ENODEV; - goto out_iounmap; + goto out_release_io; } /* set platform driver data */ @@ -931,7 +931,7 @@ static int swim_probe(struct platform_device *dev) swd = kzalloc(sizeof(struct swim_priv), GFP_KERNEL); if (!swd) { ret = -ENOMEM; - goto out_iounmap; + goto out_release_io; } platform_set_drvdata(dev, swd); @@ -945,8 +945,6 @@ static int swim_probe(struct platform_device *dev) out_kfree: kfree(swd); -out_iounmap: - iounmap(swim_base); out_release_io: release_mem_region(res->start, resource_size(res)); out: @@ -974,8 +972,6 @@ static int swim_remove(struct platform_device *dev) for (drive = 0; drive < swd->floppy_count; drive++) floppy_eject(&swd->unit[drive]); - iounmap(swd->base); - res = platform_get_resource(dev, IORESOURCE_MEM, 0); if (res) release_mem_region(res->start, resource_size(res)); From 7ae6a2b6cc058005ee3d0d2b9ce27688e51afa4b Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: [PATCH 198/660] block/swim: Fix array bounds check In the floppy_find() function in swim.c is a call to get_disk(swd->unit[drive].disk). The actual parameter to this call can be a NULL pointer when drive == swd->floppy_count. This causes an oops in get_disk(). Data read fault at 0x00000198 in Super Data (pc=0x1be5b6) BAD KERNEL BUSERR Oops: 00000000 Modules linked in: swim_mod ipv6 mac8390 PC: [<001be5b6>] get_disk+0xc/0x76 SR: 2004 SP: 9a078bc1 a2: 0213ed90 d0: 00000000 d1: 00000000 d2: 00000000 d3: 000000ff d4: 00000002 d5: 02983590 a0: 02332e00 a1: 022dfd64 Process dd (pid: 285, task=020ab25b) Frame format=B ssw=074d isc=4a88 isb=6732 daddr=00000198 dobuf=00000000 baddr=001be5bc dibuf=bfffffff ver=f Stack from 022dfca4: 00000000 0203fc00 0213ed90 022dfcc0 02982936 00000000 00200000 022dfd08 0020f85a 00200000 022dfd64 02332e00 004040fc 00000014 001be77e 022dfd64 00334e4a 001be3f8 0800001d 022dfd64 01c04b60 01c04b70 022aba80 029828f8 02332e00 022dfd2c 001be7ac 0203fc00 00200000 022dfd64 02103a00 01c04b60 01c04b60 0200e400 022dfd68 000e191a 00200000 022dfd64 02103a00 0800001d 00000000 00000003 000b89de 00500000 02103a00 01c04b60 02103a08 01c04c2e Call Trace: [<02982936>] floppy_find+0x3e/0x4a [swim_mod] [<00200000>] uart_remove_one_port+0x1a2/0x260 [<0020f85a>] kobj_lookup+0xde/0x132 [<00200000>] uart_remove_one_port+0x1a2/0x260 [<001be77e>] get_gendisk+0x0/0x130 [<00334e4a>] mutex_lock+0x0/0x2e [<001be3f8>] disk_block_events+0x0/0x6c [<029828f8>] floppy_find+0x0/0x4a [swim_mod] [<001be7ac>] get_gendisk+0x2e/0x130 [<00200000>] uart_remove_one_port+0x1a2/0x260 [<000e191a>] __blkdev_get+0x32/0x45a [<00200000>] uart_remove_one_port+0x1a2/0x260 [<000b89de>] complete_walk+0x0/0x8a [<000e1e22>] blkdev_get+0xe0/0x29a [<000e1fdc>] blkdev_open+0x0/0xb0 [<000b89de>] complete_walk+0x0/0x8a [<000e1fdc>] blkdev_open+0x0/0xb0 [<000e01cc>] bd_acquire+0x74/0x8a [<000e205c>] blkdev_open+0x80/0xb0 [<000e1fdc>] blkdev_open+0x0/0xb0 [<000abf24>] do_dentry_open+0x1a4/0x322 [<00020000>] __do_proc_douintvec+0x22/0x27e [<000b89de>] complete_walk+0x0/0x8a [<000baa62>] link_path_walk+0x0/0x48e [<000ba3f8>] inode_permission+0x20/0x54 [<000ac0e4>] vfs_open+0x42/0x78 [<000bc372>] path_openat+0x2b2/0xeaa [<000bc0c0>] path_openat+0x0/0xeaa [<0004463e>] __irq_wake_thread+0x0/0x4e [<0003a45a>] task_tick_fair+0x18/0xc8 [<000bd00a>] do_filp_open+0xa0/0xea [<000abae0>] do_sys_open+0x11a/0x1ee [<00020000>] __do_proc_douintvec+0x22/0x27e [<000abbf4>] SyS_open+0x1e/0x22 [<00020000>] __do_proc_douintvec+0x22/0x27e [<00002b40>] syscall+0x8/0xc [<00020000>] __do_proc_douintvec+0x22/0x27e [<0000c00b>] dyadic+0x1/0x28 Code: 4e5e 4e75 4e56 fffc 2f0b 2f02 266e 0008 <206b> 0198 4a88 6732 2428 002c 661e 486b 0058 4eb9 0032 0b96 588f 4a88 672c 2008 Disabling lock debugging due to kernel taint Fix the array index bounds check to avoid this. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Fixes: 8852ecd97488 ("[PATCH] m68k: mac - Add SWIM floppy support") Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Reviewed-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- drivers/block/swim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 92f0cddc597e..2cdfc0db5966 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -795,7 +795,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) struct swim_priv *swd = data; int drive = (*part & 3); - if (drive > swd->floppy_count) + if (drive >= swd->floppy_count) return NULL; *part = 0; From c1d6207cc0eef2a7f8551f9c7420d8776268f6e1 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: [PATCH 199/660] block/swim: Remove extra put_disk() call from error path Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Fixes: 103db8b2dfa5 ("[PATCH] swim: stop sharing request queue across multiple gendisks") Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Reviewed-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- drivers/block/swim.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 2cdfc0db5966..0258a96e0c46 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -861,7 +861,6 @@ static int swim_floppy_init(struct swim_priv *swd) &swd->lock); if (!swd->unit[drive].disk->queue) { err = -ENOMEM; - put_disk(swd->unit[drive].disk); goto exit_put_disks; } blk_queue_bounce_limit(swd->unit[drive].disk->queue, From 8e2ab5a4efaac77fb93e5b5b109d0b3976fdd3a0 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: [PATCH 200/660] block/swim: Don't log an error message for an invalid ioctl The 'eject' shell command may send various different ioctl commands. This leads to error messages on the console even though the FDEJECT ioctl succeeds. ~# eject floppy SWIM floppy_ioctl: unknown cmd 21257 SWIM floppy_ioctl: unknown cmd 1 Don't log an error message for an invalid ioctl, just do as the swim3 driver does and return -ENOTTY. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Reviewed-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- drivers/block/swim.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 0258a96e0c46..7b847170cf71 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -727,14 +727,9 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode, if (copy_to_user((void __user *) param, (void *) &floppy_type, sizeof(struct floppy_struct))) return -EFAULT; - break; - - default: - printk(KERN_DEBUG "SWIM floppy_ioctl: unknown cmd %d\n", - cmd); - return -ENOSYS; + return 0; } - return 0; + return -ENOTTY; } static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo) From 56a1c5ee54f69dd767fb61d301883dc919ddc259 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: [PATCH 201/660] block/swim: Rename macros to avoid inconsistent inverted logic The Sony drive status bits use active-low logic. The swim_readbit() function converts that to 'C' logic for readability. Hence, the sense of the names of the status bit macros should not be inverted. Mostly they are correct. However, the TWOMEG_DRIVE, MFM_MODE and TWOMEG_MEDIA macros have inverted sense (like MkLinux). Fix this inconsistency and make the following patches less confusing. The same problem affects swim3.c so fix that too. No functional change. The FDHD drive status bits are documented in sonydriv.cpp from MAME and in swimiii.h from MkLinux. Cc: Laurent Vivier Cc: Benjamin Herrenschmidt Cc: linuxppc-dev@lists.ozlabs.org Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 8 ++++---- drivers/block/swim3.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 7b847170cf71..d1ee4670666a 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -110,7 +110,7 @@ struct iwm { /* Select values for swim_select and swim_readbit */ #define READ_DATA_0 0x074 -#define TWOMEG_DRIVE 0x075 +#define ONEMEG_DRIVE 0x075 #define SINGLE_SIDED 0x076 #define DRIVE_PRESENT 0x077 #define DISK_IN 0x170 @@ -118,9 +118,9 @@ struct iwm { #define TRACK_ZERO 0x172 #define TACHO 0x173 #define READ_DATA_1 0x174 -#define MFM_MODE 0x175 +#define GCR_MODE 0x175 #define SEEK_COMPLETE 0x176 -#define ONEMEG_MEDIA 0x177 +#define TWOMEG_MEDIA 0x177 /* Bits in handshake register */ @@ -612,7 +612,7 @@ static void setup_medium(struct floppy_state *fs) struct floppy_struct *g; fs->disk_in = 1; fs->write_protected = swim_readbit(base, WRITE_PROT); - fs->type = swim_readbit(base, ONEMEG_MEDIA); + fs->type = swim_readbit(base, TWOMEG_MEDIA); if (swim_track00(base)) printk(KERN_ERR diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index af51015d056e..469541c1e51e 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -148,7 +148,7 @@ struct swim3 { #define MOTOR_ON 2 #define RELAX 3 /* also eject in progress */ #define READ_DATA_0 4 -#define TWOMEG_DRIVE 5 +#define ONEMEG_DRIVE 5 #define SINGLE_SIDED 6 /* drive or diskette is 4MB type? */ #define DRIVE_PRESENT 7 #define DISK_IN 8 @@ -156,9 +156,9 @@ struct swim3 { #define TRACK_ZERO 10 #define TACHO 11 #define READ_DATA_1 12 -#define MFM_MODE 13 +#define GCR_MODE 13 #define SEEK_COMPLETE 14 -#define ONEMEG_MEDIA 15 +#define TWOMEG_MEDIA 15 /* Definitions of values used in writing and formatting */ #define DATA_ESCAPE 0x99 From 8a500df63d07d8aee44b7ee2c54e462e47ce93ec Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: [PATCH 202/660] block/swim: Check drive type The SWIM chip is compatible with GCR-mode Sony 400K/800K drives but this driver only supports MFM mode. Therefore only Sony FDHD drives are supported. Skip incompatible drives. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index d1ee4670666a..c8c8b9da3edd 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -829,10 +829,12 @@ static int swim_floppy_init(struct swim_priv *swd) /* scan floppy drives */ swim_drive(base, INTERNAL_DRIVE); - if (swim_readbit(base, DRIVE_PRESENT)) + if (swim_readbit(base, DRIVE_PRESENT) && + !swim_readbit(base, ONEMEG_DRIVE)) swim_add_floppy(swd, INTERNAL_DRIVE); swim_drive(base, EXTERNAL_DRIVE); - if (swim_readbit(base, DRIVE_PRESENT)) + if (swim_readbit(base, DRIVE_PRESENT) && + !swim_readbit(base, ONEMEG_DRIVE)) swim_add_floppy(swd, EXTERNAL_DRIVE); /* register floppy drives */ From 5a13388d7aa1177b98d7168330ecbeeac52f844d Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: [PATCH 203/660] block/swim: Fix IO error at end of medium Reading to the end of a 720K disk results in an IO error instead of EOF because the block layer thinks the disk has 2880 sectors. (Partly this is a result of inverted logic of the ONEMEG_MEDIA bit that's now fixed.) Initialize the density and head count in swim_add_floppy() to agree with the device size passed to set_capacity() during drive probe. Call set_capacity() again upon device open, after refreshing the density and head count values. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index c8c8b9da3edd..2c75761b61e8 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -612,7 +612,6 @@ static void setup_medium(struct floppy_state *fs) struct floppy_struct *g; fs->disk_in = 1; fs->write_protected = swim_readbit(base, WRITE_PROT); - fs->type = swim_readbit(base, TWOMEG_MEDIA); if (swim_track00(base)) printk(KERN_ERR @@ -620,6 +619,9 @@ static void setup_medium(struct floppy_state *fs) swim_track00(base); + fs->type = swim_readbit(base, TWOMEG_MEDIA) ? + HD_MEDIA : DD_MEDIA; + fs->head_number = swim_readbit(base, SINGLE_SIDED) ? 1 : 2; get_floppy_geometry(fs, 0, &g); fs->total_secs = g->size; fs->secpercyl = g->head * g->sect; @@ -656,6 +658,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) goto out; } + set_capacity(fs->disk, fs->total_secs); + if (mode & FMODE_NDELAY) return 0; @@ -808,10 +812,9 @@ static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) swim_motor(base, OFF); - if (swim_readbit(base, SINGLE_SIDED)) - fs->head_number = 1; - else - fs->head_number = 2; + fs->type = HD_MEDIA; + fs->head_number = 2; + fs->ref_count = 0; fs->ejected = 1; From b3906535ccc6cd04c42f9b1c7e31d1947b3ebc74 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: [PATCH 204/660] block/swim: Select appropriate drive on device open The driver supports internal and external FDD units so the floppy_open function must not hard-code the drive location. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 2c75761b61e8..0e31884a9519 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -648,7 +648,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) swim_write(base, setup, S_IBM_DRIVE | S_FCLK_DIV2); udelay(10); - swim_drive(base, INTERNAL_DRIVE); + swim_drive(base, fs->location); swim_motor(base, ON); swim_action(base, SETMFM); if (fs->ejected) From f4560231ec42092c6662acccabb28c6cac9f5dfb Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Tue, 17 Apr 2018 11:46:20 +0800 Subject: [PATCH 205/660] blk-mq: start request gstate with gen 1 rq->gstate and rq->aborted_gstate both are zero before rqs are allocated. If we have a small timeout, when the timer fires, there could be rqs that are never allocated, and also there could be rq that has been allocated but not initialized and started. At the moment, the rq->gstate and rq->aborted_gstate both are 0, thus the blk_mq_terminate_expired will identify the rq is timed out and invoke .timeout early. For scsi, this will cause scsi_times_out to be invoked before the scsi_cmnd is not initialized, scsi_cmnd->device is still NULL at the moment, then we will get crash. Cc: Bart Van Assche Cc: Tejun Heo Cc: Ming Lei Cc: Martin Steigerwald Cc: stable@vger.kernel.org Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++++ block/blk-mq.c | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 3e77409e5a84..85909b431eb0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -201,6 +201,10 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->part = NULL; seqcount_init(&rq->gstate_seq); u64_stats_init(&rq->aborted_gstate_sync); + /* + * See comment of blk_mq_init_request + */ + WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); } EXPORT_SYMBOL(blk_rq_init); diff --git a/block/blk-mq.c b/block/blk-mq.c index 0dc9e341c2a7..e4aa36817367 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2042,6 +2042,13 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, seqcount_init(&rq->gstate_seq); u64_stats_init(&rq->aborted_gstate_sync); + /* + * start gstate with gen 1 instead of 0, otherwise it will be equal + * to aborted_gstate, and be identified timed out by + * blk_mq_terminate_expired. + */ + WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); + return 0; } From 5c8dad48e4f53d6fd0a7e4f95d7c1c983374de88 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 13 Apr 2018 11:55:13 -0700 Subject: [PATCH 206/660] trace_kprobe: Remove warning message "Could not insert probe at..." This warning message is not very helpful, as the return value should already show information about the error. Also, this message will spam dmesg if the user space does testing in a loop, like: for x in {0..5} do echo p:xx xx+$x >> /sys/kernel/debug/tracing/kprobe_events done Reported-by: Vince Weaver Signed-off-by: Song Liu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20180413185513.3626052-1-songliubraving@fb.com Signed-off-by: Ingo Molnar --- kernel/trace/trace_kprobe.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1cd3fb4d70f8..02aed76e0978 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -512,8 +512,6 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) if (ret == 0) tk->tp.flags |= TP_FLAG_REGISTERED; else { - pr_warn("Could not insert probe at %s+%lu: %d\n", - trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret); if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) { pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); ret = 0; From b11954a6971baa5842e24e6c0dcf56f117249638 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Fri, 30 Mar 2018 15:11:30 +0100 Subject: [PATCH 207/660] drm/exynos: Move GEM BOs to drm_framebuffer Since drm_framebuffer can now store GEM objects directly, place them there rather than in our own subclass. As this makes the framebuffer create_handle and destroy functions the same as the GEM framebuffer helper, we can reuse those. Signed-off-by: Daniel Stone Signed-off-by: Inki Dae Cc: Inki Dae Cc: Joonyoung Shim Cc: Seung-Woo Kim Cc: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 39 +++----------------------- 1 file changed, 4 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 0faaf829f5bf..f28ce493e314 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "exynos_drm_drv.h" @@ -36,7 +37,6 @@ */ struct exynos_drm_fb { struct drm_framebuffer fb; - struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; dma_addr_t dma_addr[MAX_FB_BUFFER]; }; @@ -66,40 +66,9 @@ static int check_fb_gem_memory_type(struct drm_device *drm_dev, return 0; } -static void exynos_drm_fb_destroy(struct drm_framebuffer *fb) -{ - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); - unsigned int i; - - drm_framebuffer_cleanup(fb); - - for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem); i++) { - struct drm_gem_object *obj; - - if (exynos_fb->exynos_gem[i] == NULL) - continue; - - obj = &exynos_fb->exynos_gem[i]->base; - drm_gem_object_unreference_unlocked(obj); - } - - kfree(exynos_fb); - exynos_fb = NULL; -} - -static int exynos_drm_fb_create_handle(struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) -{ - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); - - return drm_gem_handle_create(file_priv, - &exynos_fb->exynos_gem[0]->base, handle); -} - static const struct drm_framebuffer_funcs exynos_drm_fb_funcs = { - .destroy = exynos_drm_fb_destroy, - .create_handle = exynos_drm_fb_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; struct drm_framebuffer * @@ -121,7 +90,7 @@ exynos_drm_framebuffer_init(struct drm_device *dev, if (ret < 0) goto err; - exynos_fb->exynos_gem[i] = exynos_gem[i]; + exynos_fb->fb.obj[i] = &exynos_gem[i]->base; exynos_fb->dma_addr[i] = exynos_gem[i]->dma_addr + mode_cmd->offsets[i]; } From 7b30508f5116574f94b50c71d3da1089d145e603 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Fri, 30 Mar 2018 15:11:31 +0100 Subject: [PATCH 208/660] drm/exynos: Move dma_addr out of exynos_drm_fb This can be calculated from the GEM BO DMA address as well as the offset stored in the base framebuffer. Signed-off-by: Daniel Stone Signed-off-by: Inki Dae Cc: Inki Dae Cc: Joonyoung Shim Cc: Seung-Woo Kim Cc: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index f28ce493e314..168c71f80b72 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -37,7 +37,6 @@ */ struct exynos_drm_fb { struct drm_framebuffer fb; - dma_addr_t dma_addr[MAX_FB_BUFFER]; }; static int check_fb_gem_memory_type(struct drm_device *drm_dev, @@ -91,8 +90,6 @@ exynos_drm_framebuffer_init(struct drm_device *dev, goto err; exynos_fb->fb.obj[i] = &exynos_gem[i]->base; - exynos_fb->dma_addr[i] = exynos_gem[i]->dma_addr - + mode_cmd->offsets[i]; } drm_helper_mode_fill_fb_struct(dev, &exynos_fb->fb, mode_cmd); @@ -160,12 +157,13 @@ err: dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index) { - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); + struct exynos_drm_gem *exynos_gem; if (WARN_ON_ONCE(index >= MAX_FB_BUFFER)) return 0; - return exynos_fb->dma_addr[index]; + exynos_gem = to_exynos_gem(fb->obj[index]); + return exynos_gem->dma_addr + fb->offsets[index]; } static struct drm_mode_config_helper_funcs exynos_drm_mode_config_helpers = { From ff059fcbeed9cbed7421f82d1463dd74c472636e Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Fri, 30 Mar 2018 15:11:32 +0100 Subject: [PATCH 209/660] drm/exynos: exynos_drm_fb -> drm_framebuffer Now exynos_drm_fb is just an empty wrapper around drm_framebuffer, we can drop it. Signed-off-by: Daniel Stone Signed-off-by: Inki Dae Cc: Inki Dae Cc: Joonyoung Shim Cc: Seung-Woo Kim Cc: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 28 ++++++++------------------ 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 168c71f80b72..f0e79178bde6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -27,18 +27,6 @@ #include "exynos_drm_iommu.h" #include "exynos_drm_crtc.h" -#define to_exynos_fb(x) container_of(x, struct exynos_drm_fb, fb) - -/* - * exynos specific framebuffer structure. - * - * @fb: drm framebuffer obejct. - * @exynos_gem: array of exynos specific gem object containing a gem object. - */ -struct exynos_drm_fb { - struct drm_framebuffer fb; -}; - static int check_fb_gem_memory_type(struct drm_device *drm_dev, struct exynos_drm_gem *exynos_gem) { @@ -76,12 +64,12 @@ exynos_drm_framebuffer_init(struct drm_device *dev, struct exynos_drm_gem **exynos_gem, int count) { - struct exynos_drm_fb *exynos_fb; + struct drm_framebuffer *fb; int i; int ret; - exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL); - if (!exynos_fb) + fb = kzalloc(sizeof(*fb), GFP_KERNEL); + if (!fb) return ERR_PTR(-ENOMEM); for (i = 0; i < count; i++) { @@ -89,21 +77,21 @@ exynos_drm_framebuffer_init(struct drm_device *dev, if (ret < 0) goto err; - exynos_fb->fb.obj[i] = &exynos_gem[i]->base; + fb->obj[i] = &exynos_gem[i]->base; } - drm_helper_mode_fill_fb_struct(dev, &exynos_fb->fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); - ret = drm_framebuffer_init(dev, &exynos_fb->fb, &exynos_drm_fb_funcs); + ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs); if (ret < 0) { DRM_ERROR("failed to initialize framebuffer\n"); goto err; } - return &exynos_fb->fb; + return fb; err: - kfree(exynos_fb); + kfree(fb); return ERR_PTR(ret); } From 596a9f6768af58d7034f6ebe559a0d489ae61467 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 12 Apr 2018 14:21:54 -0500 Subject: [PATCH 210/660] objtool: Support HOSTCFLAGS and HOSTLDFLAGS It may be useful to compile host programs with different flags (e.g. hardening). Ensure that objtool picks up the appropriate flags. Signed-off-by: Laura Abbott Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kbuild@vger.kernel.org Link: http://lkml.kernel.org/r/05a360681176f1423cb2fde8faae3a0a0261afc5.1523560825.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 8ae824dbfca3..f76d9914686a 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -31,8 +31,8 @@ INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/objtool/arch/$(ARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -CFLAGS += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) -LDFLAGS += -lelf $(LIBSUBCMD) +CFLAGS += -Werror $(WARNINGS) $(HOSTCFLAGS) -g $(INCLUDES) +LDFLAGS += -lelf $(LIBSUBCMD) $(HOSTLDFLAGS) # Allow old libelf to be used: elfshdr := $(shell echo '$(pound)include ' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) From f0cf47d939d0b4b4f660c5aaa4276fa3488f3391 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Apr 2018 14:48:24 +0100 Subject: [PATCH 211/660] KVM: arm/arm64: Close VMID generation race Before entering the guest, we check whether our VMID is still part of the current generation. In order to avoid taking a lock, we start with checking that the generation is still current, and only if not current do we take the lock, recheck, and update the generation and VMID. This leaves open a small race: A vcpu can bump up the global generation number as well as the VM's, but has not updated the VMID itself yet. At that point another vcpu from the same VM comes in, checks the generation (and finds it not needing anything), and jumps into the guest. At this point, we end-up with two vcpus belonging to the same VM running with two different VMIDs. Eventually, the VMID used by the second vcpu will get reassigned, and things will really go wrong... A simple solution would be to drop this initial check, and always take the lock. This is likely to cause performance issues. A middle ground is to convert the spinlock to a rwlock, and only take the read lock on the fast path. If the check fails at that point, drop it and acquire the write lock, rechecking the condition. This ensures that the above scenario doesn't occur. Cc: stable@vger.kernel.org Reported-by: Mark Rutland Tested-by: Shannon Zhao Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index dba629c5f8ac..a4c1b76240df 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -63,7 +63,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; static unsigned int kvm_vmid_bits __read_mostly; -static DEFINE_SPINLOCK(kvm_vmid_lock); +static DEFINE_RWLOCK(kvm_vmid_lock); static bool vgic_present; @@ -473,11 +473,16 @@ static void update_vttbr(struct kvm *kvm) { phys_addr_t pgd_phys; u64 vmid; + bool new_gen; - if (!need_new_vmid_gen(kvm)) + read_lock(&kvm_vmid_lock); + new_gen = need_new_vmid_gen(kvm); + read_unlock(&kvm_vmid_lock); + + if (!new_gen) return; - spin_lock(&kvm_vmid_lock); + write_lock(&kvm_vmid_lock); /* * We need to re-check the vmid_gen here to ensure that if another vcpu @@ -485,7 +490,7 @@ static void update_vttbr(struct kvm *kvm) * use the same vmid. */ if (!need_new_vmid_gen(kvm)) { - spin_unlock(&kvm_vmid_lock); + write_unlock(&kvm_vmid_lock); return; } @@ -519,7 +524,7 @@ static void update_vttbr(struct kvm *kvm) vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; - spin_unlock(&kvm_vmid_lock); + write_unlock(&kvm_vmid_lock); } static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) From 6522404f1a5780685cf2b3ab7fd811126ea07ba2 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 16 Apr 2018 20:28:31 +0200 Subject: [PATCH 212/660] MAINTAINERS: Update e-mail address for Christoffer Dall Update my e-mail address to a working address. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0a1410d5a621..3e9c99d2620b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7738,7 +7738,7 @@ F: arch/x86/include/asm/svm.h F: arch/x86/kvm/svm.c KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm) -M: Christoffer Dall +M: Christoffer Dall M: Marc Zyngier L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu @@ -7752,7 +7752,7 @@ F: virt/kvm/arm/ F: include/kvm/arm_* KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) -M: Christoffer Dall +M: Christoffer Dall M: Marc Zyngier L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu From fae764912153065ea55eda47f834e0764a54df94 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 12 Apr 2018 13:48:25 +0200 Subject: [PATCH 213/660] s390/signal: cleanup uapi struct sigaction The struct sigaction for user space in arch/s390/include/uapi/asm/signal.h is ill defined. The kernel uses two structures 'struct sigaction' and 'struct old_sigaction', the correlation in the kernel for both 31 and 64 bit is as follows sys_sigaction -> struct old_sigaction sys_rt_sigaction -> struct sigaction The correlation of the (single) uapi definition for 'struct sigaction' under '#ifndef __KERNEL__': 31-bit: sys_sigaction -> uapi struct sigaction 31-bit: sys_rt_sigaction -> no structure available 64-bit: sys_sigaction -> no structure available 64-bit: sys_rt_sigaction -> uapi struct sigaction This is quite confusing. To make it a bit less confusing make the uapi definition of 'struct sigaction' usable for sys_rt_sigaction for both 31-bit and 64-bit. Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/signal.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h index c57f9d28d894..9a14a611ed82 100644 --- a/arch/s390/include/uapi/asm/signal.h +++ b/arch/s390/include/uapi/asm/signal.h @@ -97,22 +97,31 @@ typedef unsigned long sigset_t; #include #ifndef __KERNEL__ -/* Here we must cater to libcs that poke about in kernel headers. */ +/* + * There are two system calls in regard to sigaction, sys_rt_sigaction + * and sys_sigaction. Internally the kernel uses the struct old_sigaction + * for the older sys_sigaction system call, and the kernel version of the + * struct sigaction for the newer sys_rt_sigaction. + * + * The uapi definition for struct sigaction has made a strange distinction + * between 31-bit and 64-bit in the past. For 64-bit the uapi structure + * looks like the kernel struct sigaction, but for 31-bit it used to + * look like the kernel struct old_sigaction. That practically made the + * structure unusable for either system call. To get around this problem + * the glibc always had its own definitions for the sigaction structures. + * + * The current struct sigaction uapi definition below is suitable for the + * sys_rt_sigaction system call only. + */ struct sigaction { union { __sighandler_t _sa_handler; void (*_sa_sigaction)(int, struct siginfo *, void *); } _u; -#ifndef __s390x__ /* lovely */ - sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -#else /* __s390x__ */ unsigned long sa_flags; void (*sa_restorer)(void); sigset_t sa_mask; -#endif /* __s390x__ */ }; #define sa_handler _u._sa_handler From 765cca91b895c8b747bca0b5fa54d1dc85c867a7 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 17 Apr 2018 09:28:59 +1000 Subject: [PATCH 214/660] netfilter: conntrack: include kmemleak.h for kmemleak_not_leak() After merging the netfilter tree, today's linux-next build (powerpc ppc64_defconfig) failed like this: net/netfilter/nf_conntrack_extend.c: In function 'nf_ct_ext_add': net/netfilter/nf_conntrack_extend.c:74:2: error: implicit declaration of function 'kmemleak_not_leak' [-Werror=implicit-function-declaration] kmemleak_not_leak(old); ^~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors Fixes: 114aa35d06d4 ("netfilter: conntrack: silent a memory leak warning") Signed-off-by: Stephen Rothwell Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_extend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index bd71a828ebde..277bbfe26478 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include +#include #include #include #include From 9dfbf78e4114fcaf4ef61c49885c3ab5bad40d0b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 18 Jan 2018 00:33:36 +0530 Subject: [PATCH 215/660] powerpc/64s: Default l1d_size to 64K in RFI fallback flush If there is no d-cache-size property in the device tree, l1d_size could be zero. We don't actually expect that to happen, it's only been seen on mambo (simulator) in some configurations. A zero-size l1d_size leads to the loop in the asm wrapping around to 2^64-1, and then walking off the end of the fallback area and eventually causing a page fault which is fatal. Just default to 64K which is correct on some CPUs, and sane enough to not cause a crash on others. Fixes: aa8a5e0062ac9 ('powerpc/64s: Add support for RFI flush of L1-D cache') Signed-off-by: Madhavan Srinivasan [mpe: Rewrite comment and change log] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 44c30dd38067..b78f142a4148 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -890,6 +890,17 @@ static void __ref init_fallback_flush(void) return; l1d_size = ppc64_caches.l1d.size; + + /* + * If there is no d-cache-size property in the device tree, l1d_size + * could be zero. That leads to the loop in the asm wrapping around to + * 2^64-1, and then walking off the end of the fallback area and + * eventually causing a page fault which is fatal. Just default to + * something vaguely sane. + */ + if (!l1d_size) + l1d_size = (64 * 1024); + limit = min(ppc64_bolted_size(), ppc64_rma_size); /* From ef97837db916075af54554552628bcb0840df62f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Apr 2018 14:44:42 +0200 Subject: [PATCH 216/660] x86: Remove pci-nommu.c The commit that switched x86 to dma_direct_ops stopped using and building this file, but accidentally left it in the tree. Remove it. Signed-off-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Cc: iommu@lists.infradead.org Link: https://lkml.kernel.org/r/20180416124442.13831-1-hch@lst.de --- arch/x86/kernel/pci-nommu.c | 90 ------------------------------------- 1 file changed, 90 deletions(-) delete mode 100644 arch/x86/kernel/pci-nommu.c diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c deleted file mode 100644 index ac7ea3a8242f..000000000000 --- a/arch/x86/kernel/pci-nommu.c +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Fallback functions when the main IOMMU code is not compiled in. This - code is roughly equivalent to i386. */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define NOMMU_MAPPING_ERROR 0 - -static int -check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) -{ - if (hwdev && !dma_capable(hwdev, bus, size)) { - if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) - printk(KERN_ERR - "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", - name, (long long)bus, size, - (long long)*hwdev->dma_mask); - return 0; - } - return 1; -} - -static dma_addr_t nommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset; - WARN_ON(size == 0); - if (!check_addr("map_single", dev, bus, size)) - return NOMMU_MAPPING_ERROR; - return bus; -} - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scatter-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - WARN_ON(nents == 0 || sg[0].length == 0); - - for_each_sg(sg, s, nents, i) { - BUG_ON(!sg_page(s)); - s->dma_address = sg_phys(s); - if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) - return 0; - s->dma_length = s->length; - } - return nents; -} - -static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == NOMMU_MAPPING_ERROR; -} - -const struct dma_map_ops nommu_dma_ops = { - .alloc = dma_generic_alloc_coherent, - .free = dma_generic_free_coherent, - .map_sg = nommu_map_sg, - .map_page = nommu_map_page, - .is_phys = 1, - .mapping_error = nommu_mapping_error, - .dma_supported = x86_dma_supported, -}; From d3878e164dcd3925a237a20e879432400e369172 Mon Sep 17 00:00:00 2001 From: Xiaoming Gao Date: Fri, 13 Apr 2018 17:48:08 +0800 Subject: [PATCH 217/660] x86/tsc: Prevent 32bit truncation in calc_hpet_ref() The TSC calibration code uses HPET as reference. The conversion normalizes the delta of two HPET timestamps: hpetref = ((tshpet1 - tshpet2) * HPET_PERIOD) / 1e6 and then divides the normalized delta of the corresponding TSC timestamps by the result to calulate the TSC frequency. tscfreq = ((tstsc1 - tstsc2 ) * 1e6) / hpetref This uses do_div() which takes an u32 as the divisor, which worked so far because the HPET frequency was low enough that 'hpetref' never exceeded 32bit. On Skylake machines the HPET frequency increased so 'hpetref' can exceed 32bit. do_div() truncates the divisor, which causes the calibration to fail. Use div64_u64() to avoid the problem. [ tglx: Fixes whitespace mangled patch and rewrote changelog ] Signed-off-by: Xiaoming Gao Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: peterz@infradead.org Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/38894564-4fc9-b8ec-353f-de702839e44e@gmail.com --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ef32297ff17e..91e6da48cbb6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -317,7 +317,7 @@ static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) hpet2 -= hpet1; tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); do_div(tmp, 1000000); - do_div(deltatsc, tmp); + deltatsc = div64_u64(deltatsc, tmp); return (unsigned long) deltatsc; } From 10daf10ab154e31237a8c07242be3063fb6a9bf4 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 12 Apr 2018 09:40:52 +0800 Subject: [PATCH 218/660] x86/acpi: Prevent X2APIC id 0xffffffff from being accounted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RongQing reported that there are some X2APIC id 0xffffffff in his machine's ACPI MADT table, which makes the number of possible CPU inaccurate. The reason is that the ACPI X2APIC parser has no sanity check for APIC ID 0xffffffff, which is an invalid id in all APIC types. See "Intel® 64 Architecture x2APIC Specification", Chapter 2.4.1. Add a sanity check to acpi_parse_x2apic() which ignores the invalid id. Reported-by: Li RongQing Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: len.brown@intel.com Cc: rjw@rjwysocki.net Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/20180412014052.25186-1-douly.fnst@cn.fujitsu.com --- arch/x86/kernel/acpi/boot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dde444f932c1..3b20607d581b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -215,6 +215,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) apic_id = processor->local_apic_id; enabled = processor->lapic_flags & ACPI_MADT_ENABLED; + /* Ignore invalid ID */ + if (apic_id == 0xffffffff) + return 0; + /* * We need to register disabled CPU as well to permit * counting disabled CPUs. This allows us to size From 451cf3ca7d4615631443014ee769c25e267c25ff Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 4 Apr 2018 14:45:27 +0800 Subject: [PATCH 219/660] x86/processor: Remove two unused function declarations early_trap_init() and cpu_set_gdt() have been removed, so remove the stale declarations as well. Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: keescook@chromium.org Cc: luto@kernel.org Cc: hpa@zytor.com Cc: bp@suse.de Cc: kirill.shutemov@linux.intel.com Link: https://lkml.kernel.org/r/20180404064527.10562-1-douly.fnst@cn.fujitsu.com --- arch/x86/include/asm/processor.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4fa4206029e3..21a114914ba4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -749,13 +749,11 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, extern void enable_sep_cpu(void); extern int sysenter_setup(void); -extern void early_trap_init(void); void early_trap_pf_init(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; -extern void cpu_set_gdt(int); extern void switch_to_new_gdt(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); From 5db8f8d1099bd93a64a80b609dbcce887327ffc8 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 3 Apr 2018 11:19:01 +0200 Subject: [PATCH 220/660] PCI: kirin: Fix reset gpio name As documented in the devicetree bindings (pci/kirin-pcie.txt) and the reset gpio name must be 'reset-gpios'. However, current driver erroneously looks for a 'reset-gpio' resource which makes the driver probe fail. Fix it. Fixes: fc5165db245a ("PCI: kirin: Add HiSilicon Kirin SoC PCIe controller driver") Signed-off-by: Loic Poulain [lorenzo.pieralisi@arm.com: updated the commit log] Signed-off-by: Lorenzo Pieralisi Acked-by: Xiaowei Song --- drivers/pci/dwc/pcie-kirin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/dwc/pcie-kirin.c b/drivers/pci/dwc/pcie-kirin.c index a6b88c7f6e3e..d2970a009eb5 100644 --- a/drivers/pci/dwc/pcie-kirin.c +++ b/drivers/pci/dwc/pcie-kirin.c @@ -486,7 +486,7 @@ static int kirin_pcie_probe(struct platform_device *pdev) return ret; kirin_pcie->gpio_id_reset = of_get_named_gpio(dev->of_node, - "reset-gpio", 0); + "reset-gpios", 0); if (kirin_pcie->gpio_id_reset < 0) return -ENODEV; From feb12f0cd8d7b1e8df2e6fce19fc9a026a468cc2 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Mon, 26 Mar 2018 16:48:00 +0100 Subject: [PATCH 221/660] ASoC: topology: Fix bugs of freeing soc topology In snd_soc_tplg_component_remove(), it should compare index and not dobj->index with SND_SOC_TPLG_INDEX_ALL for removing all topology objects. Signed-off-by: Yan Wang Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 942c6e5eb4b7..5598e891b2b3 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2580,7 +2580,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index) /* match index */ if (dobj->index != index && - dobj->index != SND_SOC_TPLG_INDEX_ALL) + index != SND_SOC_TPLG_INDEX_ALL) continue; switch (dobj->type) { From e91c2518a5d22a07642f35d85f39001ad379dae4 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 16 Apr 2018 13:36:46 +0200 Subject: [PATCH 222/660] livepatch: Initialize shadow variables safely by a custom callback The existing API allows to pass a sample data to initialize the shadow data. It works well when the data are position independent. But it fails miserably when we need to set a pointer to the shadow structure itself. Unfortunately, we might need to initialize the pointer surprisingly often because of struct list_head. It is even worse because the list might be hidden in other common structures, for example, struct mutex, struct wait_queue_head. For example, this was needed to fix races in ALSA sequencer. It required to add mutex into struct snd_seq_client. See commit b3defb791b26ea06 ("ALSA: seq: Make ioctls race-free") and commit d15d662e89fc667b9 ("ALSA: seq: Fix racy pool initializations") This patch makes the API more safe. A custom constructor function and data are passed to klp_shadow_*alloc() functions instead of the sample data. Note that ctor_data are no longer a template for shadow->data. It might point to any data that might be necessary when the constructor is called. Also note that the constructor is called under klp_shadow_lock. It is an internal spin_lock that synchronizes alloc() vs. get() operations, see klp_shadow_get_or_alloc(). On one hand, this adds a risk of ABBA deadlocks. On the other hand, it allows to do some operations safely. For example, we could add the new structure into an existing list. This must be done only once when the structure is allocated. Reported-by: Nicolai Stange Signed-off-by: Petr Mladek Acked-by: Josh Poimboeuf Acked-by: Miroslav Benes Signed-off-by: Jiri Kosina --- Documentation/livepatch/shadow-vars.txt | 31 ++++++--- include/linux/livepatch.h | 14 ++-- kernel/livepatch/shadow.c | 82 +++++++++++++++-------- samples/livepatch/livepatch-shadow-fix1.c | 18 ++++- samples/livepatch/livepatch-shadow-fix2.c | 6 +- 5 files changed, 104 insertions(+), 47 deletions(-) diff --git a/Documentation/livepatch/shadow-vars.txt b/Documentation/livepatch/shadow-vars.txt index 89c66634d600..9c7ae191641c 100644 --- a/Documentation/livepatch/shadow-vars.txt +++ b/Documentation/livepatch/shadow-vars.txt @@ -34,9 +34,13 @@ meta-data and shadow-data: - data[] - storage for shadow data It is important to note that the klp_shadow_alloc() and -klp_shadow_get_or_alloc() calls, described below, store a *copy* of the -data that the functions are provided. Callers should provide whatever -mutual exclusion is required of the shadow data. +klp_shadow_get_or_alloc() are zeroing the variable by default. +They also allow to call a custom constructor function when a non-zero +value is needed. Callers should provide whatever mutual exclusion +is required. + +Note that the constructor is called under klp_shadow_lock spinlock. It allows +to do actions that can be done only once when a new variable is allocated. * klp_shadow_get() - retrieve a shadow variable data pointer - search hashtable for pair @@ -47,7 +51,7 @@ mutual exclusion is required of the shadow data. - WARN and return NULL - if doesn't already exist - allocate a new shadow variable - - copy data into the new shadow variable + - initialize the variable using a custom constructor and data when provided - add to the global hashtable * klp_shadow_get_or_alloc() - get existing or alloc a new shadow variable @@ -56,7 +60,7 @@ mutual exclusion is required of the shadow data. - return existing shadow variable - if doesn't already exist - allocate a new shadow variable - - copy data into the new shadow variable + - initialize the variable using a custom constructor and data when provided - add pair to the global hashtable * klp_shadow_free() - detach and free a shadow variable @@ -107,7 +111,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); /* Attach a corresponding shadow variable, then initialize it */ - ps_lock = klp_shadow_alloc(sta, PS_LOCK, NULL, sizeof(*ps_lock), gfp); + ps_lock = klp_shadow_alloc(sta, PS_LOCK, sizeof(*ps_lock), gfp, + NULL, NULL); if (!ps_lock) goto shadow_fail; spin_lock_init(ps_lock); @@ -148,16 +153,24 @@ shadow variables to parents already in-flight. For commit 1d147bfa6429, a good spot to allocate a shadow spinlock is inside ieee80211_sta_ps_deliver_wakeup(): +int ps_lock_shadow_ctor(void *obj, void *shadow_data, void *ctor_data) +{ + spinlock_t *lock = shadow_data; + + spin_lock_init(lock); + return 0; +} + #define PS_LOCK 1 void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { - DEFINE_SPINLOCK(ps_lock_fallback); spinlock_t *ps_lock; /* sync with ieee80211_tx_h_unicast_ps_buf */ ps_lock = klp_shadow_get_or_alloc(sta, PS_LOCK, - &ps_lock_fallback, sizeof(ps_lock_fallback), - GFP_ATOMIC); + sizeof(*ps_lock), GFP_ATOMIC, + ps_lock_shadow_ctor, NULL); + if (ps_lock) spin_lock(ps_lock); ... diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 4754f01c1abb..7e084321b146 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -186,11 +186,17 @@ static inline bool klp_have_reliable_stack(void) IS_ENABLED(CONFIG_HAVE_RELIABLE_STACKTRACE); } +typedef int (*klp_shadow_ctor_t)(void *obj, + void *shadow_data, + void *ctor_data); + void *klp_shadow_get(void *obj, unsigned long id); -void *klp_shadow_alloc(void *obj, unsigned long id, void *data, - size_t size, gfp_t gfp_flags); -void *klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data, - size_t size, gfp_t gfp_flags); +void *klp_shadow_alloc(void *obj, unsigned long id, + size_t size, gfp_t gfp_flags, + klp_shadow_ctor_t ctor, void *ctor_data); +void *klp_shadow_get_or_alloc(void *obj, unsigned long id, + size_t size, gfp_t gfp_flags, + klp_shadow_ctor_t ctor, void *ctor_data); void klp_shadow_free(void *obj, unsigned long id); void klp_shadow_free_all(unsigned long id); diff --git a/kernel/livepatch/shadow.c b/kernel/livepatch/shadow.c index fdac27588d60..b10a0bbb7f84 100644 --- a/kernel/livepatch/shadow.c +++ b/kernel/livepatch/shadow.c @@ -113,8 +113,10 @@ void *klp_shadow_get(void *obj, unsigned long id) } EXPORT_SYMBOL_GPL(klp_shadow_get); -static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data, - size_t size, gfp_t gfp_flags, bool warn_on_exist) +static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id, + size_t size, gfp_t gfp_flags, + klp_shadow_ctor_t ctor, void *ctor_data, + bool warn_on_exist) { struct klp_shadow *new_shadow; void *shadow_data; @@ -125,18 +127,15 @@ static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data, if (shadow_data) goto exists; - /* Allocate a new shadow variable for use inside the lock below */ + /* + * Allocate a new shadow variable. Fill it with zeroes by default. + * More complex setting can be done by @ctor function. But it is + * called only when the buffer is really used (under klp_shadow_lock). + */ new_shadow = kzalloc(size + sizeof(*new_shadow), gfp_flags); if (!new_shadow) return NULL; - new_shadow->obj = obj; - new_shadow->id = id; - - /* Initialize the shadow variable if data provided */ - if (data) - memcpy(new_shadow->data, data, size); - /* Look for again under the lock */ spin_lock_irqsave(&klp_shadow_lock, flags); shadow_data = klp_shadow_get(obj, id); @@ -150,6 +149,22 @@ static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data, goto exists; } + new_shadow->obj = obj; + new_shadow->id = id; + + if (ctor) { + int err; + + err = ctor(obj, new_shadow->data, ctor_data); + if (err) { + spin_unlock_irqrestore(&klp_shadow_lock, flags); + kfree(new_shadow); + pr_err("Failed to construct shadow variable <%p, %lx> (%d)\n", + obj, id, err); + return NULL; + } + } + /* No found, so attach the newly allocated one */ hash_add_rcu(klp_shadow_hash, &new_shadow->node, (unsigned long)new_shadow->obj); @@ -170,26 +185,32 @@ exists: * klp_shadow_alloc() - allocate and add a new shadow variable * @obj: pointer to parent object * @id: data identifier - * @data: pointer to data to attach to parent * @size: size of attached data * @gfp_flags: GFP mask for allocation + * @ctor: custom constructor to initialize the shadow data (optional) + * @ctor_data: pointer to any data needed by @ctor (optional) * - * Allocates @size bytes for new shadow variable data using @gfp_flags - * and copies @size bytes from @data into the new shadow variable's own - * data space. If @data is NULL, @size bytes are still allocated, but - * no copy is performed. The new shadow variable is then added to the - * global hashtable. + * Allocates @size bytes for new shadow variable data using @gfp_flags. + * The data are zeroed by default. They are further initialized by @ctor + * function if it is not NULL. The new shadow variable is then added + * to the global hashtable. * - * If an existing shadow variable can be found, this routine - * will issue a WARN, exit early and return NULL. + * If an existing shadow variable can be found, this routine will + * issue a WARN, exit early and return NULL. + * + * This function guarantees that the constructor function is called only when + * the variable did not exist before. The cost is that @ctor is called + * in atomic context under a spin lock. * * Return: the shadow variable data element, NULL on duplicate or * failure. */ -void *klp_shadow_alloc(void *obj, unsigned long id, void *data, - size_t size, gfp_t gfp_flags) +void *klp_shadow_alloc(void *obj, unsigned long id, + size_t size, gfp_t gfp_flags, + klp_shadow_ctor_t ctor, void *ctor_data) { - return __klp_shadow_get_or_alloc(obj, id, data, size, gfp_flags, true); + return __klp_shadow_get_or_alloc(obj, id, size, gfp_flags, + ctor, ctor_data, true); } EXPORT_SYMBOL_GPL(klp_shadow_alloc); @@ -197,25 +218,28 @@ EXPORT_SYMBOL_GPL(klp_shadow_alloc); * klp_shadow_get_or_alloc() - get existing or allocate a new shadow variable * @obj: pointer to parent object * @id: data identifier - * @data: pointer to data to attach to parent * @size: size of attached data * @gfp_flags: GFP mask for allocation + * @ctor: custom constructor to initialize the shadow data (optional) + * @ctor_data: pointer to any data needed by @ctor (optional) * * Returns a pointer to existing shadow data if an shadow * variable is already present. Otherwise, it creates a new shadow * variable like klp_shadow_alloc(). * - * This function guarantees that only one shadow variable exists with - * the given @id for the given @obj. It also guarantees that the shadow - * variable will be initialized by the given @data only when it did not - * exist before. + * This function guarantees that only one shadow variable exists with the given + * @id for the given @obj. It also guarantees that the constructor function + * will be called only when the variable did not exist before. The cost is + * that @ctor is called in atomic context under a spin lock. * * Return: the shadow variable data element, NULL on failure. */ -void *klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data, - size_t size, gfp_t gfp_flags) +void *klp_shadow_get_or_alloc(void *obj, unsigned long id, + size_t size, gfp_t gfp_flags, + klp_shadow_ctor_t ctor, void *ctor_data) { - return __klp_shadow_get_or_alloc(obj, id, data, size, gfp_flags, false); + return __klp_shadow_get_or_alloc(obj, id, size, gfp_flags, + ctor, ctor_data, false); } EXPORT_SYMBOL_GPL(klp_shadow_get_or_alloc); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index 830c55514f9f..04151c7f2631 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -56,6 +56,21 @@ struct dummy { unsigned long jiffies_expire; }; +/* + * The constructor makes more sense together with klp_shadow_get_or_alloc(). + * In this example, it would be safe to assign the pointer also to the shadow + * variable returned by klp_shadow_alloc(). But we wanted to show the more + * complicated use of the API. + */ +static int shadow_leak_ctor(void *obj, void *shadow_data, void *ctor_data) +{ + void **shadow_leak = shadow_data; + void *leak = ctor_data; + + *shadow_leak = leak; + return 0; +} + struct dummy *livepatch_fix1_dummy_alloc(void) { struct dummy *d; @@ -74,7 +89,8 @@ struct dummy *livepatch_fix1_dummy_alloc(void) * pointer to handle resource release. */ leak = kzalloc(sizeof(int), GFP_KERNEL); - klp_shadow_alloc(d, SV_LEAK, &leak, sizeof(leak), GFP_KERNEL); + klp_shadow_alloc(d, SV_LEAK, sizeof(leak), GFP_KERNEL, + shadow_leak_ctor, leak); pr_info("%s: dummy @ %p, expires @ %lx\n", __func__, d, d->jiffies_expire); diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index ff9948f0ec00..d6c62844dc15 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -53,17 +53,15 @@ struct dummy { bool livepatch_fix2_dummy_check(struct dummy *d, unsigned long jiffies) { int *shadow_count; - int count; /* * Patch: handle in-flight dummy structures, if they do not * already have a SV_COUNTER shadow variable, then attach a * new one. */ - count = 0; shadow_count = klp_shadow_get_or_alloc(d, SV_COUNTER, - &count, sizeof(count), - GFP_NOWAIT); + sizeof(*shadow_count), GFP_NOWAIT, + NULL, NULL); if (shadow_count) *shadow_count += 1; From 3b2c77d000fe9f7d02e9e726e00dccf9f92b256f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 16 Apr 2018 13:36:47 +0200 Subject: [PATCH 223/660] livepatch: Allow to call a custom callback when freeing shadow variables We might need to do some actions before the shadow variable is freed. For example, we might need to remove it from a list or free some data that it points to. This is already possible now. The user can get the shadow variable by klp_shadow_get(), do the necessary actions, and then call klp_shadow_free(). This patch allows to do it a more elegant way. The user could implement the needed actions in a callback that is passed to klp_shadow_free() as a parameter. The callback usually does reverse operations to the constructor callback that can be called by klp_shadow_*alloc(). It is especially useful for klp_shadow_free_all(). There we need to do these extra actions for each found shadow variable with the given ID. Note that the memory used by the shadow variable itself is still released later by rcu callback. It is needed to protect internal structures that keep all shadow variables. But the destructor is called immediately. The shadow variable must not be access anyway after klp_shadow_free() is called. The user is responsible to protect this any suitable way. Be aware that the destructor is called under klp_shadow_lock. It is the same as for the contructor in klp_shadow_alloc(). Signed-off-by: Petr Mladek Acked-by: Josh Poimboeuf Acked-by: Miroslav Benes Signed-off-by: Jiri Kosina --- Documentation/livepatch/shadow-vars.txt | 10 ++++++--- include/linux/livepatch.h | 5 +++-- kernel/livepatch/shadow.c | 26 +++++++++++++++------- samples/livepatch/livepatch-shadow-fix1.c | 25 ++++++++++++--------- samples/livepatch/livepatch-shadow-fix2.c | 27 ++++++++++++++--------- 5 files changed, 59 insertions(+), 34 deletions(-) diff --git a/Documentation/livepatch/shadow-vars.txt b/Documentation/livepatch/shadow-vars.txt index 9c7ae191641c..ecc09a7be5dd 100644 --- a/Documentation/livepatch/shadow-vars.txt +++ b/Documentation/livepatch/shadow-vars.txt @@ -65,11 +65,15 @@ to do actions that can be done only once when a new variable is allocated. * klp_shadow_free() - detach and free a shadow variable - find and remove a reference from global hashtable - - if found, free shadow variable + - if found + - call destructor function if defined + - free shadow variable * klp_shadow_free_all() - detach and free all <*, id> shadow variables - find and remove any <*, id> references from global hashtable - - if found, free shadow variable + - if found + - call destructor function if defined + - free shadow variable 2. Use cases @@ -136,7 +140,7 @@ variable: void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { - klp_shadow_free(sta, PS_LOCK); + klp_shadow_free(sta, PS_LOCK, NULL); kfree(sta); ... diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 7e084321b146..aec44b1d9582 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -189,6 +189,7 @@ static inline bool klp_have_reliable_stack(void) typedef int (*klp_shadow_ctor_t)(void *obj, void *shadow_data, void *ctor_data); +typedef void (*klp_shadow_dtor_t)(void *obj, void *shadow_data); void *klp_shadow_get(void *obj, unsigned long id); void *klp_shadow_alloc(void *obj, unsigned long id, @@ -197,8 +198,8 @@ void *klp_shadow_alloc(void *obj, unsigned long id, void *klp_shadow_get_or_alloc(void *obj, unsigned long id, size_t size, gfp_t gfp_flags, klp_shadow_ctor_t ctor, void *ctor_data); -void klp_shadow_free(void *obj, unsigned long id); -void klp_shadow_free_all(unsigned long id); +void klp_shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor); +void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor); #else /* !CONFIG_LIVEPATCH */ diff --git a/kernel/livepatch/shadow.c b/kernel/livepatch/shadow.c index b10a0bbb7f84..83958c814439 100644 --- a/kernel/livepatch/shadow.c +++ b/kernel/livepatch/shadow.c @@ -243,15 +243,26 @@ void *klp_shadow_get_or_alloc(void *obj, unsigned long id, } EXPORT_SYMBOL_GPL(klp_shadow_get_or_alloc); +static void klp_shadow_free_struct(struct klp_shadow *shadow, + klp_shadow_dtor_t dtor) +{ + hash_del_rcu(&shadow->node); + if (dtor) + dtor(shadow->obj, shadow->data); + kfree_rcu(shadow, rcu_head); +} + /** * klp_shadow_free() - detach and free a shadow variable * @obj: pointer to parent object * @id: data identifier + * @dtor: custom callback that can be used to unregister the variable + * and/or free data that the shadow variable points to (optional) * * This function releases the memory for this shadow variable * instance, callers should stop referencing it accordingly. */ -void klp_shadow_free(void *obj, unsigned long id) +void klp_shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor) { struct klp_shadow *shadow; unsigned long flags; @@ -263,8 +274,7 @@ void klp_shadow_free(void *obj, unsigned long id) (unsigned long)obj) { if (klp_shadow_match(shadow, obj, id)) { - hash_del_rcu(&shadow->node); - kfree_rcu(shadow, rcu_head); + klp_shadow_free_struct(shadow, dtor); break; } } @@ -276,11 +286,13 @@ EXPORT_SYMBOL_GPL(klp_shadow_free); /** * klp_shadow_free_all() - detach and free all <*, id> shadow variables * @id: data identifier + * @dtor: custom callback that can be used to unregister the variable + * and/or free data that the shadow variable points to (optional) * * This function releases the memory for all <*, id> shadow variable * instances, callers should stop referencing them accordingly. */ -void klp_shadow_free_all(unsigned long id) +void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor) { struct klp_shadow *shadow; unsigned long flags; @@ -290,10 +302,8 @@ void klp_shadow_free_all(unsigned long id) /* Delete all <*, id> from hash */ hash_for_each(klp_shadow_hash, i, shadow, node) { - if (klp_shadow_match(shadow, shadow->obj, id)) { - hash_del_rcu(&shadow->node); - kfree_rcu(shadow, rcu_head); - } + if (klp_shadow_match(shadow, shadow->obj, id)) + klp_shadow_free_struct(shadow, dtor); } spin_unlock_irqrestore(&klp_shadow_lock, flags); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index 04151c7f2631..49b13553eaae 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -98,9 +98,19 @@ struct dummy *livepatch_fix1_dummy_alloc(void) return d; } +static void livepatch_fix1_dummy_leak_dtor(void *obj, void *shadow_data) +{ + void *d = obj; + void **shadow_leak = shadow_data; + + kfree(*shadow_leak); + pr_info("%s: dummy @ %p, prevented leak @ %p\n", + __func__, d, *shadow_leak); +} + void livepatch_fix1_dummy_free(struct dummy *d) { - void **shadow_leak, *leak; + void **shadow_leak; /* * Patch: fetch the saved SV_LEAK shadow variable, detach and @@ -109,15 +119,10 @@ void livepatch_fix1_dummy_free(struct dummy *d) * was loaded.) */ shadow_leak = klp_shadow_get(d, SV_LEAK); - if (shadow_leak) { - leak = *shadow_leak; - klp_shadow_free(d, SV_LEAK); - kfree(leak); - pr_info("%s: dummy @ %p, prevented leak @ %p\n", - __func__, d, leak); - } else { + if (shadow_leak) + klp_shadow_free(d, SV_LEAK, livepatch_fix1_dummy_leak_dtor); + else pr_info("%s: dummy @ %p leaked!\n", __func__, d); - } kfree(d); } @@ -163,7 +168,7 @@ static int livepatch_shadow_fix1_init(void) static void livepatch_shadow_fix1_exit(void) { /* Cleanup any existing SV_LEAK shadow variables */ - klp_shadow_free_all(SV_LEAK); + klp_shadow_free_all(SV_LEAK, livepatch_fix1_dummy_leak_dtor); WARN_ON(klp_unregister_patch(&patch)); } diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index d6c62844dc15..b34c7bf83356 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -68,22 +68,27 @@ bool livepatch_fix2_dummy_check(struct dummy *d, unsigned long jiffies) return time_after(jiffies, d->jiffies_expire); } +static void livepatch_fix2_dummy_leak_dtor(void *obj, void *shadow_data) +{ + void *d = obj; + void **shadow_leak = shadow_data; + + kfree(*shadow_leak); + pr_info("%s: dummy @ %p, prevented leak @ %p\n", + __func__, d, *shadow_leak); +} + void livepatch_fix2_dummy_free(struct dummy *d) { - void **shadow_leak, *leak; + void **shadow_leak; int *shadow_count; /* Patch: copy the memory leak patch from the fix1 module. */ shadow_leak = klp_shadow_get(d, SV_LEAK); - if (shadow_leak) { - leak = *shadow_leak; - klp_shadow_free(d, SV_LEAK); - kfree(leak); - pr_info("%s: dummy @ %p, prevented leak @ %p\n", - __func__, d, leak); - } else { + if (shadow_leak) + klp_shadow_free(d, SV_LEAK, livepatch_fix2_dummy_leak_dtor); + else pr_info("%s: dummy @ %p leaked!\n", __func__, d); - } /* * Patch: fetch the SV_COUNTER shadow variable and display @@ -93,7 +98,7 @@ void livepatch_fix2_dummy_free(struct dummy *d) if (shadow_count) { pr_info("%s: dummy @ %p, check counter = %d\n", __func__, d, *shadow_count); - klp_shadow_free(d, SV_COUNTER); + klp_shadow_free(d, SV_COUNTER, NULL); } kfree(d); @@ -140,7 +145,7 @@ static int livepatch_shadow_fix2_init(void) static void livepatch_shadow_fix2_exit(void) { /* Cleanup any existing SV_COUNTER shadow variables */ - klp_shadow_free_all(SV_COUNTER); + klp_shadow_free_all(SV_COUNTER, NULL); WARN_ON(klp_unregister_patch(&patch)); } From 165d102905691891f85cb90736c25150b7b25d29 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Mar 2018 11:51:43 +0100 Subject: [PATCH 224/660] arm64: KVM: Demote SVE and LORegion warnings to debug only While generating a message about guests probing for SVE/LORegions is a useful debugging tool, considering it an error is slightly over the top, as this is the only way the guest can find out about the presence of the feature. Let's turn these message into kvm_debug so that they can only be seen if CONFIG_DYNAMIC_DEBUG, and kept quiet otherwise. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 806b0b126a64..6e3b969391fd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -996,14 +996,12 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz) if (id == SYS_ID_AA64PFR0_EL1) { if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT)) - pr_err_once("kvm [%i]: SVE unsupported for guests, suppressing\n", - task_pid_nr(current)); + kvm_debug("SVE unsupported for guests, suppressing\n"); val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); } else if (id == SYS_ID_AA64MMFR1_EL1) { if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT)) - pr_err_once("kvm [%i]: LORegions unsupported for guests, suppressing\n", - task_pid_nr(current)); + kvm_debug("LORegions unsupported for guests, suppressing\n"); val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT); } From bf9a41377d14f565764022470e14aae72559589a Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 17 Apr 2018 11:23:49 +0100 Subject: [PATCH 225/660] KVM: arm/arm64: vgic: Kick new VCPU on interrupt migration When vgic_prune_ap_list() finds an interrupt that needs to be migrated to a new VCPU, we should notify this VCPU of the pending interrupt, since it requires immediate action. Kick this VCPU once we have added the new IRQ to the list, but only after dropping the locks. Reported-by: Stefano Stabellini Reviewed-by: Christoffer Dall Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e74baec76361..4b6d72939c42 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -594,6 +594,7 @@ retry: list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; + bool target_vcpu_needs_kick = false; spin_lock(&irq->irq_lock); @@ -664,11 +665,18 @@ retry: list_del(&irq->ap_list); irq->vcpu = target_vcpu; list_add_tail(&irq->ap_list, &new_cpu->ap_list_head); + target_vcpu_needs_kick = true; } spin_unlock(&irq->irq_lock); spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags); + + if (target_vcpu_needs_kick) { + kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu); + kvm_vcpu_kick(target_vcpu); + } + goto retry; } From cd6e992b3aab072cc90839508aaf5573c8f7e066 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 12 Apr 2018 20:26:27 +0300 Subject: [PATCH 226/660] xen/sndif: Sync up with the canonical definition in Xen This is the sync up with the canonical definition of the sound protocol in Xen: 1. Protocol version was referenced in the protocol description, but missed its definition. Fixed by adding a constant for current protocol version. 2. Some of the request descriptions have "reserved" fields missed: fixed by adding corresponding entries. 3. Extend the size of the requests and responses to 64 octets. Bump protocol version to 2. 4. Add explicit back and front synchronization In order to provide explicit synchronization between backend and frontend the following changes are introduced in the protocol: - add new ring buffer for sending asynchronous events from backend to frontend to report number of bytes played by the frontend (XENSND_EVT_CUR_POS) - introduce trigger events for playback control: start/stop/pause/resume - add "req-" prefix to event-channel and ring-ref to unify naming of the Xen event channels for requests and events 5. Add explicit back and front parameter negotiation In order to provide explicit stream parameter negotiation between backend and frontend the following changes are introduced in the protocol: add XENSND_OP_HW_PARAM_QUERY request to read/update configuration space for the parameters given: request passes desired parameter's intervals/masks and the response to this request returns allowed min/max intervals/masks to be used. Signed-off-by: Oleksandr Andrushchenko Signed-off-by: Oleksandr Grytsov Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Boris Ostrovsky Cc: Konrad Rzeszutek Wilk Cc: Takashi Iwai Signed-off-by: Boris Ostrovsky --- include/xen/interface/io/sndif.h | 324 +++++++++++++++++++++++++++++-- 1 file changed, 307 insertions(+), 17 deletions(-) diff --git a/include/xen/interface/io/sndif.h b/include/xen/interface/io/sndif.h index 5c918276835e..78bb5d9f8d83 100644 --- a/include/xen/interface/io/sndif.h +++ b/include/xen/interface/io/sndif.h @@ -36,6 +36,13 @@ #include "ring.h" #include "../grant_table.h" +/* + ****************************************************************************** + * Protocol version + ****************************************************************************** + */ +#define XENSND_PROTOCOL_VERSION 2 + /* ****************************************************************************** * Feature and Parameter Negotiation @@ -106,6 +113,8 @@ * * /local/domain/1/device/vsnd/0/0/0/ring-ref = "386" * /local/domain/1/device/vsnd/0/0/0/event-channel = "15" + * /local/domain/1/device/vsnd/0/0/0/evt-ring-ref = "1386" + * /local/domain/1/device/vsnd/0/0/0/evt-event-channel = "215" * *------------------------------ Stream 1, capture ---------------------------- * @@ -115,6 +124,8 @@ * * /local/domain/1/device/vsnd/0/0/1/ring-ref = "384" * /local/domain/1/device/vsnd/0/0/1/event-channel = "13" + * /local/domain/1/device/vsnd/0/0/1/evt-ring-ref = "1384" + * /local/domain/1/device/vsnd/0/0/1/evt-event-channel = "213" * *------------------------------- PCM device 1 -------------------------------- * @@ -128,6 +139,8 @@ * * /local/domain/1/device/vsnd/0/1/0/ring-ref = "387" * /local/domain/1/device/vsnd/0/1/0/event-channel = "151" + * /local/domain/1/device/vsnd/0/1/0/evt-ring-ref = "1387" + * /local/domain/1/device/vsnd/0/1/0/evt-event-channel = "351" * *------------------------------- PCM device 2 -------------------------------- * @@ -140,6 +153,8 @@ * * /local/domain/1/device/vsnd/0/2/0/ring-ref = "389" * /local/domain/1/device/vsnd/0/2/0/event-channel = "152" + * /local/domain/1/device/vsnd/0/2/0/evt-ring-ref = "1389" + * /local/domain/1/device/vsnd/0/2/0/evt-event-channel = "452" * ****************************************************************************** * Backend XenBus Nodes @@ -285,6 +300,23 @@ * The Xen grant reference granting permission for the backend to map * a sole page in a single page sized ring buffer. * + *--------------------- Stream Event Transport Parameters --------------------- + * + * This communication path is used to deliver asynchronous events from backend + * to frontend, set up per stream. + * + * evt-event-channel + * Values: + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * evt-ring-ref + * Values: + * + * The Xen grant reference granting permission for the backend to map + * a sole page in a single page sized ring buffer. + * ****************************************************************************** * STATE DIAGRAMS ****************************************************************************** @@ -432,6 +464,20 @@ #define XENSND_OP_GET_VOLUME 5 #define XENSND_OP_MUTE 6 #define XENSND_OP_UNMUTE 7 +#define XENSND_OP_TRIGGER 8 +#define XENSND_OP_HW_PARAM_QUERY 9 + +#define XENSND_OP_TRIGGER_START 0 +#define XENSND_OP_TRIGGER_PAUSE 1 +#define XENSND_OP_TRIGGER_STOP 2 +#define XENSND_OP_TRIGGER_RESUME 3 + +/* + ****************************************************************************** + * EVENT CODES + ****************************************************************************** + */ +#define XENSND_EVT_CUR_POS 0 /* ****************************************************************************** @@ -448,6 +494,8 @@ #define XENSND_FIELD_VCARD_LONG_NAME "long-name" #define XENSND_FIELD_RING_REF "ring-ref" #define XENSND_FIELD_EVT_CHNL "event-channel" +#define XENSND_FIELD_EVT_RING_REF "evt-ring-ref" +#define XENSND_FIELD_EVT_EVT_CHNL "evt-event-channel" #define XENSND_FIELD_DEVICE_NAME "name" #define XENSND_FIELD_TYPE "type" #define XENSND_FIELD_STREAM_UNIQUE_ID "unique-id" @@ -526,7 +574,7 @@ * *---------------------------------- Requests --------------------------------- * - * All request packets have the same length (32 octets) + * All request packets have the same length (64 octets) * All request packets have common header: * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ @@ -559,11 +607,13 @@ * +----------------+----------------+----------------+----------------+ * | gref_directory | 24 * +----------------+----------------+----------------+----------------+ - * | reserved | 28 + * | period_sz | 28 + * +----------------+----------------+----------------+----------------+ + * | reserved | 32 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ - * | reserved | 32 + * | reserved | 64 * +----------------+----------------+----------------+----------------+ * * pcm_rate - uint32_t, stream data rate, Hz @@ -571,6 +621,14 @@ * pcm_channels - uint8_t, number of channels of this stream, * [channels-min; channels-max] * buffer_sz - uint32_t, buffer size to be allocated, octets + * period_sz - uint32_t, event period size, octets + * This is the requested value of the period at which frontend would + * like to receive XENSND_EVT_CUR_POS notifications from the backend when + * stream position advances during playback/capture. + * It shows how many octets are expected to be played/captured before + * sending such an event. + * If set to 0 no XENSND_EVT_CUR_POS events are sent by the backend. + * * gref_directory - grant_ref_t, a reference to the first shared page * describing shared buffer references. At least one page exists. If shared * buffer size (buffer_sz) exceeds what can be addressed by this single page, @@ -585,6 +643,7 @@ struct xensnd_open_req { uint16_t reserved; uint32_t buffer_sz; grant_ref_t gref_directory; + uint32_t period_sz; }; /* @@ -632,7 +691,7 @@ struct xensnd_page_directory { * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ - * | reserved | 32 + * | reserved | 64 * +----------------+----------------+----------------+----------------+ * * Request read/write - used for read (for capture) or write (for playback): @@ -650,7 +709,7 @@ struct xensnd_page_directory { * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ - * | reserved | 32 + * | reserved | 64 * +----------------+----------------+----------------+----------------+ * * operation - XENSND_OP_READ for read or XENSND_OP_WRITE for write @@ -673,9 +732,11 @@ struct xensnd_rw_req { * +----------------+----------------+----------------+----------------+ * | length | 16 * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ - * | reserved | 32 + * | reserved | 64 * +----------------+----------------+----------------+----------------+ * * operation - XENSND_OP_SET_VOLUME for volume set @@ -713,9 +774,11 @@ struct xensnd_rw_req { * +----------------+----------------+----------------+----------------+ * | length | 16 * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ - * | reserved | 32 + * | reserved | 64 * +----------------+----------------+----------------+----------------+ * * operation - XENSND_OP_MUTE for mute or XENSND_OP_UNMUTE for unmute @@ -743,32 +806,213 @@ struct xensnd_rw_req { * * The 'struct xensnd_rw_req' is also used for XENSND_OP_SET_VOLUME, * XENSND_OP_GET_VOLUME, XENSND_OP_MUTE, XENSND_OP_UNMUTE. + * + * Request stream running state change - trigger PCM stream running state + * to start, stop, pause or resume: + * + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _OP_TRIGGER | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | type | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * type - uint8_t, XENSND_OP_TRIGGER_XXX value */ +struct xensnd_trigger_req { + uint8_t type; +}; + +/* + * Request stream parameter ranges: request intervals and + * masks of supported ranges for stream configuration values. + * + * Sound device configuration for a particular stream is a limited subset + * of the multidimensional configuration available on XenStore, e.g. + * once the frame rate has been selected there is a limited supported range + * for sample rates becomes available (which might be the same set configured + * on XenStore or less). For example, selecting 96kHz sample rate may limit + * number of channels available for such configuration from 4 to 2, etc. + * Thus, each call to XENSND_OP_HW_PARAM_QUERY may reduce configuration + * space making it possible to iteratively get the final stream configuration, + * used in XENSND_OP_OPEN request. + * + * See response format for this request. + * + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _HW_PARAM_QUERY| reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | formats mask low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | formats mask high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | min rate | 20 + * +----------------+----------------+----------------+----------------+ + * | max rate | 24 + * +----------------+----------------+----------------+----------------+ + * | min channels | 28 + * +----------------+----------------+----------------+----------------+ + * | max channels | 32 + * +----------------+----------------+----------------+----------------+ + * | min buffer frames | 36 + * +----------------+----------------+----------------+----------------+ + * | max buffer frames | 40 + * +----------------+----------------+----------------+----------------+ + * | min period frames | 44 + * +----------------+----------------+----------------+----------------+ + * | max period frames | 48 + * +----------------+----------------+----------------+----------------+ + * | reserved | 52 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * formats - uint64_t, bit mask representing values of the parameter + * made as bitwise OR of (1 << XENSND_PCM_FORMAT_XXX) values + * + * For interval parameters: + * min - uint32_t, minimum value of the parameter + * max - uint32_t, maximum value of the parameter + * + * Frame is defined as a product of the number of channels by the + * number of octets per one sample. + */ + +struct xensnd_query_hw_param { + uint64_t formats; + struct { + uint32_t min; + uint32_t max; + } rates; + struct { + uint32_t min; + uint32_t max; + } channels; + struct { + uint32_t min; + uint32_t max; + } buffer; + struct { + uint32_t min; + uint32_t max; + } period; +}; + /* *---------------------------------- Responses -------------------------------- * - * All response packets have the same length (32 octets) + * All response packets have the same length (64 octets) * - * Response for all requests: + * All response packets have common header: * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | id | operation | reserved | 4 * +----------------+----------------+----------------+----------------+ * | status | 8 * +----------------+----------------+----------------+----------------+ - * | reserved | 12 - * +----------------+----------------+----------------+----------------+ - * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| - * +----------------+----------------+----------------+----------------+ - * | reserved | 32 - * +----------------+----------------+----------------+----------------+ * * id - uint16_t, copied from the request * operation - uint8_t, XENSND_OP_* - copied from request * status - int32_t, response status, zero on success and -XEN_EXX on failure + * + * + * HW parameter query response - response for XENSND_OP_HW_PARAM_QUERY: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | status | 8 + * +----------------+----------------+----------------+----------------+ + * | formats mask low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | formats mask high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | min rate | 20 + * +----------------+----------------+----------------+----------------+ + * | max rate | 24 + * +----------------+----------------+----------------+----------------+ + * | min channels | 28 + * +----------------+----------------+----------------+----------------+ + * | max channels | 32 + * +----------------+----------------+----------------+----------------+ + * | min buffer frames | 36 + * +----------------+----------------+----------------+----------------+ + * | max buffer frames | 40 + * +----------------+----------------+----------------+----------------+ + * | min period frames | 44 + * +----------------+----------------+----------------+----------------+ + * | max period frames | 48 + * +----------------+----------------+----------------+----------------+ + * | reserved | 52 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Meaning of the values in this response is the same as for + * XENSND_OP_HW_PARAM_QUERY request. */ +/* + *----------------------------------- Events ---------------------------------- + * + * Events are sent via shared page allocated by the front and propagated by + * evt-event-channel/evt-ring-ref XenStore entries + * All event packets have the same length (64 octets) + * All event packets have common header: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | type | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * + * id - uint16_t, event id, may be used by front + * type - uint8_t, type of the event + * + * + * Current stream position - event from back to front when stream's + * playback/capture position has advanced: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _EVT_CUR_POS | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | position low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | position high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * position - current value of stream's playback/capture position, octets + * + */ + +struct xensnd_cur_pos_evt { + uint64_t position; +}; + struct xensnd_req { uint16_t id; uint8_t operation; @@ -776,7 +1020,9 @@ struct xensnd_req { union { struct xensnd_open_req open; struct xensnd_rw_req rw; - uint8_t reserved[24]; + struct xensnd_trigger_req trigger; + struct xensnd_query_hw_param hw_param; + uint8_t reserved[56]; } op; }; @@ -785,9 +1031,53 @@ struct xensnd_resp { uint8_t operation; uint8_t reserved; int32_t status; - uint8_t reserved1[24]; + union { + struct xensnd_query_hw_param hw_param; + uint8_t reserved1[56]; + } resp; +}; + +struct xensnd_evt { + uint16_t id; + uint8_t type; + uint8_t reserved[5]; + union { + struct xensnd_cur_pos_evt cur_pos; + uint8_t reserved[56]; + } op; }; DEFINE_RING_TYPES(xen_sndif, struct xensnd_req, struct xensnd_resp); +/* + ****************************************************************************** + * Back to front events delivery + ****************************************************************************** + * In order to deliver asynchronous events from back to front a shared page is + * allocated by front and its granted reference propagated to back via + * XenStore entries (evt-ring-ref/evt-event-channel). + * This page has a common header used by both front and back to synchronize + * access and control event's ring buffer, while back being a producer of the + * events and front being a consumer. The rest of the page after the header + * is used for event packets. + * + * Upon reception of an event(s) front may confirm its reception + * for either each event, group of events or none. + */ + +struct xensnd_event_page { + uint32_t in_cons; + uint32_t in_prod; + uint8_t reserved[56]; +}; + +#define XENSND_EVENT_PAGE_SIZE XEN_PAGE_SIZE +#define XENSND_IN_RING_OFFS (sizeof(struct xensnd_event_page)) +#define XENSND_IN_RING_SIZE (XENSND_EVENT_PAGE_SIZE - XENSND_IN_RING_OFFS) +#define XENSND_IN_RING_LEN (XENSND_IN_RING_SIZE / sizeof(struct xensnd_evt)) +#define XENSND_IN_RING(page) \ + ((struct xensnd_evt *)((char *)(page) + XENSND_IN_RING_OFFS)) +#define XENSND_IN_RING_REF(page, idx) \ + (XENSND_IN_RING((page))[(idx) % XENSND_IN_RING_LEN]) + #endif /* __XEN_PUBLIC_IO_SNDIF_H__ */ From ebf04f331fa15a966262341a7dc6b1a0efd633e4 Mon Sep 17 00:00:00 2001 From: Simon Gaiser Date: Thu, 15 Mar 2018 04:08:03 +0100 Subject: [PATCH 227/660] xen: xenbus_dev_frontend: Really return response string xenbus_command_reply() did not actually copy the response string and leaked stack content instead. Fixes: 9a6161fe73bd ("xen: return xenstore command failures via response instead of rc") Signed-off-by: Simon Gaiser Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_dev_frontend.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 0d6d9264d6a9..c3e201025ef0 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -403,7 +403,7 @@ static int xenbus_command_reply(struct xenbus_file_priv *u, { struct { struct xsd_sockmsg hdr; - const char body[16]; + char body[16]; } msg; int rc; @@ -412,6 +412,7 @@ static int xenbus_command_reply(struct xenbus_file_priv *u, msg.hdr.len = strlen(reply) + 1; if (msg.hdr.len > sizeof(msg.body)) return -E2BIG; + memcpy(&msg.body, reply, msg.hdr.len); mutex_lock(&u->reply_mutex); rc = queue_reply(&u->read_buffers, &msg, sizeof(msg.hdr) + msg.hdr.len); From e2f73a1828e9ffd2765ce1726b9a9c6e022e3cd6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 16 Apr 2018 08:18:22 +0200 Subject: [PATCH 228/660] tools/headers: Synchronize kernel ABI headers, v4.17-rc1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync the following tooling headers with the latest kernel version: tools/arch/arm/include/uapi/asm/kvm.h - New ABI: KVM_REG_ARM_* tools/arch/x86/include/asm/required-features.h - Removal of NEED_LA57 dependency tools/arch/x86/include/uapi/asm/kvm.h - New KVM ABI: KVM_SYNC_X86_* tools/include/uapi/asm-generic/mman-common.h - New ABI: MAP_FIXED_NOREPLACE flag tools/include/uapi/linux/bpf.h - New ABI: BPF_F_SEQ_NUMBER functions tools/include/uapi/linux/if_link.h - New ABI: IFLA tun and rmnet support tools/include/uapi/linux/kvm.h - New ABI: hyperv eventfd and CONN_ID_MASK support plus header cleanups tools/include/uapi/sound/asound.h - New ABI: SNDRV_PCM_FORMAT_FIRST PCM format specifier tools/perf/arch/x86/entry/syscalls/syscall_64.tbl - The x86 system call table description changed due to the ptregs changes and the renames, in: d5a00528b58c: syscalls/core, syscalls/x86: Rename struct pt_regs-based sys_*() to __x64_sys_*() 5ac9efa3c50d: syscalls/core, syscalls/x86: Clean up compat syscall stub naming convention ebeb8c82ffaf: syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32 Also fix the x86 syscall table warning: -Warning: Kernel ABI header at 'tools/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' +Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' None of these changes impact existing tooling code, so we only have to copy the kernel version. Signed-off-by: Ingo Molnar Cc: Adrian Hunter Cc: Alexander Potapenko Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Alexey Budankov Cc: Andi Kleen Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Brian Robbins Cc: Clark Williams Cc: Daniel Borkmann Cc: David Ahern Cc: Dmitriy Vyukov Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Jesper Dangaard Brouer Cc: Jin Yao Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Kan Liang Cc: Kim Phillips Cc: Linus Torvalds Cc: Li Zhijian Cc: Mark Rutland Cc: Martin Liška Cc: Martin Schwidefsky Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Stephane Eranian Cc: Stephen Rothwell Cc: Takuya Yamamoto Cc: Thomas Gleixner Cc: Thomas Richter Cc: Wang Nan Cc: William Cohen Cc: Yonghong Song Link: http://lkml.kernel.org/r/20180416064024.ofjtrz5yuu3ykhvl@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm/include/uapi/asm/kvm.h | 9 + .../arch/x86/include/asm/required-features.h | 8 +- tools/arch/x86/include/uapi/asm/kvm.h | 19 +- tools/include/uapi/asm-generic/mman-common.h | 3 + tools/include/uapi/linux/bpf.h | 1 + tools/include/uapi/linux/if_link.h | 39 + tools/include/uapi/linux/kvm.h | 21 +- tools/include/uapi/sound/asound.h | 1 + tools/perf/arch/x86/Makefile | 2 +- .../arch/x86/entry/syscalls/syscall_64.tbl | 712 +++++++++--------- 10 files changed, 451 insertions(+), 364 deletions(-) diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 6edd177bb1c7..2ba95d6fe852 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -135,6 +135,15 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_CRM_SHIFT 7 #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 #define KVM_REG_ARM_32_CRN_SHIFT 11 +/* + * For KVM currently all guest registers are nonsecure, but we reserve a bit + * in the encoding to distinguish secure from nonsecure for AArch32 system + * registers that are banked by security. This is 1 for the secure banked + * register, and 0 for the nonsecure banked register or if the register is + * not banked by security. + */ +#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000 +#define KVM_REG_ARM_SECURE_SHIFT 28 #define ARM_CP15_REG_SHIFT_MASK(x,n) \ (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index fb3a6de7440b..6847d85400a8 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -53,12 +53,6 @@ # define NEED_MOVBE 0 #endif -#ifdef CONFIG_X86_5LEVEL -# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#else -# define NEED_LA57 0 -#endif - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -104,7 +98,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 (NEED_LA57) +#define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index f3a960488eae..c535c2fdea13 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -354,8 +354,25 @@ struct kvm_xcrs { __u64 padding[16]; }; -/* definition of registers in kvm_run */ +#define KVM_SYNC_X86_REGS (1UL << 0) +#define KVM_SYNC_X86_SREGS (1UL << 1) +#define KVM_SYNC_X86_EVENTS (1UL << 2) + +#define KVM_SYNC_X86_VALID_FIELDS \ + (KVM_SYNC_X86_REGS| \ + KVM_SYNC_X86_SREGS| \ + KVM_SYNC_X86_EVENTS) + +/* kvm_sync_regs struct included by kvm_run struct */ struct kvm_sync_regs { + /* Members of this structure are potentially malicious. + * Care must be taken by code reading, esp. interpreting, + * data fields from them inside KVM to prevent TOCTOU and + * double-fetch types of vulnerabilities. + */ + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; }; #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index f8b134f5608f..e7ee32861d51 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -27,6 +27,9 @@ # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ #endif +/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ + /* * Flags for mlock */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9d07465023a2..c5ec89732a8d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -864,6 +864,7 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) +#define BPF_F_SEQ_NUMBER (1ULL << 3) /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 6d9447700e18..68699f654118 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -941,4 +941,43 @@ enum { IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ }; +/* tun section */ + +enum { + IFLA_TUN_UNSPEC, + IFLA_TUN_OWNER, + IFLA_TUN_GROUP, + IFLA_TUN_TYPE, + IFLA_TUN_PI, + IFLA_TUN_VNET_HDR, + IFLA_TUN_PERSIST, + IFLA_TUN_MULTI_QUEUE, + IFLA_TUN_NUM_QUEUES, + IFLA_TUN_NUM_DISABLED_QUEUES, + __IFLA_TUN_MAX, +}; + +#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) + +/* rmnet section */ + +#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0) +#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) + +enum { + IFLA_RMNET_UNSPEC, + IFLA_RMNET_MUX_ID, + IFLA_RMNET_FLAGS, + __IFLA_RMNET_MAX, +}; + +#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1) + +struct ifla_rmnet_flags { + __u32 flags; + __u32 mask; +}; + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 6b89f87db200..1065006c9bf5 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -396,6 +396,10 @@ struct kvm_run { char padding[256]; }; + /* 2048 is the size of the char array used to bound/pad the size + * of the union that holds sync regs. + */ + #define SYNC_REGS_SIZE_BYTES 2048 /* * shared registers between kvm and userspace. * kvm_valid_regs specifies the register classes set by the host @@ -407,7 +411,7 @@ struct kvm_run { __u64 kvm_dirty_regs; union { struct kvm_sync_regs regs; - char padding[2048]; + char padding[SYNC_REGS_SIZE_BYTES]; } s; }; @@ -936,6 +940,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_GET_CPU_CHAR 151 #define KVM_CAP_S390_BPB 152 #define KVM_CAP_GET_MSR_FEATURES 153 +#define KVM_CAP_HYPERV_EVENTFD 154 #ifdef KVM_CAP_IRQ_ROUTING @@ -1375,6 +1380,10 @@ struct kvm_enc_region { #define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) #define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) +/* Available with KVM_CAP_HYPERV_EVENTFD */ +#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) + + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1515,4 +1524,14 @@ struct kvm_assigned_msix_entry { #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) #define KVM_ARM_DEV_PMU (1 << 2) +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 07d61583fd02..ed0a120d4f08 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -242,6 +242,7 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_DSD_U16_BE ((__force snd_pcm_format_t) 51) /* DSD, 2-byte samples DSD (x16), big endian */ #define SNDRV_PCM_FORMAT_DSD_U32_BE ((__force snd_pcm_format_t) 52) /* DSD, 4-byte samples DSD (x32), big endian */ #define SNDRV_PCM_FORMAT_LAST SNDRV_PCM_FORMAT_DSD_U32_BE +#define SNDRV_PCM_FORMAT_FIRST SNDRV_PCM_FORMAT_S8 #ifdef SNDRV_LITTLE_ENDIAN #define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_LE diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index d74eaa7aa927..1a38e78117ce 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -21,7 +21,7 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sys)/syscall_64.tbl $(systbl) @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \ - || echo "Warning: Kernel ABI header at 'tools/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true + || echo "Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ clean:: diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5aef183e2f85..4dfe42666d0c 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -4,379 +4,383 @@ # The format is: # # +# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls +# # The abi is "common", "64" or "x32" for this file. # -0 common read sys_read -1 common write sys_write -2 common open sys_open -3 common close sys_close -4 common stat sys_newstat -5 common fstat sys_newfstat -6 common lstat sys_newlstat -7 common poll sys_poll -8 common lseek sys_lseek -9 common mmap sys_mmap -10 common mprotect sys_mprotect -11 common munmap sys_munmap -12 common brk sys_brk -13 64 rt_sigaction sys_rt_sigaction -14 common rt_sigprocmask sys_rt_sigprocmask -15 64 rt_sigreturn sys_rt_sigreturn/ptregs -16 64 ioctl sys_ioctl -17 common pread64 sys_pread64 -18 common pwrite64 sys_pwrite64 -19 64 readv sys_readv -20 64 writev sys_writev -21 common access sys_access -22 common pipe sys_pipe -23 common select sys_select -24 common sched_yield sys_sched_yield -25 common mremap sys_mremap -26 common msync sys_msync -27 common mincore sys_mincore -28 common madvise sys_madvise -29 common shmget sys_shmget -30 common shmat sys_shmat -31 common shmctl sys_shmctl -32 common dup sys_dup -33 common dup2 sys_dup2 -34 common pause sys_pause -35 common nanosleep sys_nanosleep -36 common getitimer sys_getitimer -37 common alarm sys_alarm -38 common setitimer sys_setitimer -39 common getpid sys_getpid -40 common sendfile sys_sendfile64 -41 common socket sys_socket -42 common connect sys_connect -43 common accept sys_accept -44 common sendto sys_sendto -45 64 recvfrom sys_recvfrom -46 64 sendmsg sys_sendmsg -47 64 recvmsg sys_recvmsg -48 common shutdown sys_shutdown -49 common bind sys_bind -50 common listen sys_listen -51 common getsockname sys_getsockname -52 common getpeername sys_getpeername -53 common socketpair sys_socketpair -54 64 setsockopt sys_setsockopt -55 64 getsockopt sys_getsockopt -56 common clone sys_clone/ptregs -57 common fork sys_fork/ptregs -58 common vfork sys_vfork/ptregs -59 64 execve sys_execve/ptregs -60 common exit sys_exit -61 common wait4 sys_wait4 -62 common kill sys_kill -63 common uname sys_newuname -64 common semget sys_semget -65 common semop sys_semop -66 common semctl sys_semctl -67 common shmdt sys_shmdt -68 common msgget sys_msgget -69 common msgsnd sys_msgsnd -70 common msgrcv sys_msgrcv -71 common msgctl sys_msgctl -72 common fcntl sys_fcntl -73 common flock sys_flock -74 common fsync sys_fsync -75 common fdatasync sys_fdatasync -76 common truncate sys_truncate -77 common ftruncate sys_ftruncate -78 common getdents sys_getdents -79 common getcwd sys_getcwd -80 common chdir sys_chdir -81 common fchdir sys_fchdir -82 common rename sys_rename -83 common mkdir sys_mkdir -84 common rmdir sys_rmdir -85 common creat sys_creat -86 common link sys_link -87 common unlink sys_unlink -88 common symlink sys_symlink -89 common readlink sys_readlink -90 common chmod sys_chmod -91 common fchmod sys_fchmod -92 common chown sys_chown -93 common fchown sys_fchown -94 common lchown sys_lchown -95 common umask sys_umask -96 common gettimeofday sys_gettimeofday -97 common getrlimit sys_getrlimit -98 common getrusage sys_getrusage -99 common sysinfo sys_sysinfo -100 common times sys_times -101 64 ptrace sys_ptrace -102 common getuid sys_getuid -103 common syslog sys_syslog -104 common getgid sys_getgid -105 common setuid sys_setuid -106 common setgid sys_setgid -107 common geteuid sys_geteuid -108 common getegid sys_getegid -109 common setpgid sys_setpgid -110 common getppid sys_getppid -111 common getpgrp sys_getpgrp -112 common setsid sys_setsid -113 common setreuid sys_setreuid -114 common setregid sys_setregid -115 common getgroups sys_getgroups -116 common setgroups sys_setgroups -117 common setresuid sys_setresuid -118 common getresuid sys_getresuid -119 common setresgid sys_setresgid -120 common getresgid sys_getresgid -121 common getpgid sys_getpgid -122 common setfsuid sys_setfsuid -123 common setfsgid sys_setfsgid -124 common getsid sys_getsid -125 common capget sys_capget -126 common capset sys_capset -127 64 rt_sigpending sys_rt_sigpending -128 64 rt_sigtimedwait sys_rt_sigtimedwait -129 64 rt_sigqueueinfo sys_rt_sigqueueinfo -130 common rt_sigsuspend sys_rt_sigsuspend -131 64 sigaltstack sys_sigaltstack -132 common utime sys_utime -133 common mknod sys_mknod +0 common read __x64_sys_read +1 common write __x64_sys_write +2 common open __x64_sys_open +3 common close __x64_sys_close +4 common stat __x64_sys_newstat +5 common fstat __x64_sys_newfstat +6 common lstat __x64_sys_newlstat +7 common poll __x64_sys_poll +8 common lseek __x64_sys_lseek +9 common mmap __x64_sys_mmap +10 common mprotect __x64_sys_mprotect +11 common munmap __x64_sys_munmap +12 common brk __x64_sys_brk +13 64 rt_sigaction __x64_sys_rt_sigaction +14 common rt_sigprocmask __x64_sys_rt_sigprocmask +15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs +16 64 ioctl __x64_sys_ioctl +17 common pread64 __x64_sys_pread64 +18 common pwrite64 __x64_sys_pwrite64 +19 64 readv __x64_sys_readv +20 64 writev __x64_sys_writev +21 common access __x64_sys_access +22 common pipe __x64_sys_pipe +23 common select __x64_sys_select +24 common sched_yield __x64_sys_sched_yield +25 common mremap __x64_sys_mremap +26 common msync __x64_sys_msync +27 common mincore __x64_sys_mincore +28 common madvise __x64_sys_madvise +29 common shmget __x64_sys_shmget +30 common shmat __x64_sys_shmat +31 common shmctl __x64_sys_shmctl +32 common dup __x64_sys_dup +33 common dup2 __x64_sys_dup2 +34 common pause __x64_sys_pause +35 common nanosleep __x64_sys_nanosleep +36 common getitimer __x64_sys_getitimer +37 common alarm __x64_sys_alarm +38 common setitimer __x64_sys_setitimer +39 common getpid __x64_sys_getpid +40 common sendfile __x64_sys_sendfile64 +41 common socket __x64_sys_socket +42 common connect __x64_sys_connect +43 common accept __x64_sys_accept +44 common sendto __x64_sys_sendto +45 64 recvfrom __x64_sys_recvfrom +46 64 sendmsg __x64_sys_sendmsg +47 64 recvmsg __x64_sys_recvmsg +48 common shutdown __x64_sys_shutdown +49 common bind __x64_sys_bind +50 common listen __x64_sys_listen +51 common getsockname __x64_sys_getsockname +52 common getpeername __x64_sys_getpeername +53 common socketpair __x64_sys_socketpair +54 64 setsockopt __x64_sys_setsockopt +55 64 getsockopt __x64_sys_getsockopt +56 common clone __x64_sys_clone/ptregs +57 common fork __x64_sys_fork/ptregs +58 common vfork __x64_sys_vfork/ptregs +59 64 execve __x64_sys_execve/ptregs +60 common exit __x64_sys_exit +61 common wait4 __x64_sys_wait4 +62 common kill __x64_sys_kill +63 common uname __x64_sys_newuname +64 common semget __x64_sys_semget +65 common semop __x64_sys_semop +66 common semctl __x64_sys_semctl +67 common shmdt __x64_sys_shmdt +68 common msgget __x64_sys_msgget +69 common msgsnd __x64_sys_msgsnd +70 common msgrcv __x64_sys_msgrcv +71 common msgctl __x64_sys_msgctl +72 common fcntl __x64_sys_fcntl +73 common flock __x64_sys_flock +74 common fsync __x64_sys_fsync +75 common fdatasync __x64_sys_fdatasync +76 common truncate __x64_sys_truncate +77 common ftruncate __x64_sys_ftruncate +78 common getdents __x64_sys_getdents +79 common getcwd __x64_sys_getcwd +80 common chdir __x64_sys_chdir +81 common fchdir __x64_sys_fchdir +82 common rename __x64_sys_rename +83 common mkdir __x64_sys_mkdir +84 common rmdir __x64_sys_rmdir +85 common creat __x64_sys_creat +86 common link __x64_sys_link +87 common unlink __x64_sys_unlink +88 common symlink __x64_sys_symlink +89 common readlink __x64_sys_readlink +90 common chmod __x64_sys_chmod +91 common fchmod __x64_sys_fchmod +92 common chown __x64_sys_chown +93 common fchown __x64_sys_fchown +94 common lchown __x64_sys_lchown +95 common umask __x64_sys_umask +96 common gettimeofday __x64_sys_gettimeofday +97 common getrlimit __x64_sys_getrlimit +98 common getrusage __x64_sys_getrusage +99 common sysinfo __x64_sys_sysinfo +100 common times __x64_sys_times +101 64 ptrace __x64_sys_ptrace +102 common getuid __x64_sys_getuid +103 common syslog __x64_sys_syslog +104 common getgid __x64_sys_getgid +105 common setuid __x64_sys_setuid +106 common setgid __x64_sys_setgid +107 common geteuid __x64_sys_geteuid +108 common getegid __x64_sys_getegid +109 common setpgid __x64_sys_setpgid +110 common getppid __x64_sys_getppid +111 common getpgrp __x64_sys_getpgrp +112 common setsid __x64_sys_setsid +113 common setreuid __x64_sys_setreuid +114 common setregid __x64_sys_setregid +115 common getgroups __x64_sys_getgroups +116 common setgroups __x64_sys_setgroups +117 common setresuid __x64_sys_setresuid +118 common getresuid __x64_sys_getresuid +119 common setresgid __x64_sys_setresgid +120 common getresgid __x64_sys_getresgid +121 common getpgid __x64_sys_getpgid +122 common setfsuid __x64_sys_setfsuid +123 common setfsgid __x64_sys_setfsgid +124 common getsid __x64_sys_getsid +125 common capget __x64_sys_capget +126 common capset __x64_sys_capset +127 64 rt_sigpending __x64_sys_rt_sigpending +128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait +129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo +130 common rt_sigsuspend __x64_sys_rt_sigsuspend +131 64 sigaltstack __x64_sys_sigaltstack +132 common utime __x64_sys_utime +133 common mknod __x64_sys_mknod 134 64 uselib -135 common personality sys_personality -136 common ustat sys_ustat -137 common statfs sys_statfs -138 common fstatfs sys_fstatfs -139 common sysfs sys_sysfs -140 common getpriority sys_getpriority -141 common setpriority sys_setpriority -142 common sched_setparam sys_sched_setparam -143 common sched_getparam sys_sched_getparam -144 common sched_setscheduler sys_sched_setscheduler -145 common sched_getscheduler sys_sched_getscheduler -146 common sched_get_priority_max sys_sched_get_priority_max -147 common sched_get_priority_min sys_sched_get_priority_min -148 common sched_rr_get_interval sys_sched_rr_get_interval -149 common mlock sys_mlock -150 common munlock sys_munlock -151 common mlockall sys_mlockall -152 common munlockall sys_munlockall -153 common vhangup sys_vhangup -154 common modify_ldt sys_modify_ldt -155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl -157 common prctl sys_prctl -158 common arch_prctl sys_arch_prctl -159 common adjtimex sys_adjtimex -160 common setrlimit sys_setrlimit -161 common chroot sys_chroot -162 common sync sys_sync -163 common acct sys_acct -164 common settimeofday sys_settimeofday -165 common mount sys_mount -166 common umount2 sys_umount -167 common swapon sys_swapon -168 common swapoff sys_swapoff -169 common reboot sys_reboot -170 common sethostname sys_sethostname -171 common setdomainname sys_setdomainname -172 common iopl sys_iopl/ptregs -173 common ioperm sys_ioperm +135 common personality __x64_sys_personality +136 common ustat __x64_sys_ustat +137 common statfs __x64_sys_statfs +138 common fstatfs __x64_sys_fstatfs +139 common sysfs __x64_sys_sysfs +140 common getpriority __x64_sys_getpriority +141 common setpriority __x64_sys_setpriority +142 common sched_setparam __x64_sys_sched_setparam +143 common sched_getparam __x64_sys_sched_getparam +144 common sched_setscheduler __x64_sys_sched_setscheduler +145 common sched_getscheduler __x64_sys_sched_getscheduler +146 common sched_get_priority_max __x64_sys_sched_get_priority_max +147 common sched_get_priority_min __x64_sys_sched_get_priority_min +148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval +149 common mlock __x64_sys_mlock +150 common munlock __x64_sys_munlock +151 common mlockall __x64_sys_mlockall +152 common munlockall __x64_sys_munlockall +153 common vhangup __x64_sys_vhangup +154 common modify_ldt __x64_sys_modify_ldt +155 common pivot_root __x64_sys_pivot_root +156 64 _sysctl __x64_sys_sysctl +157 common prctl __x64_sys_prctl +158 common arch_prctl __x64_sys_arch_prctl +159 common adjtimex __x64_sys_adjtimex +160 common setrlimit __x64_sys_setrlimit +161 common chroot __x64_sys_chroot +162 common sync __x64_sys_sync +163 common acct __x64_sys_acct +164 common settimeofday __x64_sys_settimeofday +165 common mount __x64_sys_mount +166 common umount2 __x64_sys_umount +167 common swapon __x64_sys_swapon +168 common swapoff __x64_sys_swapoff +169 common reboot __x64_sys_reboot +170 common sethostname __x64_sys_sethostname +171 common setdomainname __x64_sys_setdomainname +172 common iopl __x64_sys_iopl/ptregs +173 common ioperm __x64_sys_ioperm 174 64 create_module -175 common init_module sys_init_module -176 common delete_module sys_delete_module +175 common init_module __x64_sys_init_module +176 common delete_module __x64_sys_delete_module 177 64 get_kernel_syms 178 64 query_module -179 common quotactl sys_quotactl +179 common quotactl __x64_sys_quotactl 180 64 nfsservctl 181 common getpmsg 182 common putpmsg 183 common afs_syscall 184 common tuxcall 185 common security -186 common gettid sys_gettid -187 common readahead sys_readahead -188 common setxattr sys_setxattr -189 common lsetxattr sys_lsetxattr -190 common fsetxattr sys_fsetxattr -191 common getxattr sys_getxattr -192 common lgetxattr sys_lgetxattr -193 common fgetxattr sys_fgetxattr -194 common listxattr sys_listxattr -195 common llistxattr sys_llistxattr -196 common flistxattr sys_flistxattr -197 common removexattr sys_removexattr -198 common lremovexattr sys_lremovexattr -199 common fremovexattr sys_fremovexattr -200 common tkill sys_tkill -201 common time sys_time -202 common futex sys_futex -203 common sched_setaffinity sys_sched_setaffinity -204 common sched_getaffinity sys_sched_getaffinity +186 common gettid __x64_sys_gettid +187 common readahead __x64_sys_readahead +188 common setxattr __x64_sys_setxattr +189 common lsetxattr __x64_sys_lsetxattr +190 common fsetxattr __x64_sys_fsetxattr +191 common getxattr __x64_sys_getxattr +192 common lgetxattr __x64_sys_lgetxattr +193 common fgetxattr __x64_sys_fgetxattr +194 common listxattr __x64_sys_listxattr +195 common llistxattr __x64_sys_llistxattr +196 common flistxattr __x64_sys_flistxattr +197 common removexattr __x64_sys_removexattr +198 common lremovexattr __x64_sys_lremovexattr +199 common fremovexattr __x64_sys_fremovexattr +200 common tkill __x64_sys_tkill +201 common time __x64_sys_time +202 common futex __x64_sys_futex +203 common sched_setaffinity __x64_sys_sched_setaffinity +204 common sched_getaffinity __x64_sys_sched_getaffinity 205 64 set_thread_area -206 64 io_setup sys_io_setup -207 common io_destroy sys_io_destroy -208 common io_getevents sys_io_getevents -209 64 io_submit sys_io_submit -210 common io_cancel sys_io_cancel +206 64 io_setup __x64_sys_io_setup +207 common io_destroy __x64_sys_io_destroy +208 common io_getevents __x64_sys_io_getevents +209 64 io_submit __x64_sys_io_submit +210 common io_cancel __x64_sys_io_cancel 211 64 get_thread_area -212 common lookup_dcookie sys_lookup_dcookie -213 common epoll_create sys_epoll_create +212 common lookup_dcookie __x64_sys_lookup_dcookie +213 common epoll_create __x64_sys_epoll_create 214 64 epoll_ctl_old 215 64 epoll_wait_old -216 common remap_file_pages sys_remap_file_pages -217 common getdents64 sys_getdents64 -218 common set_tid_address sys_set_tid_address -219 common restart_syscall sys_restart_syscall -220 common semtimedop sys_semtimedop -221 common fadvise64 sys_fadvise64 -222 64 timer_create sys_timer_create -223 common timer_settime sys_timer_settime -224 common timer_gettime sys_timer_gettime -225 common timer_getoverrun sys_timer_getoverrun -226 common timer_delete sys_timer_delete -227 common clock_settime sys_clock_settime -228 common clock_gettime sys_clock_gettime -229 common clock_getres sys_clock_getres -230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group -232 common epoll_wait sys_epoll_wait -233 common epoll_ctl sys_epoll_ctl -234 common tgkill sys_tgkill -235 common utimes sys_utimes +216 common remap_file_pages __x64_sys_remap_file_pages +217 common getdents64 __x64_sys_getdents64 +218 common set_tid_address __x64_sys_set_tid_address +219 common restart_syscall __x64_sys_restart_syscall +220 common semtimedop __x64_sys_semtimedop +221 common fadvise64 __x64_sys_fadvise64 +222 64 timer_create __x64_sys_timer_create +223 common timer_settime __x64_sys_timer_settime +224 common timer_gettime __x64_sys_timer_gettime +225 common timer_getoverrun __x64_sys_timer_getoverrun +226 common timer_delete __x64_sys_timer_delete +227 common clock_settime __x64_sys_clock_settime +228 common clock_gettime __x64_sys_clock_gettime +229 common clock_getres __x64_sys_clock_getres +230 common clock_nanosleep __x64_sys_clock_nanosleep +231 common exit_group __x64_sys_exit_group +232 common epoll_wait __x64_sys_epoll_wait +233 common epoll_ctl __x64_sys_epoll_ctl +234 common tgkill __x64_sys_tgkill +235 common utimes __x64_sys_utimes 236 64 vserver -237 common mbind sys_mbind -238 common set_mempolicy sys_set_mempolicy -239 common get_mempolicy sys_get_mempolicy -240 common mq_open sys_mq_open -241 common mq_unlink sys_mq_unlink -242 common mq_timedsend sys_mq_timedsend -243 common mq_timedreceive sys_mq_timedreceive -244 64 mq_notify sys_mq_notify -245 common mq_getsetattr sys_mq_getsetattr -246 64 kexec_load sys_kexec_load -247 64 waitid sys_waitid -248 common add_key sys_add_key -249 common request_key sys_request_key -250 common keyctl sys_keyctl -251 common ioprio_set sys_ioprio_set -252 common ioprio_get sys_ioprio_get -253 common inotify_init sys_inotify_init -254 common inotify_add_watch sys_inotify_add_watch -255 common inotify_rm_watch sys_inotify_rm_watch -256 common migrate_pages sys_migrate_pages -257 common openat sys_openat -258 common mkdirat sys_mkdirat -259 common mknodat sys_mknodat -260 common fchownat sys_fchownat -261 common futimesat sys_futimesat -262 common newfstatat sys_newfstatat -263 common unlinkat sys_unlinkat -264 common renameat sys_renameat -265 common linkat sys_linkat -266 common symlinkat sys_symlinkat -267 common readlinkat sys_readlinkat -268 common fchmodat sys_fchmodat -269 common faccessat sys_faccessat -270 common pselect6 sys_pselect6 -271 common ppoll sys_ppoll -272 common unshare sys_unshare -273 64 set_robust_list sys_set_robust_list -274 64 get_robust_list sys_get_robust_list -275 common splice sys_splice -276 common tee sys_tee -277 common sync_file_range sys_sync_file_range -278 64 vmsplice sys_vmsplice -279 64 move_pages sys_move_pages -280 common utimensat sys_utimensat -281 common epoll_pwait sys_epoll_pwait -282 common signalfd sys_signalfd -283 common timerfd_create sys_timerfd_create -284 common eventfd sys_eventfd -285 common fallocate sys_fallocate -286 common timerfd_settime sys_timerfd_settime -287 common timerfd_gettime sys_timerfd_gettime -288 common accept4 sys_accept4 -289 common signalfd4 sys_signalfd4 -290 common eventfd2 sys_eventfd2 -291 common epoll_create1 sys_epoll_create1 -292 common dup3 sys_dup3 -293 common pipe2 sys_pipe2 -294 common inotify_init1 sys_inotify_init1 -295 64 preadv sys_preadv -296 64 pwritev sys_pwritev -297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo -298 common perf_event_open sys_perf_event_open -299 64 recvmmsg sys_recvmmsg -300 common fanotify_init sys_fanotify_init -301 common fanotify_mark sys_fanotify_mark -302 common prlimit64 sys_prlimit64 -303 common name_to_handle_at sys_name_to_handle_at -304 common open_by_handle_at sys_open_by_handle_at -305 common clock_adjtime sys_clock_adjtime -306 common syncfs sys_syncfs -307 64 sendmmsg sys_sendmmsg -308 common setns sys_setns -309 common getcpu sys_getcpu -310 64 process_vm_readv sys_process_vm_readv -311 64 process_vm_writev sys_process_vm_writev -312 common kcmp sys_kcmp -313 common finit_module sys_finit_module -314 common sched_setattr sys_sched_setattr -315 common sched_getattr sys_sched_getattr -316 common renameat2 sys_renameat2 -317 common seccomp sys_seccomp -318 common getrandom sys_getrandom -319 common memfd_create sys_memfd_create -320 common kexec_file_load sys_kexec_file_load -321 common bpf sys_bpf -322 64 execveat sys_execveat/ptregs -323 common userfaultfd sys_userfaultfd -324 common membarrier sys_membarrier -325 common mlock2 sys_mlock2 -326 common copy_file_range sys_copy_file_range -327 64 preadv2 sys_preadv2 -328 64 pwritev2 sys_pwritev2 -329 common pkey_mprotect sys_pkey_mprotect -330 common pkey_alloc sys_pkey_alloc -331 common pkey_free sys_pkey_free -332 common statx sys_statx +237 common mbind __x64_sys_mbind +238 common set_mempolicy __x64_sys_set_mempolicy +239 common get_mempolicy __x64_sys_get_mempolicy +240 common mq_open __x64_sys_mq_open +241 common mq_unlink __x64_sys_mq_unlink +242 common mq_timedsend __x64_sys_mq_timedsend +243 common mq_timedreceive __x64_sys_mq_timedreceive +244 64 mq_notify __x64_sys_mq_notify +245 common mq_getsetattr __x64_sys_mq_getsetattr +246 64 kexec_load __x64_sys_kexec_load +247 64 waitid __x64_sys_waitid +248 common add_key __x64_sys_add_key +249 common request_key __x64_sys_request_key +250 common keyctl __x64_sys_keyctl +251 common ioprio_set __x64_sys_ioprio_set +252 common ioprio_get __x64_sys_ioprio_get +253 common inotify_init __x64_sys_inotify_init +254 common inotify_add_watch __x64_sys_inotify_add_watch +255 common inotify_rm_watch __x64_sys_inotify_rm_watch +256 common migrate_pages __x64_sys_migrate_pages +257 common openat __x64_sys_openat +258 common mkdirat __x64_sys_mkdirat +259 common mknodat __x64_sys_mknodat +260 common fchownat __x64_sys_fchownat +261 common futimesat __x64_sys_futimesat +262 common newfstatat __x64_sys_newfstatat +263 common unlinkat __x64_sys_unlinkat +264 common renameat __x64_sys_renameat +265 common linkat __x64_sys_linkat +266 common symlinkat __x64_sys_symlinkat +267 common readlinkat __x64_sys_readlinkat +268 common fchmodat __x64_sys_fchmodat +269 common faccessat __x64_sys_faccessat +270 common pselect6 __x64_sys_pselect6 +271 common ppoll __x64_sys_ppoll +272 common unshare __x64_sys_unshare +273 64 set_robust_list __x64_sys_set_robust_list +274 64 get_robust_list __x64_sys_get_robust_list +275 common splice __x64_sys_splice +276 common tee __x64_sys_tee +277 common sync_file_range __x64_sys_sync_file_range +278 64 vmsplice __x64_sys_vmsplice +279 64 move_pages __x64_sys_move_pages +280 common utimensat __x64_sys_utimensat +281 common epoll_pwait __x64_sys_epoll_pwait +282 common signalfd __x64_sys_signalfd +283 common timerfd_create __x64_sys_timerfd_create +284 common eventfd __x64_sys_eventfd +285 common fallocate __x64_sys_fallocate +286 common timerfd_settime __x64_sys_timerfd_settime +287 common timerfd_gettime __x64_sys_timerfd_gettime +288 common accept4 __x64_sys_accept4 +289 common signalfd4 __x64_sys_signalfd4 +290 common eventfd2 __x64_sys_eventfd2 +291 common epoll_create1 __x64_sys_epoll_create1 +292 common dup3 __x64_sys_dup3 +293 common pipe2 __x64_sys_pipe2 +294 common inotify_init1 __x64_sys_inotify_init1 +295 64 preadv __x64_sys_preadv +296 64 pwritev __x64_sys_pwritev +297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo +298 common perf_event_open __x64_sys_perf_event_open +299 64 recvmmsg __x64_sys_recvmmsg +300 common fanotify_init __x64_sys_fanotify_init +301 common fanotify_mark __x64_sys_fanotify_mark +302 common prlimit64 __x64_sys_prlimit64 +303 common name_to_handle_at __x64_sys_name_to_handle_at +304 common open_by_handle_at __x64_sys_open_by_handle_at +305 common clock_adjtime __x64_sys_clock_adjtime +306 common syncfs __x64_sys_syncfs +307 64 sendmmsg __x64_sys_sendmmsg +308 common setns __x64_sys_setns +309 common getcpu __x64_sys_getcpu +310 64 process_vm_readv __x64_sys_process_vm_readv +311 64 process_vm_writev __x64_sys_process_vm_writev +312 common kcmp __x64_sys_kcmp +313 common finit_module __x64_sys_finit_module +314 common sched_setattr __x64_sys_sched_setattr +315 common sched_getattr __x64_sys_sched_getattr +316 common renameat2 __x64_sys_renameat2 +317 common seccomp __x64_sys_seccomp +318 common getrandom __x64_sys_getrandom +319 common memfd_create __x64_sys_memfd_create +320 common kexec_file_load __x64_sys_kexec_file_load +321 common bpf __x64_sys_bpf +322 64 execveat __x64_sys_execveat/ptregs +323 common userfaultfd __x64_sys_userfaultfd +324 common membarrier __x64_sys_membarrier +325 common mlock2 __x64_sys_mlock2 +326 common copy_file_range __x64_sys_copy_file_range +327 64 preadv2 __x64_sys_preadv2 +328 64 pwritev2 __x64_sys_pwritev2 +329 common pkey_mprotect __x64_sys_pkey_mprotect +330 common pkey_alloc __x64_sys_pkey_alloc +331 common pkey_free __x64_sys_pkey_free +332 common statx __x64_sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. +# for native 64-bit operation. The __x32_compat_sys stubs are created +# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 +# is defined. # -512 x32 rt_sigaction compat_sys_rt_sigaction +512 x32 rt_sigaction __x32_compat_sys_rt_sigaction 513 x32 rt_sigreturn sys32_x32_rt_sigreturn -514 x32 ioctl compat_sys_ioctl -515 x32 readv compat_sys_readv -516 x32 writev compat_sys_writev -517 x32 recvfrom compat_sys_recvfrom -518 x32 sendmsg compat_sys_sendmsg -519 x32 recvmsg compat_sys_recvmsg -520 x32 execve compat_sys_execve/ptregs -521 x32 ptrace compat_sys_ptrace -522 x32 rt_sigpending compat_sys_rt_sigpending -523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait -524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo -525 x32 sigaltstack compat_sys_sigaltstack -526 x32 timer_create compat_sys_timer_create -527 x32 mq_notify compat_sys_mq_notify -528 x32 kexec_load compat_sys_kexec_load -529 x32 waitid compat_sys_waitid -530 x32 set_robust_list compat_sys_set_robust_list -531 x32 get_robust_list compat_sys_get_robust_list -532 x32 vmsplice compat_sys_vmsplice -533 x32 move_pages compat_sys_move_pages -534 x32 preadv compat_sys_preadv64 -535 x32 pwritev compat_sys_pwritev64 -536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -537 x32 recvmmsg compat_sys_recvmmsg -538 x32 sendmmsg compat_sys_sendmmsg -539 x32 process_vm_readv compat_sys_process_vm_readv -540 x32 process_vm_writev compat_sys_process_vm_writev -541 x32 setsockopt compat_sys_setsockopt -542 x32 getsockopt compat_sys_getsockopt -543 x32 io_setup compat_sys_io_setup -544 x32 io_submit compat_sys_io_submit -545 x32 execveat compat_sys_execveat/ptregs -546 x32 preadv2 compat_sys_preadv64v2 -547 x32 pwritev2 compat_sys_pwritev64v2 +514 x32 ioctl __x32_compat_sys_ioctl +515 x32 readv __x32_compat_sys_readv +516 x32 writev __x32_compat_sys_writev +517 x32 recvfrom __x32_compat_sys_recvfrom +518 x32 sendmsg __x32_compat_sys_sendmsg +519 x32 recvmsg __x32_compat_sys_recvmsg +520 x32 execve __x32_compat_sys_execve/ptregs +521 x32 ptrace __x32_compat_sys_ptrace +522 x32 rt_sigpending __x32_compat_sys_rt_sigpending +523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait +524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo +525 x32 sigaltstack __x32_compat_sys_sigaltstack +526 x32 timer_create __x32_compat_sys_timer_create +527 x32 mq_notify __x32_compat_sys_mq_notify +528 x32 kexec_load __x32_compat_sys_kexec_load +529 x32 waitid __x32_compat_sys_waitid +530 x32 set_robust_list __x32_compat_sys_set_robust_list +531 x32 get_robust_list __x32_compat_sys_get_robust_list +532 x32 vmsplice __x32_compat_sys_vmsplice +533 x32 move_pages __x32_compat_sys_move_pages +534 x32 preadv __x32_compat_sys_preadv64 +535 x32 pwritev __x32_compat_sys_pwritev64 +536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo +537 x32 recvmmsg __x32_compat_sys_recvmmsg +538 x32 sendmmsg __x32_compat_sys_sendmmsg +539 x32 process_vm_readv __x32_compat_sys_process_vm_readv +540 x32 process_vm_writev __x32_compat_sys_process_vm_writev +541 x32 setsockopt __x32_compat_sys_setsockopt +542 x32 getsockopt __x32_compat_sys_getsockopt +543 x32 io_setup __x32_compat_sys_io_setup +544 x32 io_submit __x32_compat_sys_io_submit +545 x32 execveat __x32_compat_sys_execveat/ptregs +546 x32 preadv2 __x32_compat_sys_preadv64v2 +547 x32 pwritev2 __x32_compat_sys_pwritev64v2 From 101592b4904ecf6b8ed2a4784d41d180319d95a1 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Mon, 9 Apr 2018 10:25:32 +0300 Subject: [PATCH 229/660] perf/core: Store context switch out type in PERF_RECORD_SWITCH[_CPU_WIDE] Store preempting context switch out event into Perf trace as a part of PERF_RECORD_SWITCH[_CPU_WIDE] record. Percentage of preempting and non-preempting context switches help understanding the nature of workloads (CPU or IO bound) that are running on a machine; The event is treated as preemption one when task->state value of the thread being switched out is TASK_RUNNING. Event type encoding is implemented using PERF_RECORD_MISC_SWITCH_OUT_PREEMPT bit; Signed-off-by: Alexey Budankov Acked-by: Peter Zijlstra Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/9ff84e83-a0ca-dd82-a6d0-cb951689be74@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 18 +++++++++++++++--- kernel/events/core.c | 4 ++++ tools/include/uapi/linux/perf_event.h | 18 +++++++++++++++--- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 912b85b52344..b8e288a1f740 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -650,11 +650,23 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* - * Indicates that the content of PERF_SAMPLE_IP points to - * the actual instruction that triggered the event. See also - * perf_event_attr::precise_ip. + * These PERF_RECORD_MISC_* flags below are safely reused + * for the following events: + * + * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * + * + * PERF_RECORD_MISC_EXACT_IP: + * Indicates that the content of PERF_SAMPLE_IP points to + * the actual instruction that triggered the event. See also + * perf_event_attr::precise_ip. + * + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: + * Indicates that thread was preempted in TASK_RUNNING state. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) +#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 2d5fe26551f8..1bae80aaabfb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7587,6 +7587,10 @@ static void perf_event_switch(struct task_struct *task, }, }; + if (!sched_in && task->state == TASK_RUNNING) + switch_event.event_id.header.misc |= + PERF_RECORD_MISC_SWITCH_OUT_PREEMPT; + perf_iterate_sb(perf_event_switch_output, &switch_event, NULL); diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 912b85b52344..b8e288a1f740 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -650,11 +650,23 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* - * Indicates that the content of PERF_SAMPLE_IP points to - * the actual instruction that triggered the event. See also - * perf_event_attr::precise_ip. + * These PERF_RECORD_MISC_* flags below are safely reused + * for the following events: + * + * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * + * + * PERF_RECORD_MISC_EXACT_IP: + * Indicates that the content of PERF_SAMPLE_IP points to + * the actual instruction that triggered the event. See also + * perf_event_attr::precise_ip. + * + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: + * Indicates that thread was preempted in TASK_RUNNING state. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) +#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ From b3f35b5d5d36fba9311d1a965fcce2dd35614f2e Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Mon, 9 Apr 2018 10:26:05 +0300 Subject: [PATCH 230/660] perf report: Extend raw dump (-D) out with switch out event type Print additional 'preempt' tag for PERF_RECORD_SWITCH[_CPU_WIDE] OUT records when event header misc field contains PERF_RECORD_MISC_SWITCH_OUT_PREEMPT bit set designating preemption context switch out event: tools/perf/perf report -D -i perf.data | grep _SWITCH 0 768361415226 0x27f076 [0x28]: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 8/8 4 768362216813 0x28f45e [0x28]: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 4 768362217824 0x28f486 [0x28]: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 4073/4073 0 768362414027 0x27f0ce [0x28]: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 8/8 0 768362414367 0x27f0f6 [0x28]: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 Signed-off-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/6f5aebb9-b96c-f304-f08f-8f046d38de4f@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f0a6cbd033cc..98ff3a6a3d50 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1421,7 +1421,9 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) { bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; - const char *in_out = out ? "OUT" : "IN "; + const char *in_out = !out ? "IN " : + !(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ? + "OUT " : "OUT preempt"; if (event->header.type == PERF_RECORD_SWITCH) return fprintf(fp, " %s\n", in_out); From bf30cc1882d2c65aaf92842cc9bcf06565eab73c Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Mon, 9 Apr 2018 10:26:46 +0300 Subject: [PATCH 231/660] perf script: Extend misc field decoding with switch out event type Append 'p' sign to 'S' tag designating the type of context switch out event so 'Sp' means preemption context switch. Documentation is extended to cover new presentation changes. $ perf script --show-switch-events -F +misc -I -i perf.data: hdparm 4073 [004] U 762.198265: 380194 cycles:ppp: 7faf727f5a23 strchr (/usr/lib64/ld-2.26.so) hdparm 4073 [004] K 762.198366: 441572 cycles:ppp: ffffffffb9218435 alloc_set_pte (/lib/modules/4.16.0-rc6+/build/vmlinux) hdparm 4073 [004] S 762.198391: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 swapper 0 [004] 762.198392: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 4073/4073 swapper 0 [004] Sp 762.198477: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 4073/4073 hdparm 4073 [004] 762.198478: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 swapper 0 [007] K 762.198514: 2303073 cycles:ppp: ffffffffb98b0c66 intel_idle (/lib/modules/4.16.0-rc6+/build/vmlinux) swapper 0 [007] Sp 762.198561: PERF_RECORD_SWITCH_CPU_WIDE OUT preempt next pid/tid: 1134/1134 kworker/u16:18 1134 [007] 762.198562: PERF_RECORD_SWITCH_CPU_WIDE IN prev pid/tid: 0/0 kworker/u16:18 1134 [007] S 762.198567: PERF_RECORD_SWITCH_CPU_WIDE OUT next pid/tid: 0/0 Signed-off-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5fc65ce7-8ca5-53ae-8858-8ddd27290575@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 17 +++++++++-------- tools/perf/builtin-script.c | 5 ++++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 36ec0257f8d3..afdafe2110a1 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -228,14 +228,15 @@ OPTIONS For sample events it's possible to display misc field with -F +misc option, following letters are displayed for each bit: - PERF_RECORD_MISC_KERNEL K - PERF_RECORD_MISC_USER U - PERF_RECORD_MISC_HYPERVISOR H - PERF_RECORD_MISC_GUEST_KERNEL G - PERF_RECORD_MISC_GUEST_USER g - PERF_RECORD_MISC_MMAP_DATA* M - PERF_RECORD_MISC_COMM_EXEC E - PERF_RECORD_MISC_SWITCH_OUT S + PERF_RECORD_MISC_KERNEL K + PERF_RECORD_MISC_USER U + PERF_RECORD_MISC_HYPERVISOR H + PERF_RECORD_MISC_GUEST_KERNEL G + PERF_RECORD_MISC_GUEST_USER g + PERF_RECORD_MISC_MMAP_DATA* M + PERF_RECORD_MISC_COMM_EXEC E + PERF_RECORD_MISC_SWITCH_OUT S + PERF_RECORD_MISC_SWITCH_OUT_PREEMPT Sp $ perf script -F +misc ... sched-messaging 1414 K 28690.636582: 4590 cycles ... diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b17edbcd98cc..e0a9845b6cbc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -657,8 +657,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, break; case PERF_RECORD_SWITCH: case PERF_RECORD_SWITCH_CPU_WIDE: - if (has(SWITCH_OUT)) + if (has(SWITCH_OUT)) { ret += fprintf(fp, "S"); + if (sample->misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) + ret += fprintf(fp, "p"); + } default: break; } From 038586c34301578e538f6c5aa79ca82bce1b9152 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 16 Apr 2018 15:23:14 +0200 Subject: [PATCH 232/660] perf list: Add s390 support for detailed/verbose PMU event description 'perf list' with flags -d and -v print a description (-d) or a very verbose explanation (-v) of CPU specific counter events. These descriptions are provided with the json files in directory pmu-events/arch/s390/*.json. Display of these descriptions on s390 requires the corresponding json files. On s390 this does not work because function is_pmu_core() does not detect the s390 directory name where the CPU specific events are listed. On x86 it is: /sys/bus/event_source/devices/cpu whereas on s390 it is: /sys/bus/event_source/devices/cpum_cf /sys/bus/event_source/devices/cpum_sf Fix this by adding s390 directory name testing to function is_pmu_core(). This is the same approach as taken for the ARM platform. Output before: [root@s35lp76 perf]# ./perf list -d pmu List of pre-defined events (to be used in -e): cpum_cf/AES_BLOCKED_CYCLES/ [Kernel PMU event] cpum_cf/AES_BLOCKED_FUNCTIONS/ [Kernel PMU event] cpum_cf/AES_CYCLES/ [Kernel PMU event] cpum_cf/AES_FUNCTIONS/ [Kernel PMU event] .... cpum_cf/TX_NC_TEND/ [Kernel PMU event] cpum_cf/VX_BCD_EXECUTION_SLOTS/ [Kernel PMU event] cpum_sf/SF_CYCLES_BASIC/ [Kernel PMU event] Output after: [root@s35lp76 perf]# ./perf list -d pmu List of pre-defined events (to be used in -e): cpum_cf/AES_BLOCKED_CYCLES/ [Kernel PMU event] cpum_cf/AES_BLOCKED_FUNCTIONS/ [Kernel PMU event] cpum_cf/AES_CYCLES/ [Kernel PMU event] cpum_cf/AES_FUNCTIONS/ [Kernel PMU event] .... cpum_cf/TX_NC_TEND/ [Kernel PMU event] cpum_cf/VX_BCD_EXECUTION_SLOTS/ [Kernel PMU event] cpum_sf/SF_CYCLES_BASIC/ [Kernel PMU event] 3906: bcd_dfp_execution_slots [BCD DFP Execution Slots] decimal_instructions [Decimal Instructions] dtlb2_gpage_writes [DTLB2 GPAGE Writes] dtlb2_hpage_writes [DTLB2 HPAGE Writes] dtlb2_misses [DTLB2 Misses] dtlb2_writes [DTLB2 Writes] itlb2_misses [ITLB2 Misses] itlb2_writes [ITLB2 Writes] l1c_tlb2_misses [L1C TLB2 Misses] ..... cfvn 3: cpu_cycles [CPU Cycles] instructions [Instructions] l1d_dir_writes [L1D Directory Writes] l1d_penalty_cycles [L1D Penalty Cycles] l1i_dir_writes [L1I Directory Writes] l1i_penalty_cycles [L1I Penalty Cycles] problem_state_cpu_cycles [Problem State CPU Cycles] problem_state_instructions [Problem State Instructions] .... csvn generic: aes_blocked_cycles [AES Blocked Cycles] aes_blocked_functions [AES Blocked Functions] aes_cycles [AES Cycles] aes_functions [AES Functions] dea_blocked_cycles [DEA Blocked Cycles] dea_blocked_functions [DEA Blocked Functions] .... Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Acked-by: Mark Rutland Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180416132314.33249-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 064bdcb7bd78..61a5e5027338 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -562,6 +562,12 @@ static int is_pmu_core(const char *name) if (stat(path, &st) == 0) return 1; + /* Look for cpu sysfs (specific to s390) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s", + sysfs, name); + if (stat(path, &st) == 0 && !strncmp(name, "cpum_", 5)) + return 1; + return 0; } From 78b562fbfa2cf0a9fcb23c3154756b690f4905c1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 15 Apr 2018 11:23:50 +0200 Subject: [PATCH 233/660] perf: Return proper values for user stack errors Return immediately when we find issue in the user stack checks. The error value could get overwritten by following check for PERF_SAMPLE_REGS_INTR. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: H. Peter Anvin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: syzkaller-bugs@googlegroups.com Cc: x86@kernel.org Fixes: 60e2364e60e8 ("perf: Add ability to sample machine state on interrupt") Link: http://lkml.kernel.org/r/20180415092352.12403-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1bae80aaabfb..67612ce359ad 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10209,9 +10209,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, * __u16 sample size limit. */ if (attr->sample_stack_user >= USHRT_MAX) - ret = -EINVAL; + return -EINVAL; else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) - ret = -EINVAL; + return -EINVAL; } if (!attr->sample_max_stack) From 5af44ca53d019de47efe6dbc4003dd518e5197ed Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 15 Apr 2018 11:23:51 +0200 Subject: [PATCH 234/660] perf: Fix sample_max_stack maximum check The syzbot hit KASAN bug in perf_callchain_store having the entry stored behind the allocated bounds [1]. We miss the sample_max_stack check for the initial event that allocates callchain buffers. This missing check allows to create an event with sample_max_stack value bigger than the global sysctl maximum: # sysctl -a | grep perf_event_max_stack kernel.perf_event_max_stack = 127 # perf record -vv -C 1 -e cycles/max-stack=256/ kill ... perf_event_attr: size 112 ... sample_max_stack 256 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 = 4 Note the '-C 1', which forces perf record to create just single event. Otherwise it opens event for every cpu, then the sample_max_stack check fails on the second event and all's fine. The fix is to run the sample_max_stack check also for the first event with callchains. [1] https://marc.info/?l=linux-kernel&m=152352732920874&w=2 Reported-by: syzbot+7c449856228b63ac951e@syzkaller.appspotmail.com Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: H. Peter Anvin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: syzkaller-bugs@googlegroups.com Cc: x86@kernel.org Fixes: 97c79a38cd45 ("perf core: Per event callchain limit") Link: http://lkml.kernel.org/r/20180415092352.12403-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/callchain.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 772a43fea825..73cc26e321de 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -119,19 +119,22 @@ int get_callchain_buffers(int event_max_stack) goto exit; } + /* + * If requesting per event more than the global cap, + * return a different error to help userspace figure + * this out. + * + * And also do it here so that we have &callchain_mutex held. + */ + if (event_max_stack > sysctl_perf_event_max_stack) { + err = -EOVERFLOW; + goto exit; + } + if (count > 1) { /* If the allocation failed, give up */ if (!callchain_cpus_entries) err = -ENOMEM; - /* - * If requesting per event more than the global cap, - * return a different error to help userspace figure - * this out. - * - * And also do it here so that we have &callchain_mutex held. - */ - if (event_max_stack > sysctl_perf_event_max_stack) - err = -EOVERFLOW; goto exit; } From bfb3d7b8b906b66551424d7636182126e1d134c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 15 Apr 2018 11:23:52 +0200 Subject: [PATCH 235/660] perf: Remove superfluous allocation error check If the get_callchain_buffers fails to allocate the buffer it will decrease the nr_callchain_events right away. There's no point of checking the allocation error for nr_callchain_events > 1. Removing that check. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: H. Peter Anvin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: syzkaller-bugs@googlegroups.com Cc: x86@kernel.org Link: http://lkml.kernel.org/r/20180415092352.12403-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/callchain.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 73cc26e321de..c187aa3df3c8 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -131,14 +131,8 @@ int get_callchain_buffers(int event_max_stack) goto exit; } - if (count > 1) { - /* If the allocation failed, give up */ - if (!callchain_cpus_entries) - err = -ENOMEM; - goto exit; - } - - err = alloc_callchain_buffers(); + if (count == 1) + err = alloc_callchain_buffers(); exit: if (err) atomic_dec(&nr_callchain_events); From 1340ccfa9a9afefdbab90d7935d4ed19817e37c2 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 6 Apr 2018 17:21:30 -0700 Subject: [PATCH 236/660] x86,sched: Allow topologies where NUMA nodes share an LLC Intel's Skylake Server CPUs have a different LLC topology than previous generations. When in Sub-NUMA-Clustering (SNC) mode, the package is divided into two "slices", each containing half the cores, half the LLC, and one memory controller and each slice is enumerated to Linux as a NUMA node. This is similar to how the cores and LLC were arranged for the Cluster-On-Die (CoD) feature. CoD allowed the same cache line to be present in each half of the LLC. But, with SNC, each line is only ever present in *one* slice. This means that the portion of the LLC *available* to a CPU depends on the data being accessed: Remote socket: entire package LLC is shared Local socket->local slice: data goes into local slice LLC Local socket->remote slice: data goes into remote-slice LLC. Slightly higher latency than local slice LLC. The biggest implication from this is that a process accessing all NUMA-local memory only sees half the LLC capacity. The CPU describes its cache hierarchy with the CPUID instruction. One of the CPUID leaves enumerates the "logical processors sharing this cache". This information is used for scheduling decisions so that tasks move more freely between CPUs sharing the cache. But, the CPUID for the SNC configuration discussed above enumerates the LLC as being shared by the entire package. This is not 100% precise because the entire cache is not usable by all accesses. But, it *is* the way the hardware enumerates itself, and this is not likely to change. The userspace visible impact of all the above is that the sysfs info reports the entire LLC as being available to the entire package. As noted above, this is not true for local socket accesses. This patch does not correct the sysfs info. It is the same, pre and post patch. The current code emits the following warning: sched: CPU #3's llc-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency. The warning is coming from the topology_sane() check in smpboot.c because the topology is not matching the expectations of the model for obvious reasons. To fix this, add a vendor and model specific check to never call topology_sane() for these systems. Also, just like "Cluster-on-Die" disable the "coregroup" sched_domain_topology_level and use NUMA information from the SRAT alone. This is OK at least on the hardware we are immediately concerned about because the LLC sharing happens at both the slice and at the package level, which are also NUMA boundaries. Signed-off-by: Alison Schofield Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Prarit Bhargava Cc: Tony Luck Cc: Peter Zijlstra (Intel) Cc: brice.goglin@gmail.com Cc: Dave Hansen Cc: Borislav Petkov Cc: David Rientjes Cc: Igor Mammedov Cc: "H. Peter Anvin" Cc: Tim Chen Link: https://lkml.kernel.org/r/20180407002130.GA18984@alison-desk.jf.intel.com --- arch/x86/kernel/smpboot.c | 45 ++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ff99e2b6fc54..45175b81dd5b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,6 +77,8 @@ #include #include #include +#include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -390,15 +392,47 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } +/* + * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs. + * + * These are Intel CPUs that enumerate an LLC that is shared by + * multiple NUMA nodes. The LLC on these systems is shared for + * off-package data access but private to the NUMA node (half + * of the package) for on-package access. + * + * CPUID (the source of the information about the LLC) can only + * enumerate the cache as being shared *or* unshared, but not + * this particular configuration. The CPU in this case enumerates + * the cache to be shared across the entire package (spanning both + * NUMA nodes). + */ + +static const struct x86_cpu_id snc_cpu[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X }, + {} +}; + static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; - if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID && - per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) - return topology_sane(c, o, "llc"); + /* Do not match if we do not have a valid APICID for cpu: */ + if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID) + return false; - return false; + /* Do not match if LLC id does not match: */ + if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2)) + return false; + + /* + * Allow the SNC topology without warning. Return of false + * means 'c' does not share the LLC of 'o'. This will be + * reflected to userspace. + */ + if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu)) + return false; + + return topology_sane(c, o, "llc"); } /* @@ -456,7 +490,8 @@ static struct sched_domain_topology_level x86_topology[] = { /* * Set if a package/die has multiple NUMA nodes inside. - * AMD Magny-Cours and Intel Cluster-on-Die have this. + * AMD Magny-Cours, Intel Cluster-on-Die, and Intel + * Sub-NUMA Clustering have this. */ static bool x86_has_numa_in_package; From d6ef1f194b7569af8b8397876dc9ab07649d63cb Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 17 Apr 2018 15:27:16 +0200 Subject: [PATCH 237/660] x86/mm: Prevent kernel Oops in PTDUMP code with HIGHPTE=y The walk_pte_level() function just uses __va to get the virtual address of the PTE page, but that breaks when the PTE page is not in the direct mapping with HIGHPTE=y. The result is an unhandled kernel paging request at some random address when accessing the current_kernel or current_user file. Use the correct API to access PTE pages. Fixes: fe770bf0310d ('x86: clean up the page table dumper and add 32-bit support') Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: jgross@suse.com Cc: JBeulich@suse.com Cc: hpa@zytor.com Cc: aryabinin@virtuozzo.com Cc: kirill.shutemov@linux.intel.com Link: https://lkml.kernel.org/r/1523971636-4137-1-git-send-email-joro@8bytes.org --- arch/x86/mm/dump_pagetables.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 62a7e9f65dec..cc7ff5957194 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, pgprotval_t eff_in, unsigned long P) { int i; - pte_t *start; + pte_t *pte; pgprotval_t prot, eff; - start = (pte_t *)pmd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PTE; i++) { - prot = pte_flags(*start); - eff = effective_prot(eff_in, prot); st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); + pte = pte_offset_map(&addr, st->current_address); + prot = pte_flags(*pte); + eff = effective_prot(eff_in, prot); note_page(m, st, __pgprot(prot), eff, 5); - start++; + pte_unmap(pte); } } #ifdef CONFIG_KASAN From 05e489b1596f0aa1025a1fa572676631cd9665da Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 17 Apr 2018 14:25:58 +0800 Subject: [PATCH 238/660] VSOCK: make af_vsock.ko removable again Commit c1eef220c1760762753b602c382127bfccee226d ("vsock: always call vsock_init_tables()") introduced a module_init() function without a corresponding module_exit() function. Modules with an init function can only be removed if they also have an exit function. Therefore the vsock module was considered "permanent" and could not be removed. This patch adds an empty module_exit() function so that "rmmod vsock" works. No explicit cleanup is required because: 1. Transports call vsock_core_exit() upon exit and cannot be removed while sockets are still alive. 2. vsock_diag.ko does not perform any action that requires cleanup by vsock.ko. Fixes: c1eef220c176 ("vsock: always call vsock_init_tables()") Reported-by: Xiumei Mu Cc: Cong Wang Cc: Jorgen Hansen Signed-off-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index aac9b8f6552e..c1076c19b858 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2018,7 +2018,13 @@ const struct vsock_transport *vsock_core_get_transport(void) } EXPORT_SYMBOL_GPL(vsock_core_get_transport); +static void __exit vsock_exit(void) +{ + /* Do nothing. This function makes this module removable. */ +} + module_init(vsock_init_tables); +module_exit(vsock_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); From 77ac725e0c5b27c925e514b999cd46d01eedafd1 Mon Sep 17 00:00:00 2001 From: Nicolas Dechesne Date: Tue, 17 Apr 2018 14:03:26 +0200 Subject: [PATCH 239/660] net: qrtr: add MODULE_ALIAS_NETPROTO macro To ensure that qrtr can be loaded automatically, when needed, if it is compiled as module. Signed-off-by: Nicolas Dechesne Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index b33e5aeb4c06..2aa07b547b16 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1135,3 +1135,4 @@ module_exit(qrtr_proto_fini); MODULE_DESCRIPTION("Qualcomm IPC-router driver"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_NETPROTO(PF_QIPCRTR); From 800cb2e553d44541b83aa3ec45d9839385fe8ab6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Apr 2018 14:44:41 +0100 Subject: [PATCH 240/660] arm64: kasan: avoid pfn_to_nid() before page array is initialized In arm64's kasan_init(), we use pfn_to_nid() to find the NUMA node a span of memory is in, hoping to allocate shadow from the same NUMA node. However, at this point, the page array has not been initialized, and thus this is bogus. Since commit: f165b378bbdf6c8a ("mm: uninitialized struct page poisoning sanity") ... accessing fields of the page array results in a boot time Oops(), highlighting this problem: [ 0.000000] Unable to handle kernel paging request at virtual address dfff200000000000 [ 0.000000] Mem abort info: [ 0.000000] ESR = 0x96000004 [ 0.000000] Exception class = DABT (current EL), IL = 32 bits [ 0.000000] SET = 0, FnV = 0 [ 0.000000] EA = 0, S1PTW = 0 [ 0.000000] Data abort info: [ 0.000000] ISV = 0, ISS = 0x00000004 [ 0.000000] CM = 0, WnR = 0 [ 0.000000] [dfff200000000000] address between user and kernel address ranges [ 0.000000] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.16.0-07317-gf165b378bbdf #42 [ 0.000000] Hardware name: ARM Juno development board (r1) (DT) [ 0.000000] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 0.000000] pc : __asan_load8+0x8c/0xa8 [ 0.000000] lr : __dump_page+0x3c/0x3b8 [ 0.000000] sp : ffff2000099b7ca0 [ 0.000000] x29: ffff2000099b7ca0 x28: ffff20000a1762c0 [ 0.000000] x27: ffff7e0000000000 x26: ffff2000099dd000 [ 0.000000] x25: ffff200009a3f960 x24: ffff200008f9c38c [ 0.000000] x23: ffff20000a9d3000 x22: ffff200009735430 [ 0.000000] x21: fffffffffffffffe x20: ffff7e0001e50420 [ 0.000000] x19: ffff7e0001e50400 x18: 0000000000001840 [ 0.000000] x17: ffffffffffff8270 x16: 0000000000001840 [ 0.000000] x15: 0000000000001920 x14: 0000000000000004 [ 0.000000] x13: 0000000000000000 x12: 0000000000000800 [ 0.000000] x11: 1ffff0012d0f89ff x10: ffff10012d0f89ff [ 0.000000] x9 : 0000000000000000 x8 : ffff8009687c5000 [ 0.000000] x7 : 0000000000000000 x6 : ffff10000f282000 [ 0.000000] x5 : 0000000000000040 x4 : fffffffffffffffe [ 0.000000] x3 : 0000000000000000 x2 : dfff200000000000 [ 0.000000] x1 : 0000000000000005 x0 : 0000000000000000 [ 0.000000] Process swapper (pid: 0, stack limit = 0x (ptrval)) [ 0.000000] Call trace: [ 0.000000] __asan_load8+0x8c/0xa8 [ 0.000000] __dump_page+0x3c/0x3b8 [ 0.000000] dump_page+0xc/0x18 [ 0.000000] kasan_init+0x2e8/0x5a8 [ 0.000000] setup_arch+0x294/0x71c [ 0.000000] start_kernel+0xdc/0x500 [ 0.000000] Code: aa0403e0 9400063c 17ffffee d343fc00 (38e26800) [ 0.000000] ---[ end trace 67064f0e9c0cc338 ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! [ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- Let's fix this by using early_pfn_to_nid(), as other architectures do in their kasan init code. Note that early_pfn_to_nid acquires the nid from the memblock array, which we iterate over in kasan_init(), so this should be fine. Signed-off-by: Mark Rutland Fixes: 39d114ddc6822302 ("arm64: add KASAN support") Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/kasan_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index dabfc1ecda3d..12145874c02b 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -204,7 +204,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); kasan_map_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(lm_alias(_text)))); + early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, (void *)mod_shadow_start); @@ -224,7 +224,7 @@ void __init kasan_init(void) kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), (unsigned long)kasan_mem_to_shadow(end), - pfn_to_nid(virt_to_pfn(start))); + early_pfn_to_nid(virt_to_pfn(start))); } /* From daf70d89f80c6e1772233da9e020114b1254e7e0 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 17 Apr 2018 15:52:21 +0100 Subject: [PATCH 241/660] MIPS: memset.S: Fix return of __clear_user from Lpartial_fixup The __clear_user function is defined to return the number of bytes that could not be cleared. From the underlying memset / bzero implementation this means setting register a2 to that number on return. Currently if a page fault is triggered within the memset_partial block, the value loaded into a2 on return is meaningless. The label .Lpartial_fixup\@ is jumped to on page fault. In order to work out how many bytes failed to copy, the exception handler should find how many bytes left in the partial block (andi a2, STORMASK), add that to the partial block end address (a2), and subtract the faulting address to get the remainder. Currently it incorrectly subtracts the partial block start address (t1), which has additionally been clobbered to generate a jump target in memset_partial. Fix this by adding the block end address instead. This issue was found with the following test code: int j, k; for (j = 0; j < 512; j++) { if ((k = clear_user(NULL, j)) != j) { pr_err("clear_user (NULL %d) returned %d\n", j, k); } } Which now passes on Creator Ci40 (MIPS32) and Cavium Octeon II (MIPS64). Suggested-by: James Hogan Signed-off-by: Matt Redfearn Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/19108/ Signed-off-by: James Hogan --- arch/mips/lib/memset.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 90bcdf1224ee..184819c1d5c8 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -252,7 +252,7 @@ PTR_L t0, TI_TASK($28) andi a2, STORMASK LONG_L t0, THREAD_BUADDR(t0) - LONG_ADDU a2, t1 + LONG_ADDU a2, a0 jr ra LONG_SUBU a2, t0 From 4450dc0ae2c18a0ac6dce560215c7a1fa12122b5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 5 Apr 2018 17:26:58 +0200 Subject: [PATCH 242/660] clockevents: Fix kernel messages split across multiple lines Convert the clockevents driver from old-style printk() to pr_info() and pr_cont(), to fix split kernel messages like below: Clockevents: could not switch to one-shot mode: dummy_timer is not functional. Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Link: https://lkml.kernel.org/r/1522942018-14471-1-git-send-email-geert%2Brenesas@glider.be --- kernel/time/tick-oneshot.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index c1f518e7aa80..6fe615d57ebb 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -82,16 +82,15 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || !tick_device_is_functional(dev)) { - printk(KERN_INFO "Clockevents: " - "could not switch to one-shot mode:"); + pr_info("Clockevents: could not switch to one-shot mode:"); if (!dev) { - printk(" no tick device\n"); + pr_cont(" no tick device\n"); } else { if (!tick_device_is_functional(dev)) - printk(" %s is not functional.\n", dev->name); + pr_cont(" %s is not functional.\n", dev->name); else - printk(" %s does not support one-shot mode.\n", - dev->name); + pr_cont(" %s does not support one-shot mode.\n", + dev->name); } return -EINVAL; } From f0ae6a0321222864ed8675a924cc8ee2cb042c31 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Thu, 12 Apr 2018 15:57:01 +0800 Subject: [PATCH 243/660] timers: Remove stale struct tvec_base forward declaration struct tvec_base is a leftover of the original timer wheel implementation and not longer used. Remove the forward declaration. Signed-off-by: Liu Changcheng Signed-off-by: Thomas Gleixner Cc: akpm@linux-foundation.org Link: https://lkml.kernel.org/r/20180412075701.GA38952@sofia --- include/linux/timer.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 2448f9cc48a3..7b066fd38248 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -8,8 +8,6 @@ #include #include -struct tvec_base; - struct timer_list { /* * All fields that change during normal runtime grouped to the From e142aa09ed88be98395dde7acb96fb2263566b68 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 13 Apr 2018 13:27:58 +0800 Subject: [PATCH 244/660] timekeeping: Remove __current_kernel_time() The __current_kernel_time() function based on 'struct timespec' is no longer recommended for new code, and the only user of this function has been replaced by commit 6909e29fdefb ("kdb: use __ktime_get_real_seconds instead of __current_kernel_time"). Remove the obsolete interface. Signed-off-by: Baolin Wang Signed-off-by: Thomas Gleixner Cc: arnd@arndb.de Cc: sboyd@kernel.org Cc: broonie@kernel.org Cc: john.stultz@linaro.org Link: https://lkml.kernel.org/r/1a9dbea7ee2cda7efe9ed330874075cf17fdbff6.1523596316.git.baolin.wang@linaro.org --- include/linux/timekeeping32.h | 3 --- kernel/time/timekeeping.c | 7 ------- 2 files changed, 10 deletions(-) diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h index af4114d5dc17..3616b4becb59 100644 --- a/include/linux/timekeeping32.h +++ b/include/linux/timekeeping32.h @@ -9,9 +9,6 @@ extern void do_gettimeofday(struct timeval *tv); unsigned long get_seconds(void); -/* does not take xtime_lock */ -struct timespec __current_kernel_time(void); - static inline struct timespec current_kernel_time(void) { struct timespec64 now = current_kernel_time64(); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ca90219a1e73..dcf7f20fcd12 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2139,13 +2139,6 @@ unsigned long get_seconds(void) } EXPORT_SYMBOL(get_seconds); -struct timespec __current_kernel_time(void) -{ - struct timekeeper *tk = &tk_core.timekeeper; - - return timespec64_to_timespec(tk_xtime(tk)); -} - struct timespec64 current_kernel_time64(void) { struct timekeeper *tk = &tk_core.timekeeper; From f53c4c20d6d38bcefd89bfcab135486cbb797884 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Tue, 27 Mar 2018 14:30:44 +0100 Subject: [PATCH 245/660] ASoC: topology: Check widget kcontrols before deref Validate the topology input before we dereference the pointer. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 5598e891b2b3..986b8b2f90fb 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -513,7 +513,7 @@ static void remove_widget(struct snd_soc_component *comp, */ if (dobj->widget.kcontrol_type == SND_SOC_TPLG_TYPE_ENUM) { /* enumerated widget mixer */ - for (i = 0; i < w->num_kcontrols; i++) { + for (i = 0; w->kcontrols != NULL && i < w->num_kcontrols; i++) { struct snd_kcontrol *kcontrol = w->kcontrols[i]; struct soc_enum *se = (struct soc_enum *)kcontrol->private_value; @@ -530,7 +530,7 @@ static void remove_widget(struct snd_soc_component *comp, } } else { /* volume mixer or bytes controls */ - for (i = 0; i < w->num_kcontrols; i++) { + for (i = 0; w->kcontrols != NULL && i < w->num_kcontrols; i++) { struct snd_kcontrol *kcontrol = w->kcontrols[i]; if (dobj->widget.kcontrol_type From 7ce2367254e84753bceb07327aaf5c953cfce117 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 17 Apr 2018 18:46:14 +0900 Subject: [PATCH 246/660] vlan: Fix reading memory beyond skb->tail in skb_vlan_tagged_multi Syzkaller spotted an old bug which leads to reading skb beyond tail by 4 bytes on vlan tagged packets. This is caused because skb_vlan_tagged_multi() did not check skb_headlen. BUG: KMSAN: uninit-value in eth_type_vlan include/linux/if_vlan.h:283 [inline] BUG: KMSAN: uninit-value in skb_vlan_tagged_multi include/linux/if_vlan.h:656 [inline] BUG: KMSAN: uninit-value in vlan_features_check include/linux/if_vlan.h:672 [inline] BUG: KMSAN: uninit-value in dflt_features_check net/core/dev.c:2949 [inline] BUG: KMSAN: uninit-value in netif_skb_features+0xd1b/0xdc0 net/core/dev.c:3009 CPU: 1 PID: 3582 Comm: syzkaller435149 Not tainted 4.16.0+ #82 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 eth_type_vlan include/linux/if_vlan.h:283 [inline] skb_vlan_tagged_multi include/linux/if_vlan.h:656 [inline] vlan_features_check include/linux/if_vlan.h:672 [inline] dflt_features_check net/core/dev.c:2949 [inline] netif_skb_features+0xd1b/0xdc0 net/core/dev.c:3009 validate_xmit_skb+0x89/0x1320 net/core/dev.c:3084 __dev_queue_xmit+0x1cb2/0x2b60 net/core/dev.c:3549 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 packet_snd net/packet/af_packet.c:2944 [inline] packet_sendmsg+0x7c57/0x8a10 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] sock_write_iter+0x3b9/0x470 net/socket.c:909 do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776 do_iter_write+0x30d/0xd40 fs/read_write.c:932 vfs_writev fs/read_write.c:977 [inline] do_writev+0x3c9/0x830 fs/read_write.c:1012 SYSC_writev+0x9b/0xb0 fs/read_write.c:1085 SyS_writev+0x56/0x80 fs/read_write.c:1082 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x43ffa9 RSP: 002b:00007fff2cff3948 EFLAGS: 00000217 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043ffa9 RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00000000006cb018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004018d0 R13: 0000000000401960 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 packet_alloc_skb net/packet/af_packet.c:2803 [inline] packet_snd net/packet/af_packet.c:2894 [inline] packet_sendmsg+0x6444/0x8a10 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] sock_write_iter+0x3b9/0x470 net/socket.c:909 do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776 do_iter_write+0x30d/0xd40 fs/read_write.c:932 vfs_writev fs/read_write.c:977 [inline] do_writev+0x3c9/0x830 fs/read_write.c:1012 SYSC_writev+0x9b/0xb0 fs/read_write.c:1085 SyS_writev+0x56/0x80 fs/read_write.c:1082 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.") Reported-and-tested-by: syzbot+0bbe42c764feafa82c5a@syzkaller.appspotmail.com Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 +++++-- net/core/dev.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index d11f41d5269f..78a5a90b4267 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -663,7 +663,7 @@ static inline bool skb_vlan_tagged(const struct sk_buff *skb) * Returns true if the skb is tagged with multiple vlan headers, regardless * of whether it is hardware accelerated or not. */ -static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) +static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) { __be16 protocol = skb->protocol; @@ -673,6 +673,9 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) if (likely(!eth_type_vlan(protocol))) return false; + if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) + return false; + veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } @@ -690,7 +693,7 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) * * Returns features without unsafe ones if the skb has multiple tags. */ -static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, +static inline netdev_features_t vlan_features_check(struct sk_buff *skb, netdev_features_t features) { if (skb_vlan_tagged_multi(skb)) { diff --git a/net/core/dev.c b/net/core/dev.c index 969462ebb296..af0558b00c6c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2969,7 +2969,7 @@ netdev_features_t passthru_features_check(struct sk_buff *skb, } EXPORT_SYMBOL(passthru_features_check); -static netdev_features_t dflt_features_check(const struct sk_buff *skb, +static netdev_features_t dflt_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { From 89bda97b445bacab68e71507cc08ccacd6694474 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Tue, 17 Apr 2018 13:32:39 +0100 Subject: [PATCH 247/660] sfc: check RSS is active for filter insert For some firmware variants - specifically 'capture packed stream' - RSS filters are not valid. We must check if RSS is actually active rather than merely enabled. Fixes: 42356d9a137b ("sfc: support RSS spreading of ethtool ntuple filters") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 36f24c7e553a..83ce229f4eb7 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5264,7 +5264,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, ids = vlan->uc; } - filter_flags = efx_rss_enabled(efx) ? EFX_FILTER_FLAG_RX_RSS : 0; + filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; /* Insert/renew filters */ for (i = 0; i < addr_count; i++) { @@ -5333,7 +5333,7 @@ static int efx_ef10_filter_insert_def(struct efx_nic *efx, int rc; u16 *id; - filter_flags = efx_rss_enabled(efx) ? EFX_FILTER_FLAG_RX_RSS : 0; + filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); From 9c438d7a3a52dcc2b9ed095cb87d3a5e83cf7e60 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Apr 2018 12:07:06 -0700 Subject: [PATCH 248/660] KEYS: DNS: limit the length of option strings Adding a dns_resolver key whose payload contains a very long option name resulted in that string being printed in full. This hit the WARN_ONCE() in set_precision() during the printk(), because printk() only supports a precision of up to 32767 bytes: precision 1000000 too large WARNING: CPU: 0 PID: 752 at lib/vsprintf.c:2189 vsnprintf+0x4bc/0x5b0 Fix it by limiting option strings (combined name + value) to a much more reasonable 128 bytes. The exact limit is arbitrary, but currently the only recognized option is formatted as "dnserror=%lu" which fits well within this limit. Also ratelimit the printks. Reproducer: perl -e 'print "#", "A" x 1000000, "\x00"' | keyctl padd dns_resolver desc @s This bug was found using syzkaller. Reported-by: Mark Rutland Fixes: 4a2d789267e0 ("DNS: If the DNS server returns an error, allow that to be cached [ver #2]") Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- net/dns_resolver/dns_key.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 8396705deffc..40c851693f77 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -91,9 +91,9 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) next_opt = memchr(opt, '#', end - opt) ?: end; opt_len = next_opt - opt; - if (!opt_len) { - printk(KERN_WARNING - "Empty option to dns_resolver key\n"); + if (opt_len <= 0 || opt_len > 128) { + pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n", + opt_len); return -EINVAL; } @@ -127,10 +127,8 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) } bad_option_value: - printk(KERN_WARNING - "Option '%*.*s' to dns_resolver key:" - " bad/missing value\n", - opt_nlen, opt_nlen, opt); + pr_warn_ratelimited("Option '%*.*s' to dns_resolver key: bad/missing value\n", + opt_nlen, opt_nlen, opt); return -EINVAL; } while (opt = next_opt + 1, opt < end); } From 0d568cd34eb04acf05c26f360d1a0f071f0bb636 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 13 Apr 2018 10:13:29 -0500 Subject: [PATCH 249/660] cifs: smb2ops: Fix NULL check in smb2_query_symlink The current code null checks variable err_buf, which is always null when it is checked, hence utf16_path is free'd and the function returns -ENOENT everytime it is called, making it impossible for the execution path to reach the following code: err_buf = err_iov.iov_base; Fix this by null checking err_iov.iov_base instead of err_buf. Also, notice that err_buf no longer needs to be initialized to NULL. Addresses-Coverity-ID: 1467876 ("Logically dead code") Fixes: 2d636199e400 ("cifs: Change SMB2_open to return an iov for the error parameter") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b4ae932ea134..38ebf3f357d2 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1452,7 +1452,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; struct kvec err_iov = {NULL, 0}; - struct smb2_err_rsp *err_buf = NULL; + struct smb2_err_rsp *err_buf; struct smb2_symlink_err_rsp *symlink; unsigned int sub_len; unsigned int sub_offset; @@ -1476,7 +1476,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov); - if (!rc || !err_buf) { + if (!rc || !err_iov.iov_base) { kfree(utf16_path); return -ENOENT; } From a5240cbde22c86c606c6462d32aea0648c21fdc3 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sun, 15 Apr 2018 00:58:25 +0530 Subject: [PATCH 250/660] fs: cifs: Adding new return type vm_fault_t Use new return type vm_fault_t for page_mkwrite handler. Signed-off-by: Souptick Joarder Reviewed-by: Matthew Wilcox Signed-off-by: Steve French --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4bcd4e838b47..23fd430fe74a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3462,7 +3462,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) * If the page is mmap'ed into a process' page tables, then we need to make * sure that it doesn't change while being written back. */ -static int +static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; From 8bf24e8319613bbe950d4188682b3a0d9441b76b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 10 Apr 2018 16:20:53 +1000 Subject: [PATCH 251/660] selftests/filesystems: Don't run dnotify_test by default In commit ce290a19609d ("selftests: add devpts selftests"), the filesystems directory was added to the top-level selftests Makefile. That had the effect of causing the existing dnotify_test in the filesystems directory to now be run as part of the default selftests test-run. Unfortunately dnotify_test is actually an infinite loop. Fix it by moving dnotify_test to TEST_GEN_PROGS_EXTENDED, which says that it's a generated file (ie. built) but should not be run as part of the default test suite run (it's an "extended" test). While we're here cleanup a few other things, devpts_pts should be in TEST_GEN_PROGS to indicate that it's built, and with the above two changes we no longer need a custom all or clean rule. Fixes: ce290a19609d ("selftests: add devpts selftests") Signed-off-by: Michael Ellerman Acked-by: Christian brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/filesystems/Makefile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 4e6d09fb166f..5c7d7001ad37 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := dnotify_test devpts_pts -all: $(TEST_PROGS) + +TEST_GEN_PROGS := devpts_pts +TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk - -clean: - rm -fr $(TEST_PROGS) From 72961c4e6082be79825265d9193272b8a1634dec Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Apr 2018 17:08:52 -0600 Subject: [PATCH 252/660] bfq-iosched: ensure to clear bic/bfqq pointers when preparing request Even if we don't have an IO context attached to a request, we still need to clear the priv[0..1] pointers, as they could be pointing to previously used bic/bfqq structures. If we don't do so, we'll either corrupt memory on dispatching a request, or cause an imbalance in counters. Inspired by a fix from Kees. Reported-by: Oleksandr Natalenko Reported-by: Kees Cook Cc: stable@vger.kernel.org Fixes: aee69d78dec0 ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler") Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f0ecd98509d8..771ae9730ac6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4934,8 +4934,16 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) bool new_queue = false; bool bfqq_already_existing = false, split = false; - if (!rq->elv.icq) + /* + * Even if we don't have an icq attached, we should still clear + * the scheduler pointers, as they might point to previously + * allocated bic/bfqq structs. + */ + if (!rq->elv.icq) { + rq->elv.priv[0] = rq->elv.priv[1] = NULL; return; + } + bic = icq_to_bic(rq->elv.icq); spin_lock_irq(&bfqd->lock); From 14a8f0d88cfb58071da2e5ff99eddcef0f87e2cc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Apr 2018 19:49:07 +0200 Subject: [PATCH 253/660] i2c: Remove depends on HAS_DMA in case of platform dependency Remove dependencies on HAS_DMA where a Kconfig symbol depends on another symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST". In most cases this other symbol is an architecture or platform specific symbol, or PCI. Generic symbols and drivers without platform dependencies keep their dependencies on HAS_DMA, to prevent compiling subsystems or drivers that cannot work anyway. This simplifies the dependencies, and allows to improve compile-testing. Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Robin Murphy Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index c4865b08d7fb..8d21b9825d71 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -707,7 +707,6 @@ config I2C_MPC config I2C_MT65XX tristate "MediaTek I2C adapter" depends on ARCH_MEDIATEK || COMPILE_TEST - depends on HAS_DMA help This selects the MediaTek(R) Integrated Inter Circuit bus driver for MT65xx and MT81xx. @@ -885,7 +884,6 @@ config I2C_SH7760 config I2C_SH_MOBILE tristate "SuperH Mobile I2C Controller" - depends on HAS_DMA depends on ARCH_SHMOBILE || ARCH_RENESAS || COMPILE_TEST help If you say yes to this option, support will be included for the @@ -1098,7 +1096,6 @@ config I2C_XLP9XX config I2C_RCAR tristate "Renesas R-Car I2C Controller" - depends on HAS_DMA depends on ARCH_RENESAS || COMPILE_TEST select I2C_SLAVE help From 675edea10badf5b3446275d60e1bd2105905d498 Mon Sep 17 00:00:00 2001 From: Sam Hansen Date: Fri, 13 Apr 2018 10:42:55 -0700 Subject: [PATCH 254/660] Documentation/i2c: whitespace cleanup This strips trailing whitespace in Documentation/i2c/dev-interface. Signed-off-by: Sam Hansen Signed-off-by: Wolfram Sang --- Documentation/i2c/dev-interface | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/i2c/dev-interface b/Documentation/i2c/dev-interface index d04e6e4964ee..c8737d502791 100644 --- a/Documentation/i2c/dev-interface +++ b/Documentation/i2c/dev-interface @@ -9,8 +9,8 @@ i2c adapters present on your system at a given time. i2cdetect is part of the i2c-tools package. I2C device files are character device files with major device number 89 -and a minor device number corresponding to the number assigned as -explained above. They should be called "i2c-%d" (i2c-0, i2c-1, ..., +and a minor device number corresponding to the number assigned as +explained above. They should be called "i2c-%d" (i2c-0, i2c-1, ..., i2c-10, ...). All 256 minor device numbers are reserved for i2c. @@ -38,7 +38,7 @@ Next thing, open the device file, as follows: int file; int adapter_nr = 2; /* probably dynamically determined */ char filename[20]; - + snprintf(filename, 19, "/dev/i2c-%d", adapter_nr); file = open(filename, O_RDWR); if (file < 0) { @@ -72,7 +72,7 @@ the device supports them. Both are illustrated below. /* res contains the read word */ } - /* Using I2C Write, equivalent of + /* Using I2C Write, equivalent of i2c_smbus_write_word_data(file, reg, 0x6543) */ buf[0] = reg; buf[1] = 0x43; @@ -147,7 +147,7 @@ You can do plain i2c transactions by using read(2) and write(2) calls. You do not need to pass the address byte; instead, set it through ioctl I2C_SLAVE before you try to access the device. -You can do SMBus level transactions (see documentation file smbus-protocol +You can do SMBus level transactions (see documentation file smbus-protocol for details) through the following functions: __s32 i2c_smbus_write_quick(int file, __u8 value); __s32 i2c_smbus_read_byte(int file); @@ -158,7 +158,7 @@ for details) through the following functions: __s32 i2c_smbus_write_word_data(int file, __u8 command, __u16 value); __s32 i2c_smbus_process_call(int file, __u8 command, __u16 value); __s32 i2c_smbus_read_block_data(int file, __u8 command, __u8 *values); - __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length, + __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length, __u8 *values); All these transactions return -1 on failure; you can read errno to see what happened. The 'write' transactions return 0 on success; the From b50cb3eaf7c8d13e427f991f7d52b485ca0fc65f Mon Sep 17 00:00:00 2001 From: Sam Hansen Date: Fri, 13 Apr 2018 10:42:56 -0700 Subject: [PATCH 255/660] Documentation/i2c: sync docs with current state of i2c-tools Currently, Documentation/i2c/dev-interface describes the use of i2c_smbus_* helper routines as static inlined functions provided by linux/i2c-dev.h. Work has been done to refactor the linux/i2c-dev.h file in the i2c-tools project out into its own library. As a result, these docs have become stale. This patch corrects the discrepancy and directs the reader to the i2c-tools project for more information. Signed-off-by: Sam Hansen Signed-off-by: Wolfram Sang --- Documentation/i2c/dev-interface | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/Documentation/i2c/dev-interface b/Documentation/i2c/dev-interface index c8737d502791..f92ee1f59914 100644 --- a/Documentation/i2c/dev-interface +++ b/Documentation/i2c/dev-interface @@ -23,11 +23,6 @@ First, you need to include these two headers: #include #include -(Please note that there are two files named "i2c-dev.h" out there. One is -distributed with the Linux kernel and the other one is included in the -source tree of i2c-tools. They used to be different in content but since 2012 -they're identical. You should use "linux/i2c-dev.h"). - Now, you have to decide which adapter you want to access. You should inspect /sys/class/i2c-dev/ or run "i2cdetect -l" to decide this. Adapter numbers are assigned somewhat dynamically, so you can not @@ -140,8 +135,8 @@ ioctl(file, I2C_RDWR, struct i2c_rdwr_ioctl_data *msgset) set in each message, overriding the values set with the above ioctl's. ioctl(file, I2C_SMBUS, struct i2c_smbus_ioctl_data *args) - Not meant to be called directly; instead, use the access functions - below. + If possible, use the provided i2c_smbus_* methods described below instead + of issuing direct ioctls. You can do plain i2c transactions by using read(2) and write(2) calls. You do not need to pass the address byte; instead, set it through @@ -166,10 +161,9 @@ what happened. The 'write' transactions return 0 on success; the returns the number of values read. The block buffers need not be longer than 32 bytes. -The above functions are all inline functions, that resolve to calls to -the i2c_smbus_access function, that on its turn calls a specific ioctl -with the data in a specific format. Read the source code if you -want to know what happens behind the screens. +The above functions are made available by linking against the libi2c library, +which is provided by the i2c-tools project. See: +https://git.kernel.org/pub/scm/utils/i2c-tools/i2c-tools.git/. Implementation details From 40d802cb66ad110673e40cb3426b1f1c79645104 Mon Sep 17 00:00:00 2001 From: Sam Hansen Date: Fri, 13 Apr 2018 10:42:57 -0700 Subject: [PATCH 256/660] Documentation/i2c: adopt kernel commenting style in examples The example I2C code is rewritten to adopt the preferred kernel block commenting style. Signed-off-by: Sam Hansen Signed-off-by: Wolfram Sang --- Documentation/i2c/dev-interface | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/dev-interface b/Documentation/i2c/dev-interface index f92ee1f59914..fbed645ccd75 100644 --- a/Documentation/i2c/dev-interface +++ b/Documentation/i2c/dev-interface @@ -67,8 +67,10 @@ the device supports them. Both are illustrated below. /* res contains the read word */ } - /* Using I2C Write, equivalent of - i2c_smbus_write_word_data(file, reg, 0x6543) */ + /* + * Using I2C Write, equivalent of + * i2c_smbus_write_word_data(file, reg, 0x6543) + */ buf[0] = reg; buf[1] = 0x43; buf[2] = 0x65; From fcf1fadf4c65eea6c519c773d2d9901e8ad94f5f Mon Sep 17 00:00:00 2001 From: Xidong Wang Date: Wed, 4 Apr 2018 10:38:24 +0100 Subject: [PATCH 257/660] drm/i915: Do no use kfree() to free a kmem_cache_alloc() return value Along the eb_lookup_vmas() error path, the return value from kmem_cache_alloc() was freed using kfree(). Fix it to use the proper kmem_cache_free() instead. Fixes: d1b48c1e7184 ("drm/i915: Replace execbuf vma ht with an idr") Signed-off-by: Xidong Wang Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: # v4.14+ Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180404093824.9313-1-chris@chris-wilson.co.uk (cherry picked from commit 6be1187dbffa0027ea379c53f7ca0c782515c610) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8c170db8495d..0414228cd2b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -728,7 +728,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) err = radix_tree_insert(handles_vma, handle, vma); if (unlikely(err)) { - kfree(lut); + kmem_cache_free(eb->i915->luts, lut); goto err_obj; } From e6be6bd85654dba55b97758f937c46835d961a44 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 10 Apr 2018 12:27:04 +0100 Subject: [PATCH 258/660] drm/i915/pmu: Inspect runtime PM state more carefully while estimating RC6 While thinking about sporadic failures of perf_pmu/rc6-runtime-pm* tests on some CI machines I have concluded that: a) the PMU readout of RC6 can race against runtime PM transitions, and b) there are other reasons than being runtime suspended which can cause intel_runtime_pm_get_if_in_use to fail. Therefore when estimating RC6 the code needs to assert we are indeed in suspended state, and if not, the best we can do is return the last known RC6 value. Without this check we can calculate the estimated value based on un- initialized or inappropriate internal state, which can result in over- estimation, or in any case incorrect value being returned. v2: * Re-arrange the code a bit to avoid second unlock and return branch. (Chris Wilson) v3: * Insert some strategic blank lines and improve commit msg. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: 1fe699e30113 ("drm/i915/pmu: Fix sleep under atomic in RC6 readout") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010 Cc: Tvrtko Ursulin Cc: Chris Wilson Cc: Imre Deak Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180410112704.24462-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 2924bdee21edd6785a4df1b4d17fd3cb265fddd9) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_pmu.c | 39 +++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d8feb9053e0c..f0519e31543a 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -473,20 +473,37 @@ static u64 get_rc6(struct drm_i915_private *i915) spin_lock_irqsave(&i915->pmu.lock, flags); spin_lock(&kdev->power.lock); - if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) - i915->pmu.suspended_jiffies_last = - kdev->power.suspended_jiffies; + /* + * After the above branch intel_runtime_pm_get_if_in_use failed + * to get the runtime PM reference we cannot assume we are in + * runtime suspend since we can either: a) race with coming out + * of it before we took the power.lock, or b) there are other + * states than suspended which can bring us here. + * + * We need to double-check that we are indeed currently runtime + * suspended and if not we cannot do better than report the last + * known RC6 value. + */ + if (kdev->power.runtime_status == RPM_SUSPENDED) { + if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) + i915->pmu.suspended_jiffies_last = + kdev->power.suspended_jiffies; - val = kdev->power.suspended_jiffies - - i915->pmu.suspended_jiffies_last; - val += jiffies - kdev->power.accounting_timestamp; + val = kdev->power.suspended_jiffies - + i915->pmu.suspended_jiffies_last; + val += jiffies - kdev->power.accounting_timestamp; + + val = jiffies_to_nsecs(val); + val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; + + i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { + val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; + } else { + val = i915->pmu.sample[__I915_SAMPLE_RC6].cur; + } spin_unlock(&kdev->power.lock); - - val = jiffies_to_nsecs(val); - val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - spin_unlock_irqrestore(&i915->pmu.lock, flags); } From a3520b8992e57bc94ab6ec9f95f09c6c932555fd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 11 Apr 2018 16:15:18 +0300 Subject: [PATCH 259/660] drm/i915/bios: filter out invalid DDC pins from VBT child devices The VBT contains the DDC pin to use for specific ports. Alas, sometimes the field appears to contain bogus data, and while we check for it later on in intel_gmbus_get_adapter() we fail to check the returned NULL on errors. Oops results. The simplest approach seems to be to catch and ignore the bogus DDC pins already at the VBT parsing phase, reverting to fixed per port default pins. This doesn't guarantee display working, but at least it prevents the oops. And we continue to be fuzzed by VBT. One affected machine is Dell Latitude 5590 where a BIOS upgrade added invalid DDC pins. Typical backtrace: [ 35.461411] WARN_ON(!intel_gmbus_is_valid_pin(dev_priv, pin)) [ 35.461432] WARNING: CPU: 6 PID: 411 at drivers/gpu/drm/i915/intel_i2c.c:844 intel_gmbus_get_adapter+0x32/0x37 [i915] [ 35.461437] Modules linked in: i915 ahci libahci dm_snapshot dm_bufio dm_raid raid456 async_raid6_recov async_pq raid6_pq async_xor xor async_memcpy async_tx [ 35.461445] CPU: 6 PID: 411 Comm: kworker/u16:2 Not tainted 4.16.0-rc7.x64-g1cda370ffded #1 [ 35.461447] Hardware name: Dell Inc. Latitude 5590/0MM81M, BIOS 1.1.9 03/13/2018 [ 35.461450] Workqueue: events_unbound async_run_entry_fn [ 35.461465] RIP: 0010:intel_gmbus_get_adapter+0x32/0x37 [i915] [ 35.461467] RSP: 0018:ffff9b4e43d47c40 EFLAGS: 00010286 [ 35.461469] RAX: 0000000000000000 RBX: ffff98f90639f800 RCX: ffffffffae051960 [ 35.461471] RDX: 0000000000000001 RSI: 0000000000000092 RDI: 0000000000000246 [ 35.461472] RBP: ffff98f905410000 R08: 0000004d062a83f6 R09: 00000000000003bd [ 35.461474] R10: 0000000000000031 R11: ffffffffad4eda58 R12: ffff98f905410000 [ 35.461475] R13: ffff98f9064c1000 R14: ffff9b4e43d47cf0 R15: ffff98f905410000 [ 35.461477] FS: 0000000000000000(0000) GS:ffff98f92e580000(0000) knlGS:0000000000000000 [ 35.461479] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 35.461481] CR2: 00007f5682359008 CR3: 00000001b700c005 CR4: 00000000003606e0 [ 35.461483] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 35.461484] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 35.461486] Call Trace: [ 35.461501] intel_hdmi_set_edid+0x37/0x27f [i915] [ 35.461515] intel_hdmi_detect+0x7c/0x97 [i915] [ 35.461518] drm_helper_probe_single_connector_modes+0xe1/0x6c0 [ 35.461521] drm_setup_crtcs+0x129/0xa6a [ 35.461523] ? __switch_to_asm+0x34/0x70 [ 35.461525] ? __switch_to_asm+0x34/0x70 [ 35.461527] ? __switch_to_asm+0x40/0x70 [ 35.461528] ? __switch_to_asm+0x34/0x70 [ 35.461529] ? __switch_to_asm+0x40/0x70 [ 35.461531] ? __switch_to_asm+0x34/0x70 [ 35.461532] ? __switch_to_asm+0x40/0x70 [ 35.461534] ? __switch_to_asm+0x34/0x70 [ 35.461536] __drm_fb_helper_initial_config_and_unlock+0x34/0x46f [ 35.461538] ? __switch_to_asm+0x40/0x70 [ 35.461541] ? _cond_resched+0x10/0x33 [ 35.461557] intel_fbdev_initial_config+0xf/0x1c [i915] [ 35.461560] async_run_entry_fn+0x2e/0xf5 [ 35.461563] process_one_work+0x15b/0x364 [ 35.461565] worker_thread+0x2c/0x3a0 [ 35.461567] ? process_one_work+0x364/0x364 [ 35.461568] kthread+0x10c/0x122 [ 35.461570] ? _kthread_create_on_node+0x5d/0x5d [ 35.461572] ret_from_fork+0x35/0x40 [ 35.461574] Code: 74 16 89 f6 48 8d 04 b6 48 c1 e0 05 48 29 f0 48 8d 84 c7 e8 11 00 00 c3 48 c7 c6 b0 19 1e c0 48 c7 c7 64 8a 1c c0 e8 47 88 ed ec <0f> 0b 31 c0 c3 8b 87 a4 04 00 00 80 e4 fc 09 c6 89 b7 a4 04 00 [ 35.461604] WARNING: CPU: 6 PID: 411 at drivers/gpu/drm/i915/intel_i2c.c:844 intel_gmbus_get_adapter+0x32/0x37 [i915] [ 35.461606] ---[ end trace 4fe1e63e2dd93373 ]--- [ 35.461609] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [ 35.461613] IP: i2c_transfer+0x4/0x86 [ 35.461614] PGD 0 P4D 0 [ 35.461616] Oops: 0000 [#1] SMP PTI [ 35.461618] Modules linked in: i915 ahci libahci dm_snapshot dm_bufio dm_raid raid456 async_raid6_recov async_pq raid6_pq async_xor xor async_memcpy async_tx [ 35.461624] CPU: 6 PID: 411 Comm: kworker/u16:2 Tainted: G W 4.16.0-rc7.x64-g1cda370ffded #1 [ 35.461625] Hardware name: Dell Inc. Latitude 5590/0MM81M, BIOS 1.1.9 03/13/2018 [ 35.461628] Workqueue: events_unbound async_run_entry_fn [ 35.461630] RIP: 0010:i2c_transfer+0x4/0x86 [ 35.461631] RSP: 0018:ffff9b4e43d47b30 EFLAGS: 00010246 [ 35.461633] RAX: ffff9b4e43d47b6e RBX: 0000000000000005 RCX: 0000000000000001 [ 35.461635] RDX: 0000000000000002 RSI: ffff9b4e43d47b80 RDI: 0000000000000000 [ 35.461636] RBP: ffff9b4e43d47bd8 R08: 0000004d062a83f6 R09: 00000000000003bd [ 35.461638] R10: 0000000000000031 R11: ffffffffad4eda58 R12: 0000000000000002 [ 35.461639] R13: 0000000000000001 R14: ffff9b4e43d47b6f R15: ffff9b4e43d47c07 [ 35.461641] FS: 0000000000000000(0000) GS:ffff98f92e580000(0000) knlGS:0000000000000000 [ 35.461643] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 35.461645] CR2: 0000000000000010 CR3: 00000001b700c005 CR4: 00000000003606e0 [ 35.461646] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 35.461647] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 35.461649] Call Trace: [ 35.461652] drm_do_probe_ddc_edid+0xb3/0x128 [ 35.461654] drm_get_edid+0xe5/0x38d [ 35.461669] intel_hdmi_set_edid+0x45/0x27f [i915] [ 35.461684] intel_hdmi_detect+0x7c/0x97 [i915] [ 35.461687] drm_helper_probe_single_connector_modes+0xe1/0x6c0 [ 35.461689] drm_setup_crtcs+0x129/0xa6a [ 35.461691] ? __switch_to_asm+0x34/0x70 [ 35.461693] ? __switch_to_asm+0x34/0x70 [ 35.461694] ? __switch_to_asm+0x40/0x70 [ 35.461696] ? __switch_to_asm+0x34/0x70 [ 35.461697] ? __switch_to_asm+0x40/0x70 [ 35.461698] ? __switch_to_asm+0x34/0x70 [ 35.461700] ? __switch_to_asm+0x40/0x70 [ 35.461701] ? __switch_to_asm+0x34/0x70 [ 35.461703] __drm_fb_helper_initial_config_and_unlock+0x34/0x46f [ 35.461705] ? __switch_to_asm+0x40/0x70 [ 35.461707] ? _cond_resched+0x10/0x33 [ 35.461724] intel_fbdev_initial_config+0xf/0x1c [i915] [ 35.461727] async_run_entry_fn+0x2e/0xf5 [ 35.461729] process_one_work+0x15b/0x364 [ 35.461731] worker_thread+0x2c/0x3a0 [ 35.461733] ? process_one_work+0x364/0x364 [ 35.461734] kthread+0x10c/0x122 [ 35.461736] ? _kthread_create_on_node+0x5d/0x5d [ 35.461738] ret_from_fork+0x35/0x40 [ 35.461739] Code: 5c fa e1 ad 48 89 df e8 ea fb ff ff e9 2a ff ff ff 0f 1f 44 00 00 31 c0 e9 43 fd ff ff 31 c0 45 31 e4 e9 c5 fd ff ff 41 54 55 53 <48> 8b 47 10 48 83 78 10 00 74 70 41 89 d4 48 89 f5 48 89 fb 65 [ 35.461756] RIP: i2c_transfer+0x4/0x86 RSP: ffff9b4e43d47b30 [ 35.461757] CR2: 0000000000000010 [ 35.461759] ---[ end trace 4fe1e63e2dd93374 ]--- Based on a patch by Fei Li. v2: s/reverting/sticking/ (Chris) Cc: stable@vger.kernel.org Cc: Fei Li Co-developed-by: Fei Li Reported-by: Pavel Nakonechnyi Reported-and-tested-by: Seweryn Kokot Reported-and-tested-by: Laszlo Valko Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105549 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105961 Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180411131519.9091-1-jani.nikula@intel.com (cherry picked from commit f212bf9abe5de9f938fecea7df07046e74052dde) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_bios.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index c5c7530ba157..447b721c3be9 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1256,7 +1256,6 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, return; aux_channel = child->aux_channel; - ddc_pin = child->ddc_pin; is_dvi = child->device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; is_dp = child->device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT; @@ -1303,9 +1302,15 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); if (is_dvi) { - info->alternate_ddc_pin = map_ddc_pin(dev_priv, ddc_pin); - - sanitize_ddc_pin(dev_priv, port); + ddc_pin = map_ddc_pin(dev_priv, child->ddc_pin); + if (intel_gmbus_is_valid_pin(dev_priv, ddc_pin)) { + info->alternate_ddc_pin = ddc_pin; + sanitize_ddc_pin(dev_priv, port); + } else { + DRM_DEBUG_KMS("Port %c has invalid DDC pin %d, " + "sticking to defaults\n", + port_name(port), ddc_pin); + } } if (is_dp) { From 4a0559ed99189f259e92ed34f60dd51688f1bc40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Apr 2018 10:12:33 +0100 Subject: [PATCH 260/660] drm/i915: Call i915_perf_fini() on init_hw error unwind We have to cleanup after i915_perf_init(), even on the error path, as it passes a pointer into the module to the sysfs core. If we fail to unregister the sysctl table, we leave a dangling pointer which then may explode anytime later. Fixes: 9f9b2792b6d3 ("drm/i915/perf: reuse timestamp frequency from device info") Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Cc: Matthew Auld Reviewed-by: Lionel Landwerlin Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20180414091233.32224-1-chris@chris-wilson.co.uk (cherry picked from commit 9f172f6fbd243759c808d97bd83c95e49325b2c9) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 07c07d55398b..be8555049c93 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1102,30 +1102,32 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) ret = i915_ggtt_probe_hw(dev_priv); if (ret) - return ret; + goto err_perf; - /* WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. */ + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. + */ ret = i915_kick_out_firmware_fb(dev_priv); if (ret) { DRM_ERROR("failed to remove conflicting framebuffer drivers\n"); - goto out_ggtt; + goto err_ggtt; } ret = i915_kick_out_vgacon(dev_priv); if (ret) { DRM_ERROR("failed to remove conflicting VGA console\n"); - goto out_ggtt; + goto err_ggtt; } ret = i915_ggtt_init_hw(dev_priv); if (ret) - return ret; + goto err_ggtt; ret = i915_ggtt_enable_hw(dev_priv); if (ret) { DRM_ERROR("failed to enable GGTT\n"); - goto out_ggtt; + goto err_ggtt; } pci_set_master(pdev); @@ -1136,7 +1138,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto out_ggtt; + goto err_ggtt; } } @@ -1154,7 +1156,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto out_ggtt; + goto err_ggtt; } } @@ -1187,13 +1189,14 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) ret = intel_gvt_init(dev_priv); if (ret) - goto out_ggtt; + goto err_ggtt; return 0; -out_ggtt: +err_ggtt: i915_ggtt_cleanup_hw(dev_priv); - +err_perf: + i915_perf_fini(dev_priv); return ret; } From b4615730530be85fc45ab4631c2ad6d8e2d0b97d Mon Sep 17 00:00:00 2001 From: Gaurav K Singh Date: Tue, 17 Apr 2018 23:52:18 +0530 Subject: [PATCH 261/660] drm/i915/audio: Fix audio detection issue on GLK On Geminilake, sometimes audio card is not getting detected after reboot. This is a spurious issue happening on Geminilake. HW codec and HD audio controller link was going out of sync for which there was a fix in i915 driver but was not getting invoked for GLK. Extending this fix to GLK as well. Tested by Du,Wenkai on GLK board. Bspec: 21829 v2: Instead of checking GEN9_BC, BXT and GLK macros, use IS_GEN9 macro (Jani N) Cc: # b651bd2a3ae3 ("drm/i915/audio: Fix audio enumeration issue on BXT") Cc: Signed-off-by: Gaurav K Singh Reviewed-by: Abhay Kumar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1523989338-29677-1-git-send-email-gaurav.k.singh@intel.com (cherry picked from commit 8221229046e862977ae93ec9d34aa583fbd10397) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 709d6ca68074..3ea566f99450 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -729,7 +729,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev, struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; - if (!IS_GEN9_BC(dev_priv)) + if (!IS_GEN9(dev_priv)) return; i915_audio_component_get_power(kdev); From 7eb2c4dd54ff841f2fe509a84973eb25fa20bda2 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 16 Apr 2018 18:53:09 +0300 Subject: [PATCH 262/660] drm/i915: Fix LSPCON TMDS output buffer enabling from low-power state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LSPCON adapters in low-power state may ignore the first I2C write during TMDS output buffer enabling, resulting in a blank screen even with an otherwise enabled pipe. Fix this by reading back and validating the written value a few times. The problem was noticed on GLK machines with an onboard LSPCON adapter after entering/exiting DC5 power state. Doing an I2C read of the adapter ID as the first transaction - instead of the I2C write to enable the TMDS buffers - returns the correct value. Based on this we assume that the transaction itself is sent properly, it's only the adapter that is not ready for some reason to accept this first write after waking from low-power state. In my case the second I2C write attempt always succeeded. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105854 Cc: Clinton Taylor Cc: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Imre Deak Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180416155309.11100-1-imre.deak@intel.com --- drivers/gpu/drm/drm_dp_dual_mode_helper.c | 39 +++++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c index 02a50929af67..e7f4fe2848a5 100644 --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c @@ -350,19 +350,44 @@ int drm_dp_dual_mode_set_tmds_output(enum drm_dp_dual_mode_type type, { uint8_t tmds_oen = enable ? 0 : DP_DUAL_MODE_TMDS_DISABLE; ssize_t ret; + int retry; if (type < DRM_DP_DUAL_MODE_TYPE2_DVI) return 0; - ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN, - &tmds_oen, sizeof(tmds_oen)); - if (ret) { - DRM_DEBUG_KMS("Failed to %s TMDS output buffers\n", - enable ? "enable" : "disable"); - return ret; + /* + * LSPCON adapters in low-power state may ignore the first write, so + * read back and verify the written value a few times. + */ + for (retry = 0; retry < 3; retry++) { + uint8_t tmp; + + ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN, + &tmds_oen, sizeof(tmds_oen)); + if (ret) { + DRM_DEBUG_KMS("Failed to %s TMDS output buffers (%d attempts)\n", + enable ? "enable" : "disable", + retry + 1); + return ret; + } + + ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_TMDS_OEN, + &tmp, sizeof(tmp)); + if (ret) { + DRM_DEBUG_KMS("I2C read failed during TMDS output buffer %s (%d attempts)\n", + enable ? "enabling" : "disabling", + retry + 1); + return ret; + } + + if (tmp == tmds_oen) + return 0; } - return 0; + DRM_DEBUG_KMS("I2C write value mismatch during TMDS output buffer %s\n", + enable ? "enabling" : "disabling"); + + return -EIO; } EXPORT_SYMBOL(drm_dp_dual_mode_set_tmds_output); From 8b8b5903d50d6e4c5fd06272022b25a11c3214cf Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 6 Mar 2018 15:43:54 +0100 Subject: [PATCH 263/660] dt-bindings: thermal: remove no longer needed samsung thermal properties Remove documentation for longer needed samsung thermal properties. Signed-off-by: Bartlomiej Zolnierkiewicz Reviewed-by: Rob Herring Signed-off-by: Eduardo Valentin --- .../bindings/thermal/exynos-thermal.txt | 23 ++++--------------- 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt index 1b596fd38dc4..b957acff57aa 100644 --- a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt @@ -49,19 +49,6 @@ on the SoC (only first trip points defined in DT will be configured): - samsung,exynos5433-tmu: 8 - samsung,exynos7-tmu: 8 -Following properties are mandatory (depending on SoC): -- samsung,tmu_gain: Gain value for internal TMU operation. -- samsung,tmu_reference_voltage: Value of TMU IP block's reference voltage -- samsung,tmu_noise_cancel_mode: Mode for noise cancellation -- samsung,tmu_efuse_value: Default level of temperature - it is needed when - in factory fusing produced wrong value -- samsung,tmu_min_efuse_value: Minimum temperature fused value -- samsung,tmu_max_efuse_value: Maximum temperature fused value -- samsung,tmu_first_point_trim: First point trimming value -- samsung,tmu_second_point_trim: Second point trimming value -- samsung,tmu_default_temp_offset: Default temperature offset -- samsung,tmu_cal_type: Callibration type - ** Optional properties: - vtmu-supply: This entry is optional and provides the regulator node supplying @@ -78,7 +65,7 @@ Example 1): clocks = <&clock 383>; clock-names = "tmu_apbif"; vtmu-supply = <&tmu_regulator_node>; - #include "exynos4412-tmu-sensor-conf.dtsi" + #thermal-sensor-cells = <0>; }; Example 2): @@ -89,7 +76,7 @@ Example 2): interrupts = <0 58 0>; clocks = <&clock 21>; clock-names = "tmu_apbif"; - #include "exynos5440-tmu-sensor-conf.dtsi" + #thermal-sensor-cells = <0>; }; Example 3): (In case of Exynos5420 "with misplaced TRIMINFO register") @@ -99,7 +86,7 @@ Example 3): (In case of Exynos5420 "with misplaced TRIMINFO register") interrupts = <0 184 0>; clocks = <&clock 318>, <&clock 318>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; - #include "exynos4412-tmu-sensor-conf.dtsi" + #thermal-sensor-cells = <0>; }; tmu_cpu3: tmu@1006c000 { @@ -108,7 +95,7 @@ Example 3): (In case of Exynos5420 "with misplaced TRIMINFO register") interrupts = <0 185 0>; clocks = <&clock 318>, <&clock 319>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; - #include "exynos4412-tmu-sensor-conf.dtsi" + #thermal-sensor-cells = <0>; }; tmu_gpu: tmu@100a0000 { @@ -117,7 +104,7 @@ Example 3): (In case of Exynos5420 "with misplaced TRIMINFO register") interrupts = <0 215 0>; clocks = <&clock 319>, <&clock 318>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; - #include "exynos4412-tmu-sensor-conf.dtsi" + #thermal-sensor-cells = <0>; }; Note: For multi-instance tmu each instance should have an alias correctly From e04907dbc25930b88ee2328fe692c776f63ddf2c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 9 Feb 2018 14:28:10 +0530 Subject: [PATCH 264/660] dt-bindings: thermal: Remove "cooling-{min|max}-level" properties The "cooling-min-level" and "cooling-max-level" properties are not parsed by any part of kernel currently and the max cooling state of a CPU cooling device is found by referring to the cpufreq table instead. Remove the unused bindings. Signed-off-by: Viresh Kumar Reviewed-by: Rob Herring Signed-off-by: Eduardo Valentin --- .../devicetree/bindings/thermal/thermal.txt | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt index 1719d47a5e2f..cc553f0952c5 100644 --- a/Documentation/devicetree/bindings/thermal/thermal.txt +++ b/Documentation/devicetree/bindings/thermal/thermal.txt @@ -55,8 +55,7 @@ of heat dissipation). For example a fan's cooling states correspond to the different fan speeds possible. Cooling states are referred to by single unsigned integers, where larger numbers mean greater heat dissipation. The precise set of cooling states associated with a device -(as referred to by the cooling-min-level and cooling-max-level -properties) should be defined in a particular device's binding. +should be defined in a particular device's binding. For more examples of cooling devices, refer to the example sections below. Required properties: @@ -69,15 +68,6 @@ Required properties: See Cooling device maps section below for more details on how consumers refer to cooling devices. -Optional properties: -- cooling-min-level: An integer indicating the smallest - Type: unsigned cooling state accepted. Typically 0. - Size: one cell - -- cooling-max-level: An integer indicating the largest - Type: unsigned cooling state accepted. - Size: one cell - * Trip points The trip node is a node to describe a point in the temperature domain @@ -226,8 +216,6 @@ cpus { 396000 950000 198000 850000 >; - cooling-min-level = <0>; - cooling-max-level = <3>; #cooling-cells = <2>; /* min followed by max */ }; ... @@ -241,8 +229,6 @@ cpus { */ fan0: fan@48 { ... - cooling-min-level = <0>; - cooling-max-level = <9>; #cooling-cells = <2>; /* min followed by max */ }; }; From b2d71b3cda19831ec67f49d7c6ba0214d9367b29 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Apr 2018 16:45:01 +0100 Subject: [PATCH 265/660] arm64: signal: don't force known signals to SIGKILL Since commit: a7e6f1ca90354a31 ("arm64: signal: Force SIGKILL for unknown signals in force_signal_inject") ... any signal which is not SIGKILL will be upgraded to a SIGKILL be force_signal_inject(). This includes signals we do expect, such as SIGILL triggered by do_undefinstr(). Fix the check to use a logical AND rather than a logical OR, permitting signals whose layout is SIL_FAULT. Fixes: a7e6f1ca90354a31 ("arm64: signal: Force SIGKILL for unknown signals in force_signal_inject") Cc: Will Deacon Reviewed-by: Dave Martin Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index ba964da31a25..1cb2749a72bf 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -366,7 +366,7 @@ void force_signal_inject(int signal, int code, unsigned long address) } /* Force signals we don't understand to SIGKILL */ - if (WARN_ON(signal != SIGKILL || + if (WARN_ON(signal != SIGKILL && siginfo_layout(signal, code) != SIL_FAULT)) { signal = SIGKILL; } From 9de4ee40547fd315d4a0ed1dd15a2fa3559ad707 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 18 Apr 2018 12:51:31 +0300 Subject: [PATCH 266/660] cdrom: information leak in cdrom_ioctl_media_changed() This cast is wrong. "cdi->capacity" is an int and "arg" is an unsigned long. The way the check is written now, if one of the high 32 bits is set then we could read outside the info->slots[] array. This bug is pretty old and it predates git. Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 8327478effd0..bfc566d3f31a 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2371,7 +2371,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_SELECT_DISC) || arg == CDSL_CURRENT) return media_changed(cdi, 1); - if ((unsigned int)arg >= cdi->capacity) + if (arg >= cdi->capacity) return -EINVAL; info = kmalloc(sizeof(*info), GFP_KERNEL); From 0ce9144471de9ee09306ca0127e7cd27521ccc3f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 18 Apr 2018 14:08:27 +1000 Subject: [PATCH 267/660] block: add blk_queue_fua() helper function So we can check FUA support status from the iomap direct IO code. Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9af3e0f430bc..c362aadfe036 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -737,6 +737,7 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) #define blk_queue_preempt_only(q) \ test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) +#define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) extern int blk_set_preempt_only(struct request_queue *q); extern void blk_clear_preempt_only(struct request_queue *q); From 44f06ba8297c7e9dfd0e49b40cbe119113cca094 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 Apr 2018 17:22:23 +0200 Subject: [PATCH 268/660] udf: Fix leak of UTF-16 surrogates into encoded strings OSTA UDF specification does not mention whether the CS0 charset in case of two bytes per character encoding should be treated in UTF-16 or UCS-2. The sample code in the standard does not treat UTF-16 surrogates in any special way but on systems such as Windows which work in UTF-16 internally, filenames would be treated as being in UTF-16 effectively. In Linux it is more difficult to handle characters outside of Base Multilingual plane (beyond 0xffff) as NLS framework works with 2-byte characters only. Just make sure we don't leak UTF-16 surrogates into the resulting string when loading names from the filesystem for now. CC: stable@vger.kernel.org # >= v4.6 Reported-by: Mingye Wang Signed-off-by: Jan Kara --- fs/udf/unicode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index f897e55f2cd0..16a8ad21b77e 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -28,6 +28,9 @@ #include "udf_sb.h" +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 + static int udf_uni2char_utf8(wchar_t uni, unsigned char *out, int boundlen) @@ -37,6 +40,9 @@ static int udf_uni2char_utf8(wchar_t uni, if (boundlen <= 0) return -ENAMETOOLONG; + if ((uni & SURROGATE_MASK) == SURROGATE_PAIR) + return -EINVAL; + if (uni < 0x80) { out[u_len++] = (unsigned char)uni; } else if (uni < 0x800) { From 946b81da114b8ba5c74bb01e57c0c6eca2bdc801 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 18 Apr 2018 08:37:18 -0600 Subject: [PATCH 269/660] blkcg: don't hold blkcg lock when deactivating policy As described in the comment of blkcg_activate_policy(), *Update of each blkg is protected by both queue and blkcg locks so that holding either lock and testing blkcg_policy_enabled() is always enough for dereferencing policy data.* with queue lock held, there is no need to hold blkcg lock in blkcg_deactivate_policy(). Similar case is in blkcg_activate_policy(), which has removed holding of blkcg lock in commit 4c55f4f9ad3001ac1fefdd8d8ca7641d18558e23. Signed-off-by: Jiang Biao Signed-off-by: Wen Yang CC: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1c16694ae145..21bc449d01c0 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1410,9 +1410,6 @@ void blkcg_deactivate_policy(struct request_queue *q, __clear_bit(pol->plid, q->blkcg_pols); list_for_each_entry(blkg, &q->blkg_list, q_node) { - /* grab blkcg lock too while removing @pd from @blkg */ - spin_lock(&blkg->blkcg->lock); - if (blkg->pd[pol->plid]) { if (!blkg->pd[pol->plid]->offline && pol->pd_offline_fn) { @@ -1422,8 +1419,6 @@ void blkcg_deactivate_policy(struct request_queue *q, pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } - - spin_unlock(&blkg->blkcg->lock); } spin_unlock_irq(q->queue_lock); From a514d63882c3d2063b21b865447266ebcb18b04c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 22 Dec 2017 16:06:39 +0800 Subject: [PATCH 270/660] btrfs: qgroup: Commit transaction in advance to reduce early EDQUOT Unlike previous method that tries to commit transaction inside qgroup_reserve(), this time we will try to commit transaction using fs_info->transaction_kthread to avoid nested transaction and no need to worry about locking context. Since it's an asynchronous function call and we won't wait for transaction commit, unlike previous method, we must call it before we hit the qgroup limit. So this patch will use the ratio and size of qgroup meta_pertrans reservation as indicator to check if we should trigger a transaction commit. (meta_prealloc won't be cleaned in transaction committ, it's useless anyway) Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/disk-io.c | 1 + fs/btrfs/qgroup.c | 43 ++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/transaction.c | 1 + fs/btrfs/transaction.h | 14 ++++++++++++++ 5 files changed, 63 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5474ef14d6e6..ec84e2dabb04 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -714,6 +714,12 @@ struct btrfs_delayed_root; */ #define BTRFS_FS_EXCL_OP 16 +/* + * To info transaction_kthread we need an immediate commit so it doesn't + * need to wait for commit_interval + */ +#define BTRFS_FS_NEED_ASYNC_COMMIT 17 + struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ac8b1d21baf..60caa68c3618 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg) now = get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && + !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && (now < cur->start_time || now - cur->start_time < fs_info->commit_interval)) { spin_unlock(&fs_info->trans_lock); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 09c7e4fd550f..9fb758d5077a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ctree.h" #include "transaction.h" @@ -2375,8 +2376,21 @@ out: return ret; } -static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) +/* + * Two limits to commit transaction in advance. + * + * For RATIO, it will be 1/RATIO of the remaining limit + * (excluding data and prealloc meta) as threshold. + * For SIZE, it will be in byte unit as threshold. + */ +#define QGROUP_PERTRANS_RATIO 32 +#define QGROUP_PERTRANS_SIZE SZ_32M +static bool qgroup_check_limits(struct btrfs_fs_info *fs_info, + const struct btrfs_qgroup *qg, u64 num_bytes) { + u64 limit; + u64 threshold; + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) return false; @@ -2385,6 +2399,31 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) return false; + /* + * Even if we passed the check, it's better to check if reservation + * for meta_pertrans is pushing us near limit. + * If there is too much pertrans reservation or it's near the limit, + * let's try commit transaction to free some, using transaction_kthread + */ + if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER | + BTRFS_QGROUP_LIMIT_MAX_EXCL))) { + if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) + limit = qg->max_excl; + else + limit = qg->max_rfer; + threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] - + qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) / + QGROUP_PERTRANS_RATIO; + threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE); + + /* + * Use transaction_kthread to commit transaction, so we no + * longer need to bother nested transaction nor lock context. + */ + if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold) + btrfs_commit_transaction_locksafe(fs_info); + } + return true; } @@ -2434,7 +2473,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, qg = unode_aux_to_qgroup(unode); - if (enforce && !qgroup_check_limits(qg, num_bytes)) { + if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) { ret = -EDQUOT; goto out; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 63fdcab64b01..c944b4769e3c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2267,6 +2267,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ cur_trans->state = TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); + clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); spin_lock(&fs_info->trans_lock); list_del_init(&cur_trans->list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c88fccd80bc5..d8c0826bc2c7 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -199,6 +199,20 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans); int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, int wait_for_unblock); + +/* + * Try to commit transaction asynchronously, so this is safe to call + * even holding a spinlock. + * + * It's done by informing transaction_kthread to commit transaction without + * waiting for commit interval. + */ +static inline void btrfs_commit_transaction_locksafe( + struct btrfs_fs_info *fs_info) +{ + set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); + wake_up_process(fs_info->transaction_kthread); +} int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans); int btrfs_should_end_transaction(struct btrfs_trans_handle *trans); void btrfs_throttle(struct btrfs_fs_info *fs_info); From ff6bc37eb7f6e7b052e50c13a480e1080b3ec07a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 21 Dec 2017 13:42:04 +0800 Subject: [PATCH 271/660] btrfs: qgroup: Use independent and accurate per inode qgroup rsv Unlike reservation calculation used in inode rsv for metadata, qgroup doesn't really need to care about things like csum size or extent usage for the whole tree COW. Qgroups care more about net change of the extent usage. That's to say, if we're going to insert one file extent, it will mostly find its place in COWed tree block, leaving no change in extent usage. Or causing a leaf split, resulting in one new net extent and increasing qgroup number by nodesize. Or in an even more rare case, increase the tree level, increasing qgroup number by 2 * nodesize. So here instead of using the complicated calculation for extent allocator, which cares more about accuracy and no error, qgroup doesn't need that over-estimated reservation. This patch will maintain 2 new members in btrfs_block_rsv structure for qgroup, using much smaller calculation for qgroup rsv, reducing false EDQUOT. Signed-off-by: David Sterba Signed-off-by: Qu Wenruo --- fs/btrfs/ctree.h | 19 ++++++++++++++ fs/btrfs/extent-tree.c | 57 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ec84e2dabb04..2771cc56a622 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,25 @@ struct btrfs_block_rsv { unsigned short full; unsigned short type; unsigned short failfast; + + /* + * Qgroup equivalent for @size @reserved + * + * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care + * about things like csum size nor how many tree blocks it will need to + * reserve. + * + * Qgroup cares more about net change of the extent usage. + * + * So for one newly inserted file extent, in worst case it will cause + * leaf split and level increase, nodesize for each file extent is + * already too much. + * + * In short, qgroup_size/reserved is the upper limit of possible needed + * qgroup metadata reservation. + */ + u64 qgroup_rsv_size; + u64 qgroup_rsv_reserved; }; /* diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5bdb5636d552..055494ddcace 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5560,14 +5560,18 @@ again: static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, - struct btrfs_block_rsv *dest, u64 num_bytes) + struct btrfs_block_rsv *dest, u64 num_bytes, + u64 *qgroup_to_release_ret) { struct btrfs_space_info *space_info = block_rsv->space_info; + u64 qgroup_to_release = 0; u64 ret; spin_lock(&block_rsv->lock); - if (num_bytes == (u64)-1) + if (num_bytes == (u64)-1) { num_bytes = block_rsv->size; + qgroup_to_release = block_rsv->qgroup_rsv_size; + } block_rsv->size -= num_bytes; if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; @@ -5576,6 +5580,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, } else { num_bytes = 0; } + if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { + qgroup_to_release = block_rsv->qgroup_rsv_reserved - + block_rsv->qgroup_rsv_size; + block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; + } else { + qgroup_to_release = 0; + } spin_unlock(&block_rsv->lock); ret = num_bytes; @@ -5598,6 +5609,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, space_info_add_old_bytes(fs_info, space_info, num_bytes); } + if (qgroup_to_release_ret) + *qgroup_to_release_ret = qgroup_to_release; return ret; } @@ -5739,17 +5752,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 num_bytes = 0; + u64 qgroup_num_bytes = 0; int ret = -ENOSPC; spin_lock(&block_rsv->lock); if (block_rsv->reserved < block_rsv->size) num_bytes = block_rsv->size - block_rsv->reserved; + if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size) + qgroup_num_bytes = block_rsv->qgroup_rsv_size - + block_rsv->qgroup_rsv_reserved; spin_unlock(&block_rsv->lock); if (num_bytes == 0) return 0; - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true); if (ret) return ret; ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); @@ -5757,7 +5774,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, block_rsv_add_bytes(block_rsv, num_bytes, 0); trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), num_bytes, 1); - } + + /* Don't forget to increase qgroup_rsv_reserved */ + spin_lock(&block_rsv->lock); + block_rsv->qgroup_rsv_reserved += qgroup_num_bytes; + spin_unlock(&block_rsv->lock); + } else + btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); return ret; } @@ -5778,20 +5801,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 released = 0; + u64 qgroup_to_release = 0; /* * Since we statically set the block_rsv->size we just want to say we * are releasing 0 bytes, and then we'll just get the reservation over * the size free'd. */ - released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0); + released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0, + &qgroup_to_release); if (released > 0) trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), released, 0); if (qgroup_free) - btrfs_qgroup_free_meta_prealloc(inode->root, released); + btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); else - btrfs_qgroup_convert_reserved_meta(inode->root, released); + btrfs_qgroup_convert_reserved_meta(inode->root, + qgroup_to_release); } void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, @@ -5803,7 +5829,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, if (global_rsv == block_rsv || block_rsv->space_info != global_rsv->space_info) global_rsv = NULL; - block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes); + block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL); } static void update_global_block_rsv(struct btrfs_fs_info *fs_info) @@ -5883,7 +5909,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) static void release_global_block_rsv(struct btrfs_fs_info *fs_info) { block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, - (u64)-1); + (u64)-1, NULL); WARN_ON(fs_info->trans_block_rsv.size > 0); WARN_ON(fs_info->trans_block_rsv.reserved > 0); WARN_ON(fs_info->chunk_block_rsv.size > 0); @@ -5907,7 +5933,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) WARN_ON_ONCE(!list_empty(&trans->new_bgs)); block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, - trans->chunk_bytes_reserved); + trans->chunk_bytes_reserved, NULL); trans->chunk_bytes_reserved = 0; } @@ -6012,6 +6038,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, { struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; + u64 qgroup_rsv_size = 0; u64 csum_leaves; unsigned outstanding_extents; @@ -6024,9 +6051,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, inode->csum_bytes); reserve_size += btrfs_calc_trans_metadata_size(fs_info, csum_leaves); + /* + * For qgroup rsv, the calculation is very simple: + * account one nodesize for each outstanding extent + * + * This is overestimating in most cases. + */ + qgroup_rsv_size = outstanding_extents * fs_info->nodesize; spin_lock(&block_rsv->lock); block_rsv->size = reserve_size; + block_rsv->qgroup_rsv_size = qgroup_rsv_size; spin_unlock(&block_rsv->lock); } @@ -8405,7 +8440,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u32 blocksize) { block_rsv_add_bytes(block_rsv, blocksize, 0); - block_rsv_release_bytes(fs_info, block_rsv, NULL, 0); + block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL); } /* From f218ea6c4792e0fabba0195f2f866d0a3b58431e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 17 Apr 2018 16:52:45 +0800 Subject: [PATCH 272/660] btrfs: delayed-inode: Remove wrong qgroup meta reservation calls Commit 4f5427ccce5d ("btrfs: delayed-inode: Use new qgroup meta rsv for delayed inode and item") merged into mainline was not latest version submitted to the mail list in Dec 2017. Which lacks the following fixes: 1) Remove btrfs_qgroup_convert_reserved_meta() call in btrfs_delayed_item_release_metadata() 2) Remove btrfs_qgroup_reserve_meta_prealloc() call in btrfs_delayed_inode_reserve_metadata() Those fixes will resolve unexpected EDQUOT problems. Fixes: 4f5427ccce5d ("btrfs: delayed-inode: Use new qgroup meta rsv for delayed inode and item") Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 06ec8ab6d9ba..a8d492dbd3e7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, dst_rsv = &fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); + + /* + * Here we migrate space rsv from transaction rsv, since have already + * reserved space when starting a transaction. So no need to reserve + * qgroup space here. + */ ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); if (!ret) { trace_btrfs_space_reservation(fs_info, "delayed_item", @@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, return; rsv = &fs_info->delayed_block_rsv; - btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved); + /* + * Check btrfs_delayed_item_reserve_metadata() to see why we don't need + * to release/reserve qgroup space. + */ trace_btrfs_space_reservation(fs_info, "delayed_item", item->key.objectid, item->bytes_reserved, 0); @@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata( num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); - if (ret < 0) - return ret; /* * btrfs_dirty_inode will update the inode under btrfs_join_transaction * which doesn't reserve space for speed. This is a problem since we @@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { + ret = btrfs_qgroup_reserve_meta_prealloc(root, + fs_info->nodesize, true); + if (ret < 0) + return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, BTRFS_RESERVE_NO_FLUSH); /* @@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata( "delayed_inode", btrfs_ino(inode), num_bytes, 1); + } else { + btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); } return ret; } From 336a8bb8e36a273a802a54b2e673c777c9c62fb1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 17 Apr 2018 18:43:58 +0800 Subject: [PATCH 273/660] btrfs: Fix wrong btrfs_delalloc_release_extents parameter Commit 43b18595d660 ("btrfs: qgroup: Use separate meta reservation type for delalloc") merged into mainline is not the latest version submitted to mail list in Dec 2017. It has a fatal wrong @qgroup_free parameter, which results increasing qgroup metadata pertrans reserved space, and causing a lot of early EDQUOT. Fix it by applying the correct diff on top of current branch. Fixes: 43b18595d660 ("btrfs: qgroup: Use separate meta reservation type for delalloc") Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0167a9c97c9c..f660ba1e5e58 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1748,7 +1748,7 @@ again: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, - (ret != 0)); + true); if (ret) { btrfs_drop_pages(pages, num_pages); break; From b32e56e5a87a1f9243db92bc7a5df0ffb4627cfb Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 11 Apr 2018 15:17:59 +1000 Subject: [PATCH 274/660] powerpc/xive: Fix trying to "push" an already active pool VP When setting up a CPU, we "push" (activate) a pool VP for it. However it's an error to do so if it already has an active pool VP. This happens when doing soft CPU hotplug on powernv since we don't tear down the CPU on unplug. The HW flags the error which gets captured by the diagnostics. Fix this by making sure to "pull" out any already active pool first. Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xive/native.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index d22aeb0b69e1..b48454be5b98 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -389,6 +389,10 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) if (xive_pool_vps == XIVE_INVALID_VP) return; + /* Check if pool VP already active, if it is, pull it */ + if (in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2) & TM_QW2W2_VP) + in_be64(xive_tima + TM_SPC_PULL_POOL_CTX); + /* Enable the pool VP */ vp = xive_pool_vps + cpu; pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp); From b2569260d55228b617bd82aba6d0db2faeeb4116 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 18 Apr 2018 11:49:31 -0400 Subject: [PATCH 275/660] ext4: set h_journal if there is a failure starting a reserved handle If ext4 tries to start a reserved handle via jbd2_journal_start_reserved(), and the journal has been aborted, this can result in a NULL pointer dereference. This is because the fields h_journal and h_transaction in the handle structure share the same memory, via a union, so jbd2_journal_start_reserved() will clear h_journal before calling start_this_handle(). If this function fails due to an aborted handle, h_journal will still be NULL, and the call to jbd2_journal_free_reserved() will pass a NULL journal to sub_reserve_credits(). This can be reproduced by running "kvm-xfstests -c dioread_nolock generic/475". Cc: stable@kernel.org # 3.11 Fixes: 8f7d89f36829b ("jbd2: transaction reservation support") Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Reviewed-by: Jan Kara --- fs/jbd2/transaction.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index ac311037d7a5..8aa453784402 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -532,6 +532,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, */ ret = start_this_handle(journal, handle, GFP_NOFS); if (ret < 0) { + handle->h_journal = journal; jbd2_journal_free_reserved(handle); return ret; } From 5692fceebeb7f11c07e2a990f7711a01ae437de2 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 11 Apr 2018 16:15:43 -0500 Subject: [PATCH 276/660] ARM: OMAP2+: Fix build when using split object directories The sleep33xx and sleep43xx files should not depend on a header file generated in drivers/memory. Remove this dependency and instead allow both drivers/memory and arch/arm/mach-omap2 to generate all macros needed in headers local to their own paths. This fixes an issue where the build fail will when using O= to set a split object directory and arch/arm/mach-omap2 is built before drivers/memory with the following error: .../drivers/memory/emif-asm-offsets.c:1:0: fatal error: can't open drivers/memory/emif-asm-offsets.s for writing: No such file or directory compilation terminated. Fixes: 41d9d44d7258 ("ARM: OMAP2+: pm33xx-core: Add platform code needed for PM") Reviewed-by: Masahiro Yamada Signed-off-by: Dave Gerlach Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Makefile | 6 +-- arch/arm/mach-omap2/pm-asm-offsets.c | 3 ++ arch/arm/mach-omap2/sleep33xx.S | 1 - arch/arm/mach-omap2/sleep43xx.S | 1 - drivers/memory/emif-asm-offsets.c | 72 +------------------------- include/linux/ti-emif-sram.h | 75 ++++++++++++++++++++++++++++ 6 files changed, 80 insertions(+), 78 deletions(-) diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 4603c30fef73..0d9ce58bc464 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -243,8 +243,4 @@ arch/arm/mach-omap2/pm-asm-offsets.s: arch/arm/mach-omap2/pm-asm-offsets.c include/generated/ti-pm-asm-offsets.h: arch/arm/mach-omap2/pm-asm-offsets.s FORCE $(call filechk,offsets,__TI_PM_ASM_OFFSETS_H__) -# For rule to generate ti-emif-asm-offsets.h dependency -include drivers/memory/Makefile.asm-offsets - -arch/arm/mach-omap2/sleep33xx.o: include/generated/ti-pm-asm-offsets.h include/generated/ti-emif-asm-offsets.h -arch/arm/mach-omap2/sleep43xx.o: include/generated/ti-pm-asm-offsets.h include/generated/ti-emif-asm-offsets.h +$(obj)/sleep33xx.o $(obj)/sleep43xx.o: include/generated/ti-pm-asm-offsets.h diff --git a/arch/arm/mach-omap2/pm-asm-offsets.c b/arch/arm/mach-omap2/pm-asm-offsets.c index 6d4392da7c11..b9846b19e5e2 100644 --- a/arch/arm/mach-omap2/pm-asm-offsets.c +++ b/arch/arm/mach-omap2/pm-asm-offsets.c @@ -7,9 +7,12 @@ #include #include +#include int main(void) { + ti_emif_asm_offsets(); + DEFINE(AMX3_PM_WFI_FLAGS_OFFSET, offsetof(struct am33xx_pm_sram_data, wfi_flags)); DEFINE(AMX3_PM_L2_AUX_CTRL_VAL_OFFSET, diff --git a/arch/arm/mach-omap2/sleep33xx.S b/arch/arm/mach-omap2/sleep33xx.S index 218d79930b04..322b3bb868b4 100644 --- a/arch/arm/mach-omap2/sleep33xx.S +++ b/arch/arm/mach-omap2/sleep33xx.S @@ -6,7 +6,6 @@ * Dave Gerlach, Vaibhav Bedia */ -#include #include #include #include diff --git a/arch/arm/mach-omap2/sleep43xx.S b/arch/arm/mach-omap2/sleep43xx.S index b24be624e8b9..8903814a6677 100644 --- a/arch/arm/mach-omap2/sleep43xx.S +++ b/arch/arm/mach-omap2/sleep43xx.S @@ -6,7 +6,6 @@ * Dave Gerlach, Vaibhav Bedia */ -#include #include #include #include diff --git a/drivers/memory/emif-asm-offsets.c b/drivers/memory/emif-asm-offsets.c index 71a89d5d3efd..db8043019ec6 100644 --- a/drivers/memory/emif-asm-offsets.c +++ b/drivers/memory/emif-asm-offsets.c @@ -16,77 +16,7 @@ int main(void) { - DEFINE(EMIF_SDCFG_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_sdcfg_val)); - DEFINE(EMIF_TIMING1_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_timing1_val)); - DEFINE(EMIF_TIMING2_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_timing2_val)); - DEFINE(EMIF_TIMING3_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_timing3_val)); - DEFINE(EMIF_REF_CTRL_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_ref_ctrl_val)); - DEFINE(EMIF_ZQCFG_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_zqcfg_val)); - DEFINE(EMIF_PMCR_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_pmcr_val)); - DEFINE(EMIF_PMCR_SHDW_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_pmcr_shdw_val)); - DEFINE(EMIF_RD_WR_LEVEL_RAMP_CTRL_OFFSET, - offsetof(struct emif_regs_amx3, emif_rd_wr_level_ramp_ctrl)); - DEFINE(EMIF_RD_WR_EXEC_THRESH_OFFSET, - offsetof(struct emif_regs_amx3, emif_rd_wr_exec_thresh)); - DEFINE(EMIF_COS_CONFIG_OFFSET, - offsetof(struct emif_regs_amx3, emif_cos_config)); - DEFINE(EMIF_PRIORITY_TO_COS_MAPPING_OFFSET, - offsetof(struct emif_regs_amx3, emif_priority_to_cos_mapping)); - DEFINE(EMIF_CONNECT_ID_SERV_1_MAP_OFFSET, - offsetof(struct emif_regs_amx3, emif_connect_id_serv_1_map)); - DEFINE(EMIF_CONNECT_ID_SERV_2_MAP_OFFSET, - offsetof(struct emif_regs_amx3, emif_connect_id_serv_2_map)); - DEFINE(EMIF_OCP_CONFIG_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_ocp_config_val)); - DEFINE(EMIF_LPDDR2_NVM_TIM_OFFSET, - offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim)); - DEFINE(EMIF_LPDDR2_NVM_TIM_SHDW_OFFSET, - offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim_shdw)); - DEFINE(EMIF_DLL_CALIB_CTRL_VAL_OFFSET, - offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val)); - DEFINE(EMIF_DLL_CALIB_CTRL_VAL_SHDW_OFFSET, - offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val_shdw)); - DEFINE(EMIF_DDR_PHY_CTLR_1_OFFSET, - offsetof(struct emif_regs_amx3, emif_ddr_phy_ctlr_1)); - DEFINE(EMIF_EXT_PHY_CTRL_VALS_OFFSET, - offsetof(struct emif_regs_amx3, emif_ext_phy_ctrl_vals)); - DEFINE(EMIF_REGS_AMX3_SIZE, sizeof(struct emif_regs_amx3)); - - BLANK(); - - DEFINE(EMIF_PM_BASE_ADDR_VIRT_OFFSET, - offsetof(struct ti_emif_pm_data, ti_emif_base_addr_virt)); - DEFINE(EMIF_PM_BASE_ADDR_PHYS_OFFSET, - offsetof(struct ti_emif_pm_data, ti_emif_base_addr_phys)); - DEFINE(EMIF_PM_CONFIG_OFFSET, - offsetof(struct ti_emif_pm_data, ti_emif_sram_config)); - DEFINE(EMIF_PM_REGS_VIRT_OFFSET, - offsetof(struct ti_emif_pm_data, regs_virt)); - DEFINE(EMIF_PM_REGS_PHYS_OFFSET, - offsetof(struct ti_emif_pm_data, regs_phys)); - DEFINE(EMIF_PM_DATA_SIZE, sizeof(struct ti_emif_pm_data)); - - BLANK(); - - DEFINE(EMIF_PM_SAVE_CONTEXT_OFFSET, - offsetof(struct ti_emif_pm_functions, save_context)); - DEFINE(EMIF_PM_RESTORE_CONTEXT_OFFSET, - offsetof(struct ti_emif_pm_functions, restore_context)); - DEFINE(EMIF_PM_ENTER_SR_OFFSET, - offsetof(struct ti_emif_pm_functions, enter_sr)); - DEFINE(EMIF_PM_EXIT_SR_OFFSET, - offsetof(struct ti_emif_pm_functions, exit_sr)); - DEFINE(EMIF_PM_ABORT_SR_OFFSET, - offsetof(struct ti_emif_pm_functions, abort_sr)); - DEFINE(EMIF_PM_FUNCTIONS_SIZE, sizeof(struct ti_emif_pm_functions)); + ti_emif_asm_offsets(); return 0; } diff --git a/include/linux/ti-emif-sram.h b/include/linux/ti-emif-sram.h index 45bc6b376492..53604b087f2c 100644 --- a/include/linux/ti-emif-sram.h +++ b/include/linux/ti-emif-sram.h @@ -60,6 +60,81 @@ struct ti_emif_pm_functions { u32 abort_sr; } __packed __aligned(8); +static inline void ti_emif_asm_offsets(void) +{ + DEFINE(EMIF_SDCFG_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_sdcfg_val)); + DEFINE(EMIF_TIMING1_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_timing1_val)); + DEFINE(EMIF_TIMING2_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_timing2_val)); + DEFINE(EMIF_TIMING3_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_timing3_val)); + DEFINE(EMIF_REF_CTRL_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_ref_ctrl_val)); + DEFINE(EMIF_ZQCFG_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_zqcfg_val)); + DEFINE(EMIF_PMCR_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_pmcr_val)); + DEFINE(EMIF_PMCR_SHDW_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_pmcr_shdw_val)); + DEFINE(EMIF_RD_WR_LEVEL_RAMP_CTRL_OFFSET, + offsetof(struct emif_regs_amx3, emif_rd_wr_level_ramp_ctrl)); + DEFINE(EMIF_RD_WR_EXEC_THRESH_OFFSET, + offsetof(struct emif_regs_amx3, emif_rd_wr_exec_thresh)); + DEFINE(EMIF_COS_CONFIG_OFFSET, + offsetof(struct emif_regs_amx3, emif_cos_config)); + DEFINE(EMIF_PRIORITY_TO_COS_MAPPING_OFFSET, + offsetof(struct emif_regs_amx3, emif_priority_to_cos_mapping)); + DEFINE(EMIF_CONNECT_ID_SERV_1_MAP_OFFSET, + offsetof(struct emif_regs_amx3, emif_connect_id_serv_1_map)); + DEFINE(EMIF_CONNECT_ID_SERV_2_MAP_OFFSET, + offsetof(struct emif_regs_amx3, emif_connect_id_serv_2_map)); + DEFINE(EMIF_OCP_CONFIG_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_ocp_config_val)); + DEFINE(EMIF_LPDDR2_NVM_TIM_OFFSET, + offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim)); + DEFINE(EMIF_LPDDR2_NVM_TIM_SHDW_OFFSET, + offsetof(struct emif_regs_amx3, emif_lpddr2_nvm_tim_shdw)); + DEFINE(EMIF_DLL_CALIB_CTRL_VAL_OFFSET, + offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val)); + DEFINE(EMIF_DLL_CALIB_CTRL_VAL_SHDW_OFFSET, + offsetof(struct emif_regs_amx3, emif_dll_calib_ctrl_val_shdw)); + DEFINE(EMIF_DDR_PHY_CTLR_1_OFFSET, + offsetof(struct emif_regs_amx3, emif_ddr_phy_ctlr_1)); + DEFINE(EMIF_EXT_PHY_CTRL_VALS_OFFSET, + offsetof(struct emif_regs_amx3, emif_ext_phy_ctrl_vals)); + DEFINE(EMIF_REGS_AMX3_SIZE, sizeof(struct emif_regs_amx3)); + + BLANK(); + + DEFINE(EMIF_PM_BASE_ADDR_VIRT_OFFSET, + offsetof(struct ti_emif_pm_data, ti_emif_base_addr_virt)); + DEFINE(EMIF_PM_BASE_ADDR_PHYS_OFFSET, + offsetof(struct ti_emif_pm_data, ti_emif_base_addr_phys)); + DEFINE(EMIF_PM_CONFIG_OFFSET, + offsetof(struct ti_emif_pm_data, ti_emif_sram_config)); + DEFINE(EMIF_PM_REGS_VIRT_OFFSET, + offsetof(struct ti_emif_pm_data, regs_virt)); + DEFINE(EMIF_PM_REGS_PHYS_OFFSET, + offsetof(struct ti_emif_pm_data, regs_phys)); + DEFINE(EMIF_PM_DATA_SIZE, sizeof(struct ti_emif_pm_data)); + + BLANK(); + + DEFINE(EMIF_PM_SAVE_CONTEXT_OFFSET, + offsetof(struct ti_emif_pm_functions, save_context)); + DEFINE(EMIF_PM_RESTORE_CONTEXT_OFFSET, + offsetof(struct ti_emif_pm_functions, restore_context)); + DEFINE(EMIF_PM_ENTER_SR_OFFSET, + offsetof(struct ti_emif_pm_functions, enter_sr)); + DEFINE(EMIF_PM_EXIT_SR_OFFSET, + offsetof(struct ti_emif_pm_functions, exit_sr)); + DEFINE(EMIF_PM_ABORT_SR_OFFSET, + offsetof(struct ti_emif_pm_functions, abort_sr)); + DEFINE(EMIF_PM_FUNCTIONS_SIZE, sizeof(struct ti_emif_pm_functions)); +} + struct gen_pool; int ti_emif_copy_pm_function_table(struct gen_pool *sram_pool, void *dst); From 8aec5fc1d4d881fe446addb94309efb39d4e5b23 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 26 Mar 2018 23:17:42 +0200 Subject: [PATCH 277/660] ARM64: dts: meson-gxl: add USB host support This adds USB host support to the Meson GXL SoC. A dwc3 controller is used for host-mode, while a dwc2 controller (not added in this patch because I could not get it working) is used for device-mode only. The dwc3 controller's internal roothub has two USB2 ports enabled but no USB3 port. Each of the ports is supplied by a separate PHY. The USB pins are connected to the SoC's USBHOST_A and USBOTG_B pins. Due to the way the roothub works internally the USB PHYs are left enabled. When the dwc3 controller is disabled the PHY is never powered on so it does not draw any extra power. However, when the dwc3 host controller is enabled then all PHYs also have to be enabled, otherwise USB devices will not be detected (regardless of whether they are plugged into an enabled port or not). This means that only the dwc3 controller has to be enabled on boards with USB support (instead of requiring all boards to enable the PHYs additionally with the chance of forgetting to enable one and breaking all other ports with that as well). This also adds the USB3 PHY which currently only does some basic initialization. That however is required because without it high-speed devices (like USB thumb drives) do not work on some devices (probably because the bootloader does not configure the USB3 PHY registers). Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index e1a39cbed8c9..dba365ed4bd5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -20,6 +20,67 @@ no-map; }; }; + + soc { + usb0: usb@c9000000 { + status = "disabled"; + compatible = "amlogic,meson-gxl-dwc3"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clocks = <&clkc CLKID_USB>; + clock-names = "usb_general"; + resets = <&reset RESET_USB_OTG>; + reset-names = "usb_otg"; + + dwc3: dwc3@c9000000 { + compatible = "snps,dwc3"; + reg = <0x0 0xc9000000 0x0 0x100000>; + interrupts = ; + dr_mode = "host"; + maximum-speed = "high-speed"; + snps,dis_u2_susphy_quirk; + phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>; + }; + }; + }; +}; + +&apb { + usb2_phy0: phy@78000 { + compatible = "amlogic,meson-gxl-usb2-phy"; + #phy-cells = <0>; + reg = <0x0 0x78000 0x0 0x20>; + clocks = <&clkc CLKID_USB>; + clock-names = "phy"; + resets = <&reset RESET_USB_OTG>; + reset-names = "phy"; + status = "okay"; + }; + + usb2_phy1: phy@78020 { + compatible = "amlogic,meson-gxl-usb2-phy"; + #phy-cells = <0>; + reg = <0x0 0x78020 0x0 0x20>; + clocks = <&clkc CLKID_USB>; + clock-names = "phy"; + resets = <&reset RESET_USB_OTG>; + reset-names = "phy"; + status = "okay"; + }; + + usb3_phy: phy@78080 { + compatible = "amlogic,meson-gxl-usb3-phy"; + #phy-cells = <0>; + reg = <0x0 0x78080 0x0 0x20>; + interrupts = ; + clocks = <&clkc CLKID_USB>, <&clkc_AO CLKID_AO_CEC_32K>; + clock-names = "phy", "peripheral"; + resets = <&reset RESET_USB_OTG>, <&reset RESET_USB_OTG>; + reset-names = "phy", "peripheral"; + status = "okay"; + }; }; ðmac { From 458baa95c86406c81c6ebac0a98d1689075a3ec4 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 26 Mar 2018 23:17:43 +0200 Subject: [PATCH 278/660] ARM64: dts: meson-gxm: add GXM specific USB host configuration The USB configuration on GXM is slightly different than on GXL. The dwc3 controller's internal hub has three USB2 ports (instead of 2 on GXL) along with a dedicated USB2 PHY for this port. However, it seems that there are no pins on GXM which would allow connecting the third port to a physical USB port. Passing the third PHY is required though, because without it none of the other USB ports is working (this seems to be a limitation of how the internal USB hub works, if one PHY is disabled then no USB port works). Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxm.dtsi | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi index d076a7c425dd..247888d68a3a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi @@ -80,6 +80,19 @@ }; }; +&apb { + usb2_phy2: phy@78040 { + compatible = "amlogic,meson-gxl-usb2-phy"; + #phy-cells = <0>; + reg = <0x0 0x78040 0x0 0x20>; + clocks = <&clkc CLKID_USB>; + clock-names = "phy"; + resets = <&reset RESET_USB_OTG>; + reset-names = "phy"; + status = "okay"; + }; +}; + &clkc_AO { compatible = "amlogic,meson-gxm-aoclkc", "amlogic,meson-gx-aoclkc"; }; @@ -100,3 +113,7 @@ &hdmi_tx { compatible = "amlogic,meson-gxm-dw-hdmi", "amlogic,meson-gx-dw-hdmi"; }; + +&dwc3 { + phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>, <&usb2_phy2>; +}; From b9f07cb4f41fccbe7616482015d28e6e26aec3a3 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 26 Mar 2018 23:17:44 +0200 Subject: [PATCH 279/660] ARM64: dts: meson-gxl-s905x-p212: enable the USB controller All boards based on the P212 reference design (the P212 reference board itself and the Khadas VIM) have USB connectors (in case of the Khadas VIM the first port is exposed through the USB Type-C connector, the second port is connected to a 4-port USB hub). This enables the USB controller on these boards to make the USB ports actually usable. Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi index 0a0953fbc7d4..0cfd701809de 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi @@ -185,3 +185,7 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb0 { + status = "okay"; +}; From 972cd12a027256061c19c164021f2a771e860438 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 26 Mar 2018 23:17:45 +0200 Subject: [PATCH 280/660] ARM64: dts: meson-gx-p23x-q20x: enable the USB controller All S905D (GXL) and S912 (GXM) reference boards (namely these are P230, P231, Q200 and Q201) provide USB connectors. This enables the USB controller on these boards to make the USB ports actually usable. Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi index 4eef36b22538..88e712ea757a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi @@ -212,3 +212,7 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb0 { + status = "okay"; +}; From b83687f359d9b4128073f06ab7a06489eb04aa7c Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 26 Mar 2018 23:17:46 +0200 Subject: [PATCH 281/660] ARM64: dts: meson-gxl-s905x-libretech-cc: enable the USB controller The LibreTech CC ("Le Potato") board provides four USB connectors. These are provided by a hub which is connected to the SoC's USB controller. Enable the SoC's USB controller to make the USB ports usable. Also turn on the HDMI_5V regulator when powering on the PHY because (even though it's not shown in the schematics) HDMI_5V also supplies the USB VBUS. Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- .../dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 22bf37404ff1..3e3eb31748a3 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -271,3 +271,15 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb0 { + status = "okay"; +}; + +&usb2_phy0 { + /* + * even though the schematics don't show it: + * HDMI_5V is also used as supply for the USB VBUS. + */ + phy-supply = <&hdmi_5v>; +}; From 55ef32249bb647c6b64adcf943918d302a0020a7 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 26 Mar 2018 23:17:47 +0200 Subject: [PATCH 282/660] ARM64: dts: meson-gxl-nexbox-a95x: enable the USB controller The Nexbox A95X provides two USB ports. Enable the SoC's USB controller on this board to make these USB ports usable. Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts index 69c721a70e44..6739697be1de 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts @@ -215,3 +215,7 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb0 { + status = "okay"; +}; From 4b7b0d7b25538d2ad421a1041267d5208d3425bc Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 26 Mar 2018 23:17:48 +0200 Subject: [PATCH 283/660] ARM64: dts: meson-gxm-khadas-vim2: enable the USB controller The Khadas VIM2 board connects the dwc3 controller to an internal 4-port USB hub which. Two of these ports are accessible directly soldered to the board, while the other two are accessible through the 40-pin "GPIO" header. Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index 4fd46c1546a7..0868da476e41 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -406,3 +406,7 @@ status = "okay"; vref-supply = <&vddio_ao18>; }; + +&usb0 { + status = "okay"; +}; From be47e41d77fba5bc17e9fb5f1c99217bb6691989 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Tue, 17 Apr 2018 21:25:42 +0200 Subject: [PATCH 284/660] tipc: fix use-after-free in tipc_nametbl_stop When we delete a service item in tipc_nametbl_stop() we loop over all service ranges in the service's RB tree, and for each service range we loop over its pertaining publications while calling tipc_service_remove_publ() for each of them. However, tipc_service_remove_publ() has the side effect that it also removes the comprising service range item when there are no publications left. This leads to a "use-after-free" access when the inner loop continues to the next iteration, since the range item holding the list we are looping no longer exists. We fix this by moving the delete of the service range item outside the said function. Instead, we now let the two functions calling it test if the list is empty and perform the removal when that is the case. Reported-by: syzbot+d64b64afc55660106556@syzkaller.appspotmail.com Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/name_table.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 4068eaad61a6..dd1c4fa2eb78 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -241,7 +241,8 @@ err: static struct publication *tipc_service_remove_publ(struct net *net, struct tipc_service *sc, u32 lower, u32 upper, - u32 node, u32 key) + u32 node, u32 key, + struct service_range **rng) { struct tipc_subscription *sub, *tmp; struct service_range *sr; @@ -275,19 +276,15 @@ static struct publication *tipc_service_remove_publ(struct net *net, list_del(&p->all_publ); list_del(&p->local_publ); - - /* Remove service range item if this was its last publication */ - if (list_empty(&sr->all_publ)) { + if (list_empty(&sr->all_publ)) last = true; - rb_erase(&sr->tree_node, &sc->ranges); - kfree(sr); - } /* Notify any waiting subscriptions */ list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_WITHDRAWN, p->port, p->node, p->scope, last); } + *rng = sr; return p; } @@ -379,13 +376,20 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 node, u32 key) { struct tipc_service *sc = tipc_service_find(net, type); + struct service_range *sr = NULL; struct publication *p = NULL; if (!sc) return NULL; spin_lock_bh(&sc->lock); - p = tipc_service_remove_publ(net, sc, lower, upper, node, key); + p = tipc_service_remove_publ(net, sc, lower, upper, node, key, &sr); + + /* Remove service range item if this was its last publication */ + if (sr && list_empty(&sr->all_publ)) { + rb_erase(&sr->tree_node, &sc->ranges); + kfree(sr); + } /* Delete service item if this no more publications and subscriptions */ if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) { @@ -747,16 +751,17 @@ int tipc_nametbl_init(struct net *net) static void tipc_service_delete(struct net *net, struct tipc_service *sc) { struct service_range *sr, *tmpr; - struct publication *p, *tmpb; + struct publication *p, *tmp; spin_lock_bh(&sc->lock); rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) { - list_for_each_entry_safe(p, tmpb, - &sr->all_publ, all_publ) { + list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) { tipc_service_remove_publ(net, sc, p->lower, p->upper, - p->node, p->key); + p->node, p->key, &sr); kfree_rcu(p, rcu); } + rb_erase(&sr->tree_node, &sc->ranges); + kfree(sr); } hlist_del_init_rcu(&sc->service_list); spin_unlock_bh(&sc->lock); From 36a50a989ee8267588de520b8704b85f045a3220 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 17 Apr 2018 21:58:27 +0200 Subject: [PATCH 285/660] tipc: fix infinite loop when dumping link monitor summary When configuring the number of used bearers to MAX_BEARER and issuing command "tipc link monitor summary", the command enters infinite loop in user space. This issue happens because function tipc_nl_node_dump_monitor() returns the wrong 'prev_bearer' value when all potential monitors have been scanned. The correct behavior is to always try to scan all monitors until either the netlink message is full, in which case we return the bearer identity of the affected monitor, or we continue through the whole bearer array until we can return MAX_BEARERS. This solution also caters for the case where there may be gaps in the bearer array. Signed-off-by: Tung Nguyen Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/monitor.c | 2 +- net/tipc/node.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 32dc33a94bc7..5453e564da82 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -777,7 +777,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, ret = tipc_bearer_get_name(net, bearer_name, bearer_id); if (ret || !mon) - return -EINVAL; + return 0; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_MON_GET); diff --git a/net/tipc/node.c b/net/tipc/node.c index c77dd2f3c589..6f98b56dd48e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2232,8 +2232,8 @@ int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); u32 prev_bearer = cb->args[0]; struct tipc_nl_msg msg; + int bearer_id; int err; - int i; if (prev_bearer == MAX_BEARERS) return 0; @@ -2243,16 +2243,13 @@ int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); - for (i = prev_bearer; i < MAX_BEARERS; i++) { - prev_bearer = i; + for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) { err = __tipc_nl_add_monitor(net, &msg, prev_bearer); if (err) - goto out; + break; } - -out: rtnl_unlock(); - cb->args[0] = prev_bearer; + cb->args[0] = bearer_id; return skb->len; } From 81c895072d29cd70eea5be1a8587cd6461c3715a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 17 Apr 2018 22:46:38 +0200 Subject: [PATCH 286/660] tun: fix vlan packet truncation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bogus trimming in tun_net_xmit() causes truncated vlan packets. skb->len is correct whether or not skb_vlan_tag_present() is true. There is no more reason to adjust the skb length on xmit in this driver than any other driver. tun_put_user() adds 4 bytes to the total for tagged packets because it transmits the tag inline to userspace. This is similar to a nic transmitting the tag inline on the wire. Reproducing the bug by sending any tagged packet through back-to-back connected tap interfaces: socat TUN,tun-type=tap,iff-up,tun-name=in TUN,tun-type=tap,iff-up,tun-name=out & ip link add link in name in.20 type vlan id 20 ip addr add 10.9.9.9/24 dev in.20 ip link set in.20 up tshark -nxxi in -f arp -c1 2>/dev/null & tshark -nxxi out -f arp -c1 2>/dev/null & ping -c 1 10.9.9.5 >/dev/null 2>&1 The output from the 'in' and 'out' interfaces are different when the bug is present: Capturing on 'in' 0000 ff ff ff ff ff ff 76 cf 76 37 d5 0a 81 00 00 14 ......v.v7...... 0010 08 06 00 01 08 00 06 04 00 01 76 cf 76 37 d5 0a ..........v.v7.. 0020 0a 09 09 09 00 00 00 00 00 00 0a 09 09 05 .............. Capturing on 'out' 0000 ff ff ff ff ff ff 76 cf 76 37 d5 0a 81 00 00 14 ......v.v7...... 0010 08 06 00 01 08 00 06 04 00 01 76 cf 76 37 d5 0a ..........v.v7.. 0020 0a 09 09 09 00 00 00 00 00 00 .......... Fixes: aff3d70a07ff ("tun: allow to attach ebpf socket filter") Cc: Jason Wang Signed-off-by: Bjørn Mork Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 28583aa0c17d..ef33950a45d9 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1102,12 +1102,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; len = run_ebpf_filter(tun, skb, len); - - /* Trim extra bytes since we may insert vlan proto & TCI - * in tun_put_user(). - */ - len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0; - if (len <= 0 || pskb_trim(skb, len)) + if (len == 0 || pskb_trim(skb, len)) goto drop; if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) From 4555a5021fe88fc4f19ff53d1e58b410cf30a49a Mon Sep 17 00:00:00 2001 From: Srinath Mannam Date: Wed, 18 Apr 2018 14:11:29 +0530 Subject: [PATCH 287/660] arm64: dts: correct SATA addresses for Stingray Correct all SATA ahci and phy controller register addresses and interrupt lines to proper values. Fixes: 344a2e514182 ("arm64: dts: Add SATA DT nodes for Stingray SoC") Signed-off-by: Srinath Mannam Reviewed-by: Ray Jui Reviewed-by: Scott Branden Reviewed-by: Andrew Gospodarek Signed-off-by: Florian Fainelli --- .../dts/broadcom/stingray/stingray-sata.dtsi | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi index 4b5465da81d8..8c68e0c26f1b 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi @@ -36,11 +36,11 @@ #size-cells = <1>; ranges = <0x0 0x0 0x67d00000 0x00800000>; - sata0: ahci@210000 { + sata0: ahci@0 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00210000 0x1000>; + reg = <0x00000000 0x1000>; reg-names = "ahci"; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -52,9 +52,9 @@ }; }; - sata_phy0: sata_phy@212100 { + sata_phy0: sata_phy@2100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00212100 0x1000>; + reg = <0x00002100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -66,11 +66,11 @@ }; }; - sata1: ahci@310000 { + sata1: ahci@10000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00310000 0x1000>; + reg = <0x00010000 0x1000>; reg-names = "ahci"; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -82,9 +82,9 @@ }; }; - sata_phy1: sata_phy@312100 { + sata_phy1: sata_phy@12100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00312100 0x1000>; + reg = <0x00012100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -96,11 +96,11 @@ }; }; - sata2: ahci@120000 { + sata2: ahci@20000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00120000 0x1000>; + reg = <0x00020000 0x1000>; reg-names = "ahci"; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -112,9 +112,9 @@ }; }; - sata_phy2: sata_phy@122100 { + sata_phy2: sata_phy@22100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00122100 0x1000>; + reg = <0x00022100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -126,11 +126,11 @@ }; }; - sata3: ahci@130000 { + sata3: ahci@30000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00130000 0x1000>; + reg = <0x00030000 0x1000>; reg-names = "ahci"; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -142,9 +142,9 @@ }; }; - sata_phy3: sata_phy@132100 { + sata_phy3: sata_phy@32100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00132100 0x1000>; + reg = <0x00032100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -156,11 +156,11 @@ }; }; - sata4: ahci@330000 { + sata4: ahci@100000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00330000 0x1000>; + reg = <0x00100000 0x1000>; reg-names = "ahci"; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -172,9 +172,9 @@ }; }; - sata_phy4: sata_phy@332100 { + sata_phy4: sata_phy@102100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00332100 0x1000>; + reg = <0x00102100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -186,11 +186,11 @@ }; }; - sata5: ahci@400000 { + sata5: ahci@110000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00400000 0x1000>; + reg = <0x00110000 0x1000>; reg-names = "ahci"; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -202,9 +202,9 @@ }; }; - sata_phy5: sata_phy@402100 { + sata_phy5: sata_phy@112100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00402100 0x1000>; + reg = <0x00112100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -216,11 +216,11 @@ }; }; - sata6: ahci@410000 { + sata6: ahci@120000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00410000 0x1000>; + reg = <0x00120000 0x1000>; reg-names = "ahci"; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -232,9 +232,9 @@ }; }; - sata_phy6: sata_phy@412100 { + sata_phy6: sata_phy@122100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00412100 0x1000>; + reg = <0x00122100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -246,11 +246,11 @@ }; }; - sata7: ahci@420000 { + sata7: ahci@130000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00420000 0x1000>; + reg = <0x00130000 0x1000>; reg-names = "ahci"; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -262,9 +262,9 @@ }; }; - sata_phy7: sata_phy@422100 { + sata_phy7: sata_phy@132100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00422100 0x1000>; + reg = <0x00132100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; From 4e5c01a7c746130033eb6e7153b346b8ca61e7a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Apr 2018 16:39:38 -0300 Subject: [PATCH 288/660] perf trace: Support MAP_FIXED_NOREPLACE Introduced in a4ff8e8620d3 ("mm: introduce MAP_FIXED_NOREPLACE"), and now that we have that define in the just syncronized tools/arch/*/include/uapi/asm/mman.h files, add support for it. This should really transition to autogeneration of string tables as done for various other things: $ ls /tmp/build/perf/trace/beauty/generated/*.c arch_errno_name_array.c kcmp_type_array.c madvise_behavior_array.c pkey_alloc_access_rights_array.c prctl_option_array.c $ head /tmp/build/perf/trace/beauty/generated/madvise_behavior_array.c static const char *madvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [8] = "FREE", [9] = "REMOVE", [10] = "DONTFORK", [11] = "DOFORK", $ Till then, add support for this the old way. Also it has to be ifdef'ed, because arches like mips still don't define it. The proper solution will be to have per-arch tables for these values to support cross-analysis. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michal Hocko Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-td9t5vhjltqnlzaurkkgq8cn@git.kernel.org Signef-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 417e3ecfe9d7..9f68077b241b 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -54,6 +54,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(EXECUTABLE); P_MMAP_FLAG(FILE); P_MMAP_FLAG(FIXED); +#ifdef MAP_FIXED_NOREPLACE + P_MMAP_FLAG(FIXED_NOREPLACE); +#endif P_MMAP_FLAG(GROWSDOWN); P_MMAP_FLAG(HUGETLB); P_MMAP_FLAG(LOCKED); From a7e9eab3dbd35268c16244557a4155a2d9a641c3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 6 Apr 2018 13:38:09 -0700 Subject: [PATCH 289/660] perf mem: Allow all record/report options For perf mem report / perf mem record, pass all unknown options through to the underlying report/record commands. This makes things like perf mem record -a sleep 1 work. Matches how c2c and other tools work. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20180406203812.3087-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-mem.txt | 3 +++ tools/perf/builtin-mem.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index b0211410969b..8806ed5f3802 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -67,6 +67,9 @@ OPTIONS --phys-data:: Record/Report sample physical addresses +In addition, for report all perf report options are valid, and for record +all perf record options. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 506564651cda..57393e94d156 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -83,7 +83,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) }; argc = parse_options(argc, argv, options, record_mem_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + PARSE_OPT_KEEP_UNKNOWN); rec_argc = argc + 9; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); @@ -436,7 +436,7 @@ int cmd_mem(int argc, const char **argv) } argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, - mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); + mem_usage, PARSE_OPT_KEEP_UNKNOWN); if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation)) usage_with_options(mem_usage, mem_options); From 6a02f06edea5a5910c787fd6c49b0552e8080e5d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 6 Apr 2018 13:38:10 -0700 Subject: [PATCH 290/660] perf hists browser: Clarify top/report browser help Clarify in the browser help that ESC in tui mode may go back to the previous screen instead of just exiting (was not clear to me) Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20180406203812.3087-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 0eec06c105c6..e5f247247daa 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2714,7 +2714,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "h/?/F1 Show this window\n" \ "UP/DOWN/PGUP\n" \ "PGDN/SPACE Navigate\n" \ - "q/ESC/CTRL+C Exit browser\n\n" \ + "q/ESC/CTRL+C Exit browser or go back to previous screen\n\n" \ "For multiple event sessions:\n\n" \ "TAB/UNTAB Switch events\n\n" \ "For symbolic views (--sort has sym):\n\n" \ From ec3948451e0ba317e66873b48d6cc51d701d4eb0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 6 Apr 2018 13:38:11 -0700 Subject: [PATCH 291/660] perf record: Remove misleading error suggestion When perf record encounters an error setting up an event it suggests to enable CONFIG_PERF_EVENTS. This is misleading because: - Usually it is enabled (it is really hard to disable on x86) - The problem is usually somewhere else, e.g. the CPU is not supported or an invalid configuration has been used. Remove the misleading suggestion. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20180406203812.3087-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1ac8d9236efd..66b62570c855 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2894,8 +2894,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" - "/bin/dmesg may provide additional information.\n" - "No CONFIG_PERF_EVENTS=y kernel support configured?", + "/bin/dmesg | grep -i perf may provide additional information.\n", err, str_error_r(err, sbuf, sizeof(sbuf)), perf_evsel__name(evsel)); } From ccbb6afe0890b09cc828373a9a5fffab40ec85df Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 6 Apr 2018 13:38:12 -0700 Subject: [PATCH 292/660] perf record: Remove suggestion to enable APIC 'perf record' suggests to enable the APIC on errors. APIC is practically always used today and the problem is usually somewhere else. Just remove the outdated suggestion. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20180406203812.3087-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 66b62570c855..3e87486c28fe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2870,8 +2870,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, #if defined(__i386__) || defined(__x86_64__) if (evsel->attr.type == PERF_TYPE_HARDWARE) return scnprintf(msg, size, "%s", - "No hardware sampling interrupt available.\n" - "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it."); + "No hardware sampling interrupt available.\n"); #endif break; case EBUSY: From 66f5a0779af2a4f28b1832f6c20bc9d3b1ab1886 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 17 Apr 2018 09:43:44 +0530 Subject: [PATCH 293/660] perf tools: Add '\n' at the end of parse-options error messages Few error messages does not have '\n' at the end and thus next prompt gets printed in the same line. Ex, linux~$ perf buildid-cache -verbose --add ./a.out Error: did you mean `--verbose` (with two dashes ?)linux~$ Fix it. Signed-off-by: Ravi Bangoria Reviewed-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kate Stewart Cc: Krister Johansen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Sihyeon Jang Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180417041346.5617-2-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/parse-options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index f6a1babcbac4..cb7154eccbdc 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -433,7 +433,7 @@ match: if (ambiguous_option) { fprintf(stderr, - " Error: Ambiguous option: %s (could be --%s%s or --%s%s)", + " Error: Ambiguous option: %s (could be --%s%s or --%s%s)\n", arg, (ambiguous_flags & OPT_UNSET) ? "no-" : "", ambiguous_option->long_name, @@ -458,7 +458,7 @@ static void check_typos(const char *arg, const struct option *options) return; if (strstarts(arg, "no-")) { - fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg); + fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg); exit(129); } @@ -466,7 +466,7 @@ static void check_typos(const char *arg, const struct option *options) if (!options->long_name) continue; if (strstarts(options->long_name, arg)) { - fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg); + fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg); exit(129); } } From 518c6021e9b82696819b380cd173e76cac55a01e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Apr 2018 14:47:36 -0300 Subject: [PATCH 294/660] perf tests mmap: Show which tracepoint is failing In the 'perf test "mmap interface"' we try creating events for several tracepoints, but when perf_evsel__new() fails we're not showing which one is failing, fix that to help diagnosing problems, such as the syscall tracepoints ones being found and fixes in this merge window. Now the failing tests shows: # perf test -v "mmap interface" 4: Read samples using the mmap interface : --- start --- test child forked, pid 14311 perf_evsel__new(sys_enter_getppid) test child finished with -1 ---- end ---- Read samples using the mmap interface: FAILED! # Now to check why the syscalls:sys_enter_getppid is failing... # ls -la /sys/kernel/debug/tracing/events/syscalls/sys_enter_getppid ls: cannot access '/sys/kernel/debug/tracing/events/syscalls/sys_enter_getppid': No such file or directory # Cc: Adrian Hunter Cc: David Ahern Cc: Dominik Brodowski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-44xk0ycdzrfzx1o9rklf5itl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index bb8e6bcb0d96..0919b0793e5b 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -75,7 +75,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); evsels[i] = perf_evsel__newtp("syscalls", name); if (IS_ERR(evsels[i])) { - pr_debug("perf_evsel__new\n"); + pr_debug("perf_evsel__new(%s)\n", name); goto out_delete_evlist; } From eccb1b936363c62544bccb5bbb75afec9536f7e3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Apr 2018 12:59:03 -0300 Subject: [PATCH 295/660] perf test BPF: Fixup BPF test using epoll_pwait syscall function probe Since e145242ea0df ("syscalls/core, syscalls/x86: Clean up syscall stub naming convention") changed the main syscall function for 'epoll_pwait' to something other than the expected 'SyS_epoll_pwait the' 'perf test BPF' entries started failing, fix it by using something called from the main syscall function instead, 'epoll_wait', which should keep this test working in older kernels too. Before: # perf test BPF 40: BPF filter : 40.1: Basic BPF filtering : FAILED! 40.2: BPF pinning : Skip 40.3: BPF prologue generation : Skip 40.4: BPF relocation checker : Skip If we use -v for that test we see the problem: Probe point 'SyS_epoll_pwait' not found. After: # perf test BPF 40: BPF filter : 40.1: Basic BPF filtering : Ok 40.2: BPF pinning : Ok 40.3: BPF prologue generation : Ok 40.4: BPF relocation checker : Ok # Cc: Adrian Hunter Cc: David Ahern Cc: Dominik Brodowski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: https://lkml.kernel.org/r/tip-y24nmn70cs2am8jh4i344dng@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-example.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index e4123c1b0e88..1ca5106df5f1 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -31,7 +31,7 @@ struct bpf_map_def SEC("maps") flip_table = { .max_entries = 1, }; -SEC("func=SyS_epoll_pwait") +SEC("func=do_epoll_wait") int bpf_func__SyS_epoll_pwait(void *ctx) { int ind =0; From c96eebf07692e53bf4dd5987510d8b550e793598 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 17 Apr 2018 16:40:00 +0100 Subject: [PATCH 296/660] MIPS: memset.S: Fix clobber of v1 in last_fixup The label .Llast_fixup\@ is jumped to on page fault within the final byte set loop of memset (on < MIPSR6 architectures). For some reason, in this fault handler, the v1 register is randomly set to a2 & STORMASK. This clobbers v1 for the calling function. This can be observed with the following test code: static int __init __attribute__((optimize("O0"))) test_clear_user(void) { register int t asm("v1"); char *test; int j, k; pr_info("\n\n\nTesting clear_user\n"); test = vmalloc(PAGE_SIZE); for (j = 256; j < 512; j++) { t = 0xa5a5a5a5; if ((k = clear_user(test + PAGE_SIZE - 256, j)) != j - 256) { pr_err("clear_user (%px %d) returned %d\n", test + PAGE_SIZE - 256, j, k); } if (t != 0xa5a5a5a5) { pr_err("v1 was clobbered to 0x%x!\n", t); } } return 0; } late_initcall(test_clear_user); Which demonstrates that v1 is indeed clobbered (MIPS64): Testing clear_user v1 was clobbered to 0x1! v1 was clobbered to 0x2! v1 was clobbered to 0x3! v1 was clobbered to 0x4! v1 was clobbered to 0x5! v1 was clobbered to 0x6! v1 was clobbered to 0x7! Since the number of bytes that could not be set is already contained in a2, the andi placing a value in v1 is not necessary and actively harmful in clobbering v1. Reported-by: James Hogan Signed-off-by: Matt Redfearn Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/19109/ Signed-off-by: James Hogan --- arch/mips/lib/memset.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 184819c1d5c8..f7327979a8f8 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -258,7 +258,7 @@ .Llast_fixup\@: jr ra - andi v1, a2, STORMASK + nop .Lsmall_fixup\@: PTR_SUBU a2, t1, a0 From be71eda5383faa663efdba9ef54a6b8255e3c7f0 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 18 Apr 2018 09:14:36 +0200 Subject: [PATCH 297/660] module: Fix display of wrong module .text address Reading file /proc/modules shows the correct address: [root@s35lp76 ~]# cat /proc/modules | egrep '^qeth_l2' qeth_l2 94208 1 - Live 0x000003ff80401000 and reading file /sys/module/qeth_l2/sections/.text [root@s35lp76 ~]# cat /sys/module/qeth_l2/sections/.text 0x0000000018ea8363 displays a random address. This breaks the perf tool which uses this address on s390 to calculate start of .text section in memory. Fix this by printing the correct (unhashed) address. Thanks to Jessica Yu for helping on this. Fixes: ef0010a30935 ("vsprintf: don't use 'restricted_pointer()' when not restricting") Cc: # v4.15+ Suggested-by: Linus Torvalds Signed-off-by: Thomas Richter Cc: Jessica Yu Signed-off-by: Jessica Yu --- kernel/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index a6e43a5806a1..ce8066b88178 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1472,7 +1472,8 @@ static ssize_t module_sect_show(struct module_attribute *mattr, { struct module_sect_attr *sattr = container_of(mattr, struct module_sect_attr, mattr); - return sprintf(buf, "0x%pK\n", (void *)sattr->address); + return sprintf(buf, "0x%px\n", kptr_restrict < 2 ? + (void *)sattr->address : NULL); } static void free_sect_attrs(struct module_sect_attrs *sect_attrs) From b3d7e55c3f886493235bfee08e1e5a4a27cbcce8 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 17 Apr 2018 16:40:01 +0100 Subject: [PATCH 298/660] MIPS: uaccess: Add micromips clobbers to bzero invocation The micromips implementation of bzero additionally clobbers registers t7 & t8. Specify this in the clobbers list when invoking bzero. Fixes: 26c5e07d1478 ("MIPS: microMIPS: Optimise 'memset' core library function.") Reported-by: James Hogan Signed-off-by: Matt Redfearn Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 3.10+ Patchwork: https://patchwork.linux-mips.org/patch/19110/ Signed-off-by: James Hogan --- arch/mips/include/asm/uaccess.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index b71306947290..06629011a434 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -654,6 +654,13 @@ __clear_user(void __user *addr, __kernel_size_t size) { __kernel_size_t res; +#ifdef CONFIG_CPU_MICROMIPS +/* micromips memset / bzero also clobbers t7 & t8 */ +#define bzero_clobbers "$4", "$5", "$6", __UA_t0, __UA_t1, "$15", "$24", "$31" +#else +#define bzero_clobbers "$4", "$5", "$6", __UA_t0, __UA_t1, "$31" +#endif /* CONFIG_CPU_MICROMIPS */ + if (eva_kernel_access()) { __asm__ __volatile__( "move\t$4, %1\n\t" @@ -663,7 +670,7 @@ __clear_user(void __user *addr, __kernel_size_t size) "move\t%0, $6" : "=r" (res) : "r" (addr), "r" (size) - : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31"); + : bzero_clobbers); } else { might_fault(); __asm__ __volatile__( @@ -674,7 +681,7 @@ __clear_user(void __user *addr, __kernel_size_t size) "move\t%0, $6" : "=r" (res) : "r" (addr), "r" (size) - : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31"); + : bzero_clobbers); } return res; From 0a0a7e00a250c117f0c7ad8e1184abd98e7c098a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 18 Apr 2018 10:49:12 -0700 Subject: [PATCH 299/660] tools/bpf: fix test_sock and test_sock_addr.sh failure The bpf selftests test_sock and test_sock_addr.sh failed in my test machine. The failure looks like: $ ./test_sock Test case: bind4 load with invalid access: src_ip6 .. [PASS] Test case: bind4 load with invalid access: mark .. [PASS] Test case: bind6 load with invalid access: src_ip4 .. [PASS] Test case: sock_create load with invalid access: src_port .. [PASS] Test case: sock_create load w/o expected_attach_type (compat mode) .. [FAIL] Test case: sock_create load w/ expected_attach_type .. [FAIL] Test case: attach type mismatch bind4 vs bind6 .. [FAIL] ... Summary: 4 PASSED, 12 FAILED $ ./test_sock_addr.sh Wait for testing IPv4/IPv6 to become available ..... ERROR: Timeout waiting for test IP to become available. In test_sock, bpf program loads failed due to hitting memlock limits. In test_sock_addr.sh, my test machine is a ipv6 only test box and using "ping" without specifying address family for an ipv6 address does not work. This patch fixed the issue by including header bpf_rlimit.h in test_sock.c and test_sock_addr.c, and specifying address family for ping command. Cc: Andrey Ignatov Signed-off-by: Yonghong Song Acked-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_sock.c | 1 + tools/testing/selftests/bpf/test_sock_addr.c | 1 + tools/testing/selftests/bpf/test_sock_addr.sh | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 73bb20cfb9b7..f4d99fabc56d 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -13,6 +13,7 @@ #include #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index d488f20926e8..2950f80ba7fb 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -15,6 +15,7 @@ #include #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #define CG_PATH "/foo" #define CONNECT4_PROG_PATH "./connect4_prog.o" diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh index c6e1dcf992c4..9832a875a828 100755 --- a/tools/testing/selftests/bpf/test_sock_addr.sh +++ b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -4,7 +4,7 @@ set -eu ping_once() { - ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1 + ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 } wait_for_ip() @@ -13,7 +13,7 @@ wait_for_ip() echo -n "Wait for testing IPv4/IPv6 to become available " for _i in $(seq ${MAX_PING_TRIES}); do echo -n "." - if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then + if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then echo " OK" return fi From 92d32170847bfff2dd08af2c016085779f2fd2a1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 16 Apr 2018 21:10:14 +0200 Subject: [PATCH 300/660] btrfs: fix unaligned access in readdir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last update to readdir introduced a temporary buffer to store the emitted readdir data, but as there are file names of variable length, there's a lot of unaligned access. This was observed on a sparc64 machine: Kernel unaligned access at TPC[102f3080] btrfs_real_readdir+0x51c/0x718 [btrfs] Fixes: 23b5ec74943 ("btrfs: fix readdir deadlock with pagefault") CC: stable@vger.kernel.org # 4.14+ Reported-and-tested-by: René Rebe Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e064c49c9a9a..d241285a0d2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -5905,11 +5906,13 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx) struct dir_entry *entry = addr; char *name = (char *)(entry + 1); - ctx->pos = entry->offset; - if (!dir_emit(ctx, name, entry->name_len, entry->ino, - entry->type)) + ctx->pos = get_unaligned(&entry->offset); + if (!dir_emit(ctx, name, get_unaligned(&entry->name_len), + get_unaligned(&entry->ino), + get_unaligned(&entry->type))) return 1; - addr += sizeof(struct dir_entry) + entry->name_len; + addr += sizeof(struct dir_entry) + + get_unaligned(&entry->name_len); ctx->pos++; } return 0; @@ -5999,14 +6002,15 @@ again: } entry = addr; - entry->name_len = name_len; + put_unaligned(name_len, &entry->name_len); name_ptr = (char *)(entry + 1); read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), name_len); - entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)], + &entry->type); btrfs_dir_item_key_to_cpu(leaf, di, &location); - entry->ino = location.objectid; - entry->offset = found_key.offset; + put_unaligned(location.objectid, &entry->ino); + put_unaligned(found_key.offset, &entry->offset); entries++; addr += sizeof(struct dir_entry) + name_len; total_len += sizeof(struct dir_entry) + name_len; From 64e86fec54069266ba32be551d7b7f75e88ab60c Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 17 Apr 2018 17:40:00 -0600 Subject: [PATCH 301/660] net: qualcomm: rmnet: Fix warning seen with fill_info When the last rmnet device attached to a real device is removed, the real device is unregistered from rmnet. As a result, the real device lookup fails resulting in a warning when the fill_info handler is called as part of the rmnet device unregistration. Fix this by returning the rmnet flags as 0 when no real device is present. WARNING: CPU: 0 PID: 1779 at net/core/rtnetlink.c:3254 rtmsg_ifinfo_build_skb+0xca/0x10d Modules linked in: CPU: 0 PID: 1779 Comm: ip Not tainted 4.16.0-11872-g7ce2367 #1 Stack: 7fe655f0 60371ea3 00000000 00000000 60282bc6 6006b116 7fe65600 60371ee8 7fe65660 6003a68c 00000000 900000000 Call Trace: [<6006b116>] ? printk+0x0/0x94 [<6001f375>] show_stack+0xfe/0x158 [<60371ea3>] ? dump_stack_print_info+0xe8/0xf1 [<60282bc6>] ? rtmsg_ifinfo_build_skb+0xca/0x10d [<6006b116>] ? printk+0x0/0x94 [<60371ee8>] dump_stack+0x2a/0x2c [<6003a68c>] __warn+0x10e/0x13e [<6003a82c>] warn_slowpath_null+0x48/0x4f [<60282bc6>] rtmsg_ifinfo_build_skb+0xca/0x10d [<60282c4d>] rtmsg_ifinfo_event.part.37+0x1e/0x43 [<60282c2f>] ? rtmsg_ifinfo_event.part.37+0x0/0x43 [<60282d03>] rtmsg_ifinfo+0x24/0x28 [<60264e86>] dev_close_many+0xba/0x119 [<60282cdf>] ? rtmsg_ifinfo+0x0/0x28 [<6027c225>] ? rtnl_is_locked+0x0/0x1c [<6026ca67>] rollback_registered_many+0x1ae/0x4ae [<600314be>] ? unblock_signals+0x0/0xae [<6026cdc0>] ? unregister_netdevice_queue+0x19/0xec [<6026ceec>] unregister_netdevice_many+0x21/0xa1 [<6027c765>] rtnl_delete_link+0x3e/0x4e [<60280ecb>] rtnl_dellink+0x262/0x29c [<6027c241>] ? rtnl_get_link+0x0/0x3e [<6027f867>] rtnetlink_rcv_msg+0x235/0x274 Fixes: be81a85f5f87 ("net: qualcomm: rmnet: Implement fill_info") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index d33988570217..5f4e447c5dce 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -350,15 +350,16 @@ static int rmnet_fill_info(struct sk_buff *skb, const struct net_device *dev) real_dev = priv->real_dev; - if (!rmnet_is_real_dev_registered(real_dev)) - return -ENODEV; - if (nla_put_u16(skb, IFLA_RMNET_MUX_ID, priv->mux_id)) goto nla_put_failure; - port = rmnet_get_port_rtnl(real_dev); + if (rmnet_is_real_dev_registered(real_dev)) { + port = rmnet_get_port_rtnl(real_dev); + f.flags = port->data_format; + } else { + f.flags = 0; + } - f.flags = port->data_format; f.mask = ~0; if (nla_put(skb, IFLA_RMNET_FLAGS, sizeof(f), &f)) From af17092810a887178195276255b7b31f8fbe7dbe Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 9 Apr 2018 15:15:28 -0700 Subject: [PATCH 302/660] scsi: iscsi: respond to netlink with unicast when appropriate Instead of always multicasting responses, send a unicast netlink message directed at the correct pid. This will be needed if we ever want to support multiple userspace processes interacting with the kernel over iSCSI netlink simultaneously. Limitations can currently be seen if you attempt to run multiple iscsistart commands in parallel. We've fixed up the userspace issues in iscsistart that prevented multiple instances from running, so now attempts to speed up booting by bringing up multiple iscsi sessions at once in the initramfs are just running into misrouted responses that this fixes. Signed-off-by: Chris Leech Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index f4b52b44b966..65f6c94f2e9b 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2322,6 +2322,12 @@ iscsi_multicast_skb(struct sk_buff *skb, uint32_t group, gfp_t gfp) return nlmsg_multicast(nls, skb, 0, group, gfp); } +static int +iscsi_unicast_skb(struct sk_buff *skb, u32 portid) +{ + return nlmsg_unicast(nls, skb, portid); +} + int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size) { @@ -2524,14 +2530,11 @@ void iscsi_ping_comp_event(uint32_t host_no, struct iscsi_transport *transport, EXPORT_SYMBOL_GPL(iscsi_ping_comp_event); static int -iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi, - void *payload, int size) +iscsi_if_send_reply(u32 portid, int type, void *payload, int size) { struct sk_buff *skb; struct nlmsghdr *nlh; int len = nlmsg_total_size(size); - int flags = multi ? NLM_F_MULTI : 0; - int t = done ? NLMSG_DONE : type; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { @@ -2539,10 +2542,9 @@ iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi, return -ENOMEM; } - nlh = __nlmsg_put(skb, 0, 0, t, (len - sizeof(*nlh)), 0); - nlh->nlmsg_flags = flags; + nlh = __nlmsg_put(skb, 0, 0, type, (len - sizeof(*nlh)), 0); memcpy(nlmsg_data(nlh), payload, size); - return iscsi_multicast_skb(skb, group, GFP_ATOMIC); + return iscsi_unicast_skb(skb, portid); } static int @@ -3470,6 +3472,7 @@ static int iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) { int err = 0; + u32 portid; struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_transport *transport = NULL; struct iscsi_internal *priv; @@ -3490,10 +3493,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) if (!try_module_get(transport->owner)) return -EINVAL; + portid = NETLINK_CB(skb).portid; + switch (nlh->nlmsg_type) { case ISCSI_UEVENT_CREATE_SESSION: err = iscsi_if_create_session(priv, ep, ev, - NETLINK_CB(skb).portid, + portid, ev->u.c_session.initial_cmdsn, ev->u.c_session.cmds_max, ev->u.c_session.queue_depth); @@ -3506,7 +3511,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) } err = iscsi_if_create_session(priv, ep, ev, - NETLINK_CB(skb).portid, + portid, ev->u.c_bound_session.initial_cmdsn, ev->u.c_bound_session.cmds_max, ev->u.c_bound_session.queue_depth); @@ -3664,6 +3669,8 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) static void iscsi_if_rx(struct sk_buff *skb) { + u32 portid = NETLINK_CB(skb).portid; + mutex_lock(&rx_queue_mutex); while (skb->len >= NLMSG_HDRLEN) { int err; @@ -3699,8 +3706,8 @@ iscsi_if_rx(struct sk_buff *skb) break; if (ev->type == ISCSI_UEVENT_GET_CHAP && !err) break; - err = iscsi_if_send_reply(group, nlh->nlmsg_seq, - nlh->nlmsg_type, 0, 0, ev, sizeof(*ev)); + err = iscsi_if_send_reply(portid, nlh->nlmsg_type, + ev, sizeof(*ev)); } while (err < 0 && err != -ECONNREFUSED && err != -ESRCH); skb_pull(skb, rlen); } From 4f2c8bf6bdff306e8f918fa4bacf636c6b9ef760 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 10 Apr 2018 13:00:36 -0400 Subject: [PATCH 303/660] scsi: scsi_debug: IMMED related delay adjustments A patch titled: "[PATCH v2] scsi_debug: implement IMMED bit" introduced long delays to the Start stop unit (SSU) and Synchronize cache (SC) commands when the IMMED bit is clear. This patch makes those delays more realistic. It causes SSU to only delay when the start stop state is changed; SC only delays when there's been a write since the previous SC. It also reduced the SC delay from 1 second to 50 milliseconds. Signed-off-by: Douglas Gilbert Tested-by: Ming Lei Reported-by: Ming Lei Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 9ef5e3b810f6..656c98e116a9 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -234,11 +234,13 @@ static const char *sdebug_version_date = "20180128"; #define F_INV_OP 0x200 #define F_FAKE_RW 0x400 #define F_M_ACCESS 0x800 /* media access */ -#define F_LONG_DELAY 0x1000 +#define F_SSU_DELAY 0x1000 +#define F_SYNC_DELAY 0x2000 #define FF_RESPOND (F_RL_WLUN_OK | F_SKIP_UA | F_DELAY_OVERR) #define FF_MEDIA_IO (F_M_ACCESS | F_FAKE_RW) #define FF_SA (F_SA_HIGH | F_SA_LOW) +#define F_LONG_DELAY (F_SSU_DELAY | F_SYNC_DELAY) #define SDEBUG_MAX_PARTS 4 @@ -510,7 +512,7 @@ static const struct opcode_info_t release_iarr[] = { }; static const struct opcode_info_t sync_cache_iarr[] = { - {0, 0x91, 0, F_LONG_DELAY | F_M_ACCESS, resp_sync_cache, NULL, + {0, 0x91, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, NULL, {16, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* SYNC_CACHE (16) */ }; @@ -553,7 +555,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { resp_write_dt0, write_iarr, /* WRITE(16) */ {16, 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7} }, - {0, 0x1b, 0, F_LONG_DELAY, resp_start_stop, NULL,/* START STOP UNIT */ + {0, 0x1b, 0, F_SSU_DELAY, resp_start_stop, NULL,/* START STOP UNIT */ {6, 0x1, 0, 0xf, 0xf7, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, {ARRAY_SIZE(sa_in_16_iarr), 0x9e, 0x10, F_SA_LOW | F_D_IN, resp_readcap16, sa_in_16_iarr, /* SA_IN(16), READ CAPACITY(16) */ @@ -606,7 +608,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { resp_write_same_10, write_same_iarr, /* WRITE SAME(10) */ {10, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0} }, - {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_LONG_DELAY | F_M_ACCESS, + {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, sync_cache_iarr, {10, 0x7, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0} }, /* SYNC_CACHE (10) */ @@ -667,6 +669,7 @@ static bool sdebug_strict = DEF_STRICT; static bool sdebug_any_injecting_opt; static bool sdebug_verbose; static bool have_dif_prot; +static bool write_since_sync; static bool sdebug_statistics = DEF_STATISTICS; static unsigned int sdebug_store_sectors; @@ -1607,6 +1610,7 @@ static int resp_start_stop(struct scsi_cmnd *scp, { unsigned char *cmd = scp->cmnd; int power_cond, stop; + bool changing; power_cond = (cmd[4] & 0xf0) >> 4; if (power_cond) { @@ -1614,8 +1618,12 @@ static int resp_start_stop(struct scsi_cmnd *scp, return check_condition_result; } stop = !(cmd[4] & 1); + changing = atomic_read(&devip->stopped) == !stop; atomic_xchg(&devip->stopped, stop); - return (cmd[1] & 0x1) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */ + if (!changing || cmd[1] & 0x1) /* state unchanged or IMMED set */ + return SDEG_RES_IMMED_MASK; + else + return 0; } static sector_t get_sdebug_capacity(void) @@ -2473,6 +2481,7 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba, if (do_write) { sdb = scsi_out(scmd); dir = DMA_TO_DEVICE; + write_since_sync = true; } else { sdb = scsi_in(scmd); dir = DMA_FROM_DEVICE; @@ -3583,6 +3592,7 @@ static int resp_get_lba_status(struct scsi_cmnd *scp, static int resp_sync_cache(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { + int res = 0; u64 lba; u32 num_blocks; u8 *cmd = scp->cmnd; @@ -3598,7 +3608,11 @@ static int resp_sync_cache(struct scsi_cmnd *scp, mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0); return check_condition_result; } - return (cmd[1] & 0x2) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */ + if (!write_since_sync || cmd[1] & 0x2) + res = SDEG_RES_IMMED_MASK; + else /* delay if write_since_sync and IMMED clear */ + write_since_sync = false; + return res; } #define RL_BUCKET_ELEMS 8 @@ -5777,13 +5791,14 @@ fini: return schedule_resp(scp, devip, errsts, pfp, 0, 0); else if ((sdebug_jdelay || sdebug_ndelay) && (flags & F_LONG_DELAY)) { /* - * If any delay is active, want F_LONG_DELAY to be at least 1 + * If any delay is active, for F_SSU_DELAY want at least 1 * second and if sdebug_jdelay>0 want a long delay of that - * many seconds. + * many seconds; for F_SYNC_DELAY want 1/20 of that. */ int jdelay = (sdebug_jdelay < 2) ? 1 : sdebug_jdelay; + int denom = (flags & F_SYNC_DELAY) ? 20 : 1; - jdelay = mult_frac(USER_HZ * jdelay, HZ, USER_HZ); + jdelay = mult_frac(USER_HZ * jdelay, HZ, denom * USER_HZ); return schedule_resp(scp, devip, errsts, pfp, jdelay, 0); } else return schedule_resp(scp, devip, errsts, pfp, sdebug_jdelay, From 0cfce53a7880eb949e55fcfbe0d82c613f056fe0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Apr 2018 08:36:49 +0100 Subject: [PATCH 304/660] scsi: fnic: fix spelling mistake in fnic stats "Abord" -> "Abort" Trivial fix to spelling mistake in fnic stats message text. Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c index abddde11982b..98597b59c12a 100644 --- a/drivers/scsi/fnic/fnic_trace.c +++ b/drivers/scsi/fnic/fnic_trace.c @@ -296,7 +296,7 @@ int fnic_get_stats_data(struct stats_debug_info *debug, "Number of Abort FW Timeouts: %lld\n" "Number of Abort IO NOT Found: %lld\n" - "Abord issued times: \n" + "Abort issued times: \n" " < 6 sec : %lld\n" " 6 sec - 20 sec : %lld\n" " 20 sec - 30 sec : %lld\n" From 13a83eac373c49c0a081cbcd137e79210fe78acd Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 11 Apr 2018 13:37:58 +1000 Subject: [PATCH 305/660] powerpc/eeh: Fix enabling bridge MMIO windows On boot we save the configuration space of PCIe bridges. We do this so when we get an EEH event and everything gets reset that we can restore them. Unfortunately we save this state before we've enabled the MMIO space on the bridges. Hence if we have to reset the bridge when we come back MMIO is not enabled and we end up taking an PE freeze when the driver starts accessing again. This patch forces the memory/MMIO and bus mastering on when restoring bridges on EEH. Ideally we'd do this correctly by saving the configuration space writes later, but that will have to come later in a larger EEH rewrite. For now we have this simple fix. The original bug can be triggered on a boston machine by doing: echo 0x8000000000000000 > /sys/kernel/debug/powerpc/PCI0001/err_injct_outbound On boston, this PHB has a PCIe switch on it. Without this patch, you'll see two EEH events, 1 expected and 1 the failure we are fixing here. The second EEH event causes the anything under the PHB to disappear (i.e. the i40e eth). With this patch, only 1 EEH event occurs and devices properly recover. Fixes: 652defed4875 ("powerpc/eeh: Check PCIe link after reset") Cc: stable@vger.kernel.org # v3.11+ Reported-by: Pridhiviraj Paidipeddi Signed-off-by: Michael Neuling Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_pe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 2d4956e97aa9..ee5a67d57aab 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -807,7 +807,8 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); /* PCI Command: 0x4 */ - eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1]); + eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); /* Check the PCIe link is ready */ eeh_bridge_check_link(edev); From ab60ee7bf9a84954f50a66a3d835860e80f99b7f Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Apr 2018 12:17:05 -0700 Subject: [PATCH 306/660] cifs: smbd: Check for iov length on sending the last iov When sending the last iov that breaks into smaller buffers to fit the transfer size, it's necessary to check if this is the last iov. If this is the latest iov, stop and proceed to send pages. Signed-off-by: Long Li Cc: stable@vger.kernel.org Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/smbdirect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 5008af546dd1..d611ed0537fd 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -2194,6 +2194,8 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) goto done; } i++; + if (i == rqst->rq_nvec) + break; } start = i; buflen = 0; From 94e5395d2403c8bc2504a7cbe4c4caaacb7b8b84 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 18 Apr 2018 22:54:59 -0400 Subject: [PATCH 307/660] scsi: mptsas: Disable WRITE SAME First generation MPT Fusion controllers can not translate WRITE SAME when the attached device is a SATA drive. Disable WRITE SAME support. Reported-by: Nikola Ciprich Cc: Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptsas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 231f3a1e27bf..86503f60468f 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1994,6 +1994,7 @@ static struct scsi_host_template mptsas_driver_template = { .cmd_per_lun = 7, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = mptscsih_host_attrs, + .no_write_same = 1, }; static int mptsas_get_linkerrors(struct sas_phy *phy) From 4d79e7d0f0cfe22c05c21a1008fa000f153119aa Mon Sep 17 00:00:00 2001 From: John Pittman Date: Sat, 24 Mar 2018 09:30:48 -0400 Subject: [PATCH 308/660] scsi: core: remove reference to scsi_show_extd_sense() In commit 2104551969e8 ("scsi: use per-cpu buffer for formatting sense"), function scsi_show_extd_sense() was removed, switching use over to scsi_format_extd_sense(). Remove last reference to scsi_show_extd_sense() in include/scsi/scsi_dbg.h. Signed-off-by: John Pittman Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_dbg.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/scsi/scsi_dbg.h b/include/scsi/scsi_dbg.h index 04e0679767f6..e03bd9d41fa8 100644 --- a/include/scsi/scsi_dbg.h +++ b/include/scsi/scsi_dbg.h @@ -11,8 +11,6 @@ struct scsi_sense_hdr; extern void scsi_print_command(struct scsi_cmnd *); extern size_t __scsi_format_command(char *, size_t, const unsigned char *, size_t); -extern void scsi_show_extd_sense(const struct scsi_device *, const char *, - unsigned char, unsigned char); extern void scsi_print_sense_hdr(const struct scsi_device *, const char *, const struct scsi_sense_hdr *); extern void scsi_print_sense(const struct scsi_cmnd *); From 6667e6d91c88a788d5ab1b1e999d245473e84a90 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Wed, 28 Mar 2018 12:42:18 +0300 Subject: [PATCH 309/660] scsi: ufs: add trace event for ufs upiu Add UFS Protocol Information Units(upiu) trace events for ufs driver, used to trace various ufs transaction types- command, task-management and device management. The trace-point format is generic and can be easily adapted to trace other upius if needed. Currently tracing ufs transaction of type 'device management', which this patch introduce, cannot be obtained from any other trace. Device management transactions are used for communication with the device such as reading and writing descriptor or attributes etc. Signed-off-by: Ohad Sharabi Reviewed-by: Stanislav Nijnikov Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 40 ++++++++++++++++++++++++++++++++++++++ include/trace/events/ufs.h | 27 +++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c5b1bf1cadcb..00e79057f870 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -276,6 +276,35 @@ static inline void ufshcd_remove_non_printable(char *val) *val = ' '; } +static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag, + const char *str) +{ + struct utp_upiu_req *rq = hba->lrb[tag].ucd_req_ptr; + + trace_ufshcd_upiu(dev_name(hba->dev), str, &rq->header, &rq->sc.cdb); +} + +static void ufshcd_add_query_upiu_trace(struct ufs_hba *hba, unsigned int tag, + const char *str) +{ + struct utp_upiu_req *rq = hba->lrb[tag].ucd_req_ptr; + + trace_ufshcd_upiu(dev_name(hba->dev), str, &rq->header, &rq->qr); +} + +static void ufshcd_add_tm_upiu_trace(struct ufs_hba *hba, unsigned int tag, + const char *str) +{ + struct utp_task_req_desc *descp; + struct utp_upiu_task_req *task_req; + int off = (int)tag - hba->nutrs; + + descp = &hba->utmrdl_base_addr[off]; + task_req = (struct utp_upiu_task_req *)descp->task_req_upiu; + trace_ufshcd_upiu(dev_name(hba->dev), str, &task_req->header, + &task_req->input_param1); +} + static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag, const char *str) { @@ -285,6 +314,9 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, struct ufshcd_lrb *lrbp; int transfer_len = -1; + /* trace UPIU also */ + ufshcd_add_cmd_upiu_trace(hba, tag, str); + if (!trace_ufshcd_command_enabled()) return; @@ -2550,6 +2582,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, hba->dev_cmd.complete = &wait; + ufshcd_add_query_upiu_trace(hba, tag, "query_send"); /* Make sure descriptors are ready before ringing the doorbell */ wmb(); spin_lock_irqsave(hba->host->host_lock, flags); @@ -2559,6 +2592,9 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout); + ufshcd_add_query_upiu_trace(hba, tag, + err ? "query_complete_err" : "query_complete"); + out_put_tag: ufshcd_put_dev_cmd_tag(hba, tag); wake_up(&hba->dev_cmd.tag_wq); @@ -5443,11 +5479,14 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id, spin_unlock_irqrestore(host->host_lock, flags); + ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_send"); + /* wait until the task management command is completed */ err = wait_event_timeout(hba->tm_wq, test_bit(free_slot, &hba->tm_condition), msecs_to_jiffies(TM_CMD_TIMEOUT)); if (!err) { + ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_complete_err"); dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n", __func__, tm_function); if (ufshcd_clear_tm_cmd(hba, free_slot)) @@ -5456,6 +5495,7 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id, err = -ETIMEDOUT; } else { err = ufshcd_task_req_compl(hba, free_slot, tm_response); + ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_complete"); } clear_bit(free_slot, &hba->tm_condition); diff --git a/include/trace/events/ufs.h b/include/trace/events/ufs.h index bf6f82673492..f8260e5c79ad 100644 --- a/include/trace/events/ufs.h +++ b/include/trace/events/ufs.h @@ -257,6 +257,33 @@ TRACE_EVENT(ufshcd_command, ) ); +TRACE_EVENT(ufshcd_upiu, + TP_PROTO(const char *dev_name, const char *str, void *hdr, void *tsf), + + TP_ARGS(dev_name, str, hdr, tsf), + + TP_STRUCT__entry( + __string(dev_name, dev_name) + __string(str, str) + __array(unsigned char, hdr, 12) + __array(unsigned char, tsf, 16) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __assign_str(str, str); + memcpy(__entry->hdr, hdr, sizeof(__entry->hdr)); + memcpy(__entry->tsf, tsf, sizeof(__entry->tsf)); + ), + + TP_printk( + "%s: %s: HDR:%s, CDB:%s", + __get_str(str), __get_str(dev_name), + __print_hex(__entry->hdr, sizeof(__entry->hdr)), + __print_hex(__entry->tsf, sizeof(__entry->tsf)) + ) +); + #endif /* if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ From fb1633d56b0025233ed3dc49b44544748d509d9d Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 21 Mar 2018 21:04:12 +0000 Subject: [PATCH 310/660] scsi: megaraid_sas: Do not log an error if FW successfully initializes. Fixes: 2d2c2331673c ("scsi: megaraid_sas: modified few prints in OCR and IOC INIT path") Signed-off-by: Vinson Lee Acked-by: Shivasharan S Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index ce97cde3b41c..f4d988dd1e9d 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -1124,12 +1124,12 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) goto fail_fw_init; } - ret = 0; + return 0; fail_fw_init: dev_err(&instance->pdev->dev, - "Init cmd return status %s for SCSI host %d\n", - ret ? "FAILED" : "SUCCESS", instance->host->host_no); + "Init cmd return status FAILED for SCSI host %d\n", + instance->host->host_no); return ret; } From 505aa4b6a8834a2300971c5220c380c3271ebde3 Mon Sep 17 00:00:00 2001 From: Mahesh Rajashekhara Date: Tue, 17 Apr 2018 17:03:12 +0530 Subject: [PATCH 311/660] scsi: sd: Defer spinning up drive while SANITIZE is in progress A drive being sanitized will return NOT READY / ASC 0x4 / ASCQ 0x1b ("LOGICAL UNIT NOT READY. SANITIZE IN PROGRESS"). Prevent spinning up the drive until this condition clears. [mkp: tweaked commit message] Signed-off-by: Mahesh Rajashekhara Cc: Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a6201e696ab9..9421d9877730 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2121,6 +2121,8 @@ sd_spinup_disk(struct scsi_disk *sdkp) break; /* standby */ if (sshdr.asc == 4 && sshdr.ascq == 0xc) break; /* unavailable */ + if (sshdr.asc == 4 && sshdr.ascq == 0x1b) + break; /* sanitize in progress */ /* * Issue command to spin up drive when not ready */ From ccce20fc7968d546fb1e8e147bf5cdc8afc4278a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 16 Apr 2018 18:04:41 -0700 Subject: [PATCH 312/660] scsi: sd_zbc: Avoid that resetting a zone fails sporadically Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is called from sd_probe_async() and since sd_revalidate_disk() calls sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called concurrently with blkdev_report_zones() and/or blkdev_reset_zones(). That can cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets q->nr_zones to zero before restoring it to the actual value, even if no drive characteristics have changed. Avoid that this can happen by making the following changes: - Protect the code that updates zone information with blk_queue_enter() and blk_queue_exit(). - Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that these functions do not modify struct scsi_disk before all zone information has been obtained. Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track legacy requests"; kernel v4.15) the request queue freezing mechanism also affects legacy request queues. Fixes: 89d947561077 ("sd: Implement support for ZBC devices") Signed-off-by: Bart Van Assche Cc: Jens Axboe Cc: Damien Le Moal Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: stable@vger.kernel.org # v4.16 Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 136 ++++++++++++++++++++++++----------------- include/linux/blkdev.h | 5 ++ 2 files changed, 85 insertions(+), 56 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 41df75eea57b..210407cd2341 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf) * * Check that all zones of the device are equal. The last zone can however * be smaller. The zone size must also be a power of two number of LBAs. + * + * Returns the zone size in bytes upon success or an error code upon failure. */ -static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) +static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp) { u64 zone_blocks = 0; sector_t block = 0; @@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) int ret; u8 same; - sdkp->zone_blocks = 0; - /* Get a buffer */ buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) /* Parse zone descriptors */ while (rec < buf + buf_len) { - zone_blocks = get_unaligned_be64(&rec[8]); - if (sdkp->zone_blocks == 0) { - sdkp->zone_blocks = zone_blocks; - } else if (zone_blocks != sdkp->zone_blocks && - (block + zone_blocks < sdkp->capacity - || zone_blocks > sdkp->zone_blocks)) { - zone_blocks = 0; + u64 this_zone_blocks = get_unaligned_be64(&rec[8]); + + if (zone_blocks == 0) { + zone_blocks = this_zone_blocks; + } else if (this_zone_blocks != zone_blocks && + (block + this_zone_blocks < sdkp->capacity + || this_zone_blocks > zone_blocks)) { + this_zone_blocks = 0; goto out; } - block += zone_blocks; + block += this_zone_blocks; rec += 64; } @@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) } while (block < sdkp->capacity); - zone_blocks = sdkp->zone_blocks; - out: if (!zone_blocks) { if (sdkp->first_scan) @@ -488,8 +487,7 @@ out: "Zone size too large\n"); ret = -ENODEV; } else { - sdkp->zone_blocks = zone_blocks; - sdkp->zone_shift = ilog2(zone_blocks); + ret = zone_blocks; } out_free: @@ -500,15 +498,14 @@ out_free: /** * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone). - * @sdkp: The disk of the bitmap + * @nr_zones: Number of zones to allocate space for. + * @numa_node: NUMA node to allocate the memory from. */ -static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) +static inline unsigned long * +sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node) { - struct request_queue *q = sdkp->disk->queue; - - return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones) - * sizeof(unsigned long), - GFP_KERNEL, q->node); + return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long), + GFP_KERNEL, numa_node); } /** @@ -516,6 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) * @sdkp: disk used * @buf: report reply buffer * @buflen: length of @buf + * @zone_shift: logarithm base 2 of the number of blocks in a zone * @seq_zones_bitmap: bitmap of sequential zones to set * * Parse reported zone descriptors in @buf to identify sequential zones and @@ -525,7 +523,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) * Return the LBA after the last zone reported. */ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, - unsigned int buflen, + unsigned int buflen, u32 zone_shift, unsigned long *seq_zones_bitmap) { sector_t lba, next_lba = sdkp->capacity; @@ -544,7 +542,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, if (type != ZBC_ZONE_TYPE_CONV && cond != ZBC_ZONE_COND_READONLY && cond != ZBC_ZONE_COND_OFFLINE) - set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap); + set_bit(lba >> zone_shift, seq_zones_bitmap); next_lba = lba + get_unaligned_be64(&rec[8]); rec += 64; } @@ -553,12 +551,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, } /** - * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. + * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap. * @sdkp: target disk + * @zone_shift: logarithm base 2 of the number of blocks in a zone + * @nr_zones: number of zones to set up a seq zone bitmap for * * Allocate a zone bitmap and initialize it by identifying sequential zones. */ -static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) +static unsigned long * +sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift, + u32 nr_zones) { struct request_queue *q = sdkp->disk->queue; unsigned long *seq_zones_bitmap; @@ -566,9 +568,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) unsigned char *buf; int ret = -ENOMEM; - seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); + seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node); if (!seq_zones_bitmap) - return -ENOMEM; + return ERR_PTR(-ENOMEM); buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -579,7 +581,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) if (ret) goto out; lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, - seq_zones_bitmap); + zone_shift, seq_zones_bitmap); } if (lba != sdkp->capacity) { @@ -591,12 +593,9 @@ out: kfree(buf); if (ret) { kfree(seq_zones_bitmap); - return ret; + return ERR_PTR(ret); } - - q->seq_zones_bitmap = seq_zones_bitmap; - - return 0; + return seq_zones_bitmap; } static void sd_zbc_cleanup(struct scsi_disk *sdkp) @@ -612,44 +611,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp) q->nr_zones = 0; } -static int sd_zbc_setup(struct scsi_disk *sdkp) +static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks) { struct request_queue *q = sdkp->disk->queue; + u32 zone_shift = ilog2(zone_blocks); + u32 nr_zones; int ret; - /* READ16/WRITE16 is mandatory for ZBC disks */ - sdkp->device->use_16_for_rw = 1; - sdkp->device->use_10_for_rw = 0; - /* chunk_sectors indicates the zone size */ - blk_queue_chunk_sectors(sdkp->disk->queue, - logical_to_sectors(sdkp->device, sdkp->zone_blocks)); - sdkp->nr_zones = - round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift; + blk_queue_chunk_sectors(q, + logical_to_sectors(sdkp->device, zone_blocks)); + nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift; /* * Initialize the device request queue information if the number * of zones changed. */ - if (sdkp->nr_zones != q->nr_zones) { + if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) { + unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; + size_t zone_bitmap_size; - sd_zbc_cleanup(sdkp); - - q->nr_zones = sdkp->nr_zones; - if (sdkp->nr_zones) { - q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); - if (!q->seq_zones_wlock) { + if (nr_zones) { + seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones, + q->node); + if (!seq_zones_wlock) { ret = -ENOMEM; goto err; } - ret = sd_zbc_setup_seq_zones_bitmap(sdkp); - if (ret) { - sd_zbc_cleanup(sdkp); + seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp, + zone_shift, nr_zones); + if (IS_ERR(seq_zones_bitmap)) { + ret = PTR_ERR(seq_zones_bitmap); + kfree(seq_zones_wlock); goto err; } } + zone_bitmap_size = BITS_TO_LONGS(nr_zones) * + sizeof(unsigned long); + blk_mq_freeze_queue(q); + if (q->nr_zones != nr_zones) { + /* READ16/WRITE16 is mandatory for ZBC disks */ + sdkp->device->use_16_for_rw = 1; + sdkp->device->use_10_for_rw = 0; + sdkp->zone_blocks = zone_blocks; + sdkp->zone_shift = zone_shift; + sdkp->nr_zones = nr_zones; + q->nr_zones = nr_zones; + swap(q->seq_zones_wlock, seq_zones_wlock); + swap(q->seq_zones_bitmap, seq_zones_bitmap); + } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size) != 0) { + memcpy(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size); + } + blk_mq_unfreeze_queue(q); + kfree(seq_zones_wlock); + kfree(seq_zones_bitmap); } return 0; @@ -661,6 +680,7 @@ err: int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) { + int64_t zone_blocks; int ret; if (!sd_is_zoned(sdkp)) @@ -697,12 +717,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) * Check zone size: only devices with a constant zone size (except * an eventual last runt zone) that is a power of 2 are supported. */ - ret = sd_zbc_check_zone_size(sdkp); - if (ret) + zone_blocks = sd_zbc_check_zone_size(sdkp); + ret = -EFBIG; + if (zone_blocks != (u32)zone_blocks) + goto err; + ret = zone_blocks; + if (ret < 0) goto err; /* The drive satisfies the kernel restrictions: set it up */ - ret = sd_zbc_setup(sdkp); + ret = sd_zbc_setup(sdkp, zone_blocks); if (ret) goto err; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9af3e0f430bc..21e21f273a21 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -605,6 +605,11 @@ struct request_queue { * initialized by the low level device driver (e.g. scsi/sd.c). * Stacking drivers (device mappers) may or may not initialize * these fields. + * + * Reads of this information must be protected with blk_queue_enter() / + * blk_queue_exit(). Modifying this information is only allowed while + * no requests are being processed. See also blk_mq_freeze_queue() and + * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; unsigned long *seq_zones_bitmap; From 8e1ceafe50ec4d1bcfae154dd70e7cb6946a6177 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 16 Apr 2018 17:48:41 +0800 Subject: [PATCH 313/660] scsi: target: fix crash with iscsi target and dvd When the current page can't be added to bio, one new bio should be created for adding this page again, instead of ignoring this page. This patch fixes kernel crash with iscsi target and dvd, as reported by Wakko. Cc: Wakko Warner Cc: Bart Van Assche Cc: target-devel@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Nicholas A. Bellinger" Cc: Christoph Hellwig Fixes: 84c8590646d5b35804 ("target: avoid accessing .bi_vcnt directly") Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pscsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 0d99b242e82e..6cb933ecc084 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -890,6 +890,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, bytes = min(bytes, data_len); if (!bio) { +new_bio: nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages); nr_pages -= nr_vecs; /* @@ -931,6 +932,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, * be allocated with pscsi_get_bio() above. */ bio = NULL; + goto new_bio; } data_len -= bytes; From 56376c5864f8ff4ba7c78a80ae857eee3b1d23d8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 19 Apr 2018 16:22:20 +1000 Subject: [PATCH 314/660] powerpc/kvm: Fix lockups when running KVM guests on Power8 When running KVM guests on Power8 we can see a lockup where one CPU stops responding. This often leads to a message such as: watchdog: CPU 136 detected hard LOCKUP on other CPUs 72 Task dump for CPU 72: qemu-system-ppc R running task 10560 20917 20908 0x00040004 And then backtraces on other CPUs, such as: Task dump for CPU 48: ksmd R running task 10032 1519 2 0x00000804 Call Trace: ... --- interrupt: 901 at smp_call_function_many+0x3c8/0x460 LR = smp_call_function_many+0x37c/0x460 pmdp_invalidate+0x100/0x1b0 __split_huge_pmd+0x52c/0xdb0 try_to_unmap_one+0x764/0x8b0 rmap_walk_anon+0x15c/0x370 try_to_unmap+0xb4/0x170 split_huge_page_to_list+0x148/0xa30 try_to_merge_one_page+0xc8/0x990 try_to_merge_with_ksm_page+0x74/0xf0 ksm_scan_thread+0x10ec/0x1ac0 kthread+0x160/0x1a0 ret_from_kernel_thread+0x5c/0x78 This is caused by commit 8c1c7fb0b5ec ("powerpc/64s/idle: avoid sync for KVM state when waking from idle"), which added a check in pnv_powersave_wakeup() to see if the kvm_hstate.hwthread_state is already set to KVM_HWTHREAD_IN_KERNEL, and if so to skip the store and test of kvm_hstate.hwthread_req. The problem is that the primary does not set KVM_HWTHREAD_IN_KVM when entering the guest, so it can then come out to cede with KVM_HWTHREAD_IN_KERNEL set. It can then go idle in kvm_do_nap after setting hwthread_req to 1, but because hwthread_state is still KVM_HWTHREAD_IN_KERNEL we will skip the test of hwthread_req when we wake up from idle and won't go to kvm_start_guest. From there the thread will return somewhere garbage and crash. Fix it by skipping the store of hwthread_state, but not the test of hwthread_req, when coming out of idle. It's OK to skip the sync in that case because hwthread_req will have been set on the same thread, so there is no synchronisation required. Fixes: 8c1c7fb0b5ec ("powerpc/64s/idle: avoid sync for KVM state when waking from idle") Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 79d005445c6c..e734f6e45abc 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -553,12 +553,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE lbz r0,HSTATE_HWTHREAD_STATE(r13) cmpwi r0,KVM_HWTHREAD_IN_KERNEL - beq 1f + beq 0f li r0,KVM_HWTHREAD_IN_KERNEL stb r0,HSTATE_HWTHREAD_STATE(r13) /* Order setting hwthread_state vs. testing hwthread_req */ sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) +0: lbz r0,HSTATE_HWTHREAD_REQ(r13) cmpwi r0,0 beq 1f b kvm_start_guest From b658912cb023cd6f8e46963d29779903d3c10538 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 19 Apr 2018 09:25:15 +0200 Subject: [PATCH 315/660] HID: i2c-hid: fix inverted return value from i2c_hid_command() i2c_hid_command() returns non-zero in error cases (the actual errno). Error handling in for I2C_HID_QUIRK_RESEND_REPORT_DESCR case in i2c_hid_resume() had the check inverted; fix that. Fixes: 3e83eda467 ("HID: i2c-hid: Fix resume issue on Raydium touchscreen device") Reported-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 615a91ac93bd..963328674e93 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -1229,7 +1229,7 @@ static int i2c_hid_resume(struct device *dev) */ if (ihid->quirks & I2C_HID_QUIRK_RESEND_REPORT_DESCR) { ret = i2c_hid_command(client, &hid_report_descr_cmd, NULL, 0); - if (!ret) + if (ret) return ret; } From ed4564babeeee4fb19fe4ec0beabe29754e380f9 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 8 Apr 2018 23:57:07 +0200 Subject: [PATCH 316/660] drivers: change struct device_driver::coredump() return type to void Upon submitting a patch for mwifiex [1] it was discussed whether this callback function could fail. To keep things simple there is no need for the error code so the driver can do the task synchronous or not without worries. Currently the device driver core already ignores the return value so changing it to void. [1] https://patchwork.kernel.org/patch/10231933/ Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index 0059b99e1f25..477956990f5e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -256,7 +256,9 @@ enum probe_type { * automatically. * @pm: Power management operations of the device which matched * this driver. - * @coredump: Called through sysfs to initiate a device coredump. + * @coredump: Called when sysfs entry is written to. The device driver + * is expected to call the dev_coredump API resulting in a + * uevent. * @p: Driver core's private data, no one other than the driver * core can touch this. * @@ -288,7 +290,7 @@ struct device_driver { const struct attribute_group **groups; const struct dev_pm_ops *pm; - int (*coredump) (struct device *dev); + void (*coredump) (struct device *dev); struct driver_private *p; }; From 3ce0d5aa265bcc0a4b281cb0cabf92491276101b Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 19 Apr 2018 13:29:04 +0800 Subject: [PATCH 317/660] ALSA: hda/realtek - set PINCFG_HEADSET_MIC to parse_flags Otherwise, the pin will be regarded as microphone, and the jack name is "Mic Phantom", it is always on in the pulseaudio even nothing is plugged into the jack. So the UI is confusing to users since the microphone always shows up in the UI even there is no microphone plugged. After adding this flag, the jack name is "Headset Mic Phantom", then the pulseaudio can handle its detection correctly. Fixes: f0ba9d699e5c ("ALSA: hda/realtek - Fix Dell headset Mic can't record") Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aef1f52db7d9..c3b63b7a4ba4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6370,6 +6370,8 @@ static const struct hda_fixup alc269_fixups[] = { { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ { } }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC }, }; From a3dafb2200bf3c13905a088e82ae11f1eb275a83 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 19 Apr 2018 13:29:05 +0800 Subject: [PATCH 318/660] ALSA: hda/realtek - adjust the location of one mic There are two front mics on this machine, if we don't adjust the location for one of them, they will have the same mixer name, pulseaudio can't handle this situation. After applying this FIXUP, they will have different mixer name, then pulseaudio can handle them correctly. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c3b63b7a4ba4..fc77bf7a1544 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6575,6 +6575,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x3138, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), From 39f2ff0816e5421476c2bc538b68b4bb0708a78e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Apr 2018 12:23:39 +0200 Subject: [PATCH 319/660] netfilter: nf_tables: NAT chain and extensions require NF_TABLES Move these options inside the scope of the 'if' NF_TABLES and NF_TABLES_IPV6 dependencies. This patch fixes: net/ipv6/netfilter/nft_chain_nat_ipv6.o: In function `nft_nat_do_chain': >> net/ipv6/netfilter/nft_chain_nat_ipv6.c:37: undefined reference to `nft_do_chain' net/ipv6/netfilter/nft_chain_nat_ipv6.o: In function `nft_chain_nat_ipv6_exit': >> net/ipv6/netfilter/nft_chain_nat_ipv6.c:94: undefined reference to `nft_unregister_chain_type' net/ipv6/netfilter/nft_chain_nat_ipv6.o: In function `nft_chain_nat_ipv6_init': >> net/ipv6/netfilter/nft_chain_nat_ipv6.c:87: undefined reference to `nft_register_chain_type' that happens with: CONFIG_NF_TABLES=m CONFIG_NFT_CHAIN_NAT_IPV6=y Fixes: 02c7b25e5f54 ("netfilter: nf_tables: build-in filter chain type") Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/Kconfig | 55 +++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ccbfa83e4bb0..ce77bcc2490c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -48,6 +48,34 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. +if NF_NAT_IPV6 + +config NFT_CHAIN_NAT_IPV6 + tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. + +config NFT_MASQ_IPV6 + tristate "IPv6 masquerade support for nf_tables" + depends on NFT_MASQ + select NF_NAT_MASQUERADE_IPV6 + help + This is the expression that provides IPv4 masquerading support for + nf_tables. + +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NFT_REDIR + select NF_NAT_REDIRECT + help + This is the expression that provides IPv4 redirect support for + nf_tables. + +endif # NF_NAT_IPV6 + config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT @@ -107,39 +135,12 @@ config NF_NAT_IPV6 if NF_NAT_IPV6 -config NFT_CHAIN_NAT_IPV6 - depends on NF_TABLES_IPV6 - tristate "IPv6 nf_tables nat chain support" - help - This option enables the "nat" chain for IPv6 in nf_tables. This - chain type is used to perform Network Address Translation (NAT) - packet transformations such as the source, destination address and - source and destination ports. - config NF_NAT_MASQUERADE_IPV6 tristate "IPv6 masquerade support" help This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection) for IPv6. -config NFT_MASQ_IPV6 - tristate "IPv6 masquerade support for nf_tables" - depends on NF_TABLES_IPV6 - depends on NFT_MASQ - select NF_NAT_MASQUERADE_IPV6 - help - This is the expression that provides IPv4 masquerading support for - nf_tables. - -config NFT_REDIR_IPV6 - tristate "IPv6 redirect support for nf_tables" - depends on NF_TABLES_IPV6 - depends on NFT_REDIR - select NF_NAT_REDIRECT - help - This is the expression that provides IPv4 redirect support for - nf_tables. - endif # NF_NAT_IPV6 config IP6_NF_IPTABLES From d71efb599ad42ef1e564c652d8084252bdc85edf Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 18 Apr 2018 23:35:34 +0900 Subject: [PATCH 320/660] netfilter: nf_tables: fix out-of-bounds in nft_chain_commit_update When chain name is changed, nft_chain_commit_update is called. In the nft_chain_commit_update, trans->ctx.chain->name has old chain name and nft_trans_chain_name(trans) has new chain name. If new chain name is longer than old chain name, KASAN warns slab-out-of-bounds. [ 175.015012] BUG: KASAN: slab-out-of-bounds in strcpy+0x9e/0xb0 [ 175.022735] Write of size 1 at addr ffff880114e022da by task iptables-compat/1458 [ 175.031353] CPU: 0 PID: 1458 Comm: iptables-compat Not tainted 4.16.0-rc7+ #146 [ 175.031353] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015 [ 175.031353] Call Trace: [ 175.031353] dump_stack+0x68/0xa0 [ 175.031353] print_address_description+0xd0/0x260 [ 175.031353] ? strcpy+0x9e/0xb0 [ 175.031353] kasan_report+0x234/0x350 [ 175.031353] __asan_report_store1_noabort+0x1c/0x20 [ 175.031353] strcpy+0x9e/0xb0 [ 175.031353] nf_tables_commit+0x1ccc/0x2990 [ 175.031353] nfnetlink_rcv+0x141e/0x16c0 [ 175.031353] ? nfnetlink_net_init+0x150/0x150 [ 175.031353] ? lock_acquire+0x370/0x370 [ 175.031353] ? lock_acquire+0x370/0x370 [ 175.031353] netlink_unicast+0x444/0x640 [ 175.031353] ? netlink_attachskb+0x700/0x700 [ 175.031353] ? _copy_from_iter_full+0x180/0x740 [ 175.031353] ? kasan_check_write+0x14/0x20 [ 175.031353] ? _copy_from_user+0x9b/0xd0 [ 175.031353] netlink_sendmsg+0x845/0xc70 [ ... ] Steps to reproduce: iptables-compat -N 1 iptables-compat -E 1 aaaaaaaaa Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 102ad873acb4..04d4e3772584 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5745,7 +5745,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) struct nft_base_chain *basechain; if (nft_trans_chain_name(trans)) - strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); + swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); if (!nft_is_base_chain(trans->ctx.chain)) return; From c3bca5d450b620dd3d36e14b5e1f43639fd47d6b Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 17 Apr 2018 14:57:42 -0700 Subject: [PATCH 321/660] posix-cpu-timers: Ensure set_process_cpu_timer is always evaluated Commit a9445e47d897 ("posix-cpu-timers: Make set_process_cpu_timer() more robust") moved the check into the 'if' statement. Unfortunately, it did so on the right side of an && which means that it may get short circuited and never evaluated. This is easily reproduced with: $ cat loop.c void main() { struct rlimit res; /* set the CPU time limit */ getrlimit(RLIMIT_CPU,&res); res.rlim_cur = 2; res.rlim_max = 2; setrlimit(RLIMIT_CPU,&res); while (1); } Which will hang forever instead of being killed. Fix this by pulling the evaluation out of the if statement but checking the return value instead. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1568337 Fixes: a9445e47d897 ("posix-cpu-timers: Make set_process_cpu_timer() more robust") Signed-off-by: Laura Abbott Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: "Max R . P . Grossmann" Cc: John Stultz Link: https://lkml.kernel.org/r/20180417215742.2521-1-labbott@redhat.com --- kernel/time/posix-cpu-timers.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 2541bd89f20e..5a6251ac6f7a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1205,10 +1205,12 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, u64 *newval, u64 *oldval) { u64 now; + int ret; WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); + ret = cpu_timer_sample_group(clock_idx, tsk, &now); - if (oldval && cpu_timer_sample_group(clock_idx, tsk, &now) != -EINVAL) { + if (oldval && ret != -EINVAL) { /* * We are setting itimer. The *oldval is absolute and we update * it to be relative, *newval argument is relative and we update From 7407188489c62a7b5694bc75a6db2b82af94c9a5 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Thu, 19 Apr 2018 14:04:43 +0800 Subject: [PATCH 322/660] clocksource/imx-tpm: Correct -ETIME return condition check The additional brakects added to tpm_set_next_event's return value computation causes (int) forced type conversion NOT taking effect, and the incorrect value return will cause various system timer issue, like RCU stall etc.. Remove the additional brackets to make sure tpm_set_next_event always returns correct value. Fixes: 059ab7b82eec ("clocksource/drivers/imx-tpm: Add imx tpm timer support") Signed-off-by: Anson Huang Signed-off-by: Thomas Gleixner Acked-by: Dong Aisheng Cc: stable@vger.kernel.org Cc: daniel.lezcano@linaro.org Cc: Linux-imx@nxp.com Link: https://lkml.kernel.org/r/1524117883-2484-1-git-send-email-Anson.Huang@nxp.com --- drivers/clocksource/timer-imx-tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 05d97a6871d8..6c8318470b48 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -114,7 +114,7 @@ static int tpm_set_next_event(unsigned long delta, * of writing CNT registers which may cause the min_delta event got * missed, so we need add a ETIME check here in case it happened. */ - return (int)((next - now) <= 0) ? -ETIME : 0; + return (int)(next - now) <= 0 ? -ETIME : 0; } static int tpm_set_state_oneshot(struct clock_event_device *evt) From a8419a0cd98ddf628a9e38a92110af7cc650dde7 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 18 Apr 2018 18:46:37 +0100 Subject: [PATCH 323/660] ASoC: msm8916-wcd-analog: use threaded context for mbhc events As snd_soc_jack_report() can sleep, move handling of mbhc events to a thread context rather than in interrupt context. Fixes: de66b3455023 ('ASoC: codecs: msm8916-wcd-analog: add MBHC support') Reported-by: Bjorn Andersson Signed-off-by: Srinivas Kandagatla Signed-off-by: Mark Brown --- sound/soc/codecs/msm8916-wcd-analog.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 12ee83d52405..b7cf7cce95fe 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -1187,7 +1187,8 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) return irq; } - ret = devm_request_irq(dev, irq, pm8916_mbhc_switch_irq_handler, + ret = devm_request_threaded_irq(dev, irq, NULL, + pm8916_mbhc_switch_irq_handler, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "mbhc switch irq", priv); @@ -1201,7 +1202,8 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) return irq; } - ret = devm_request_irq(dev, irq, mbhc_btn_press_irq_handler, + ret = devm_request_threaded_irq(dev, irq, NULL, + mbhc_btn_press_irq_handler, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "mbhc btn press irq", priv); @@ -1214,7 +1216,8 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) return irq; } - ret = devm_request_irq(dev, irq, mbhc_btn_release_irq_handler, + ret = devm_request_threaded_irq(dev, irq, NULL, + mbhc_btn_release_irq_handler, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "mbhc btn release irq", priv); From 0cbc94daa55441c21999e96a07061952d873dcb7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 18 Apr 2018 20:20:57 +0200 Subject: [PATCH 324/660] mmc: renesas_sdhi_internal_dmac: limit DMA RX for old SoCs Early revisions of certain SoCs cannot do multiple DMA RX streams in parallel. To avoid data corruption, only allow one DMA RX channel and fall back to PIO, if needed. Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Tested-by: Nguyen Viet Dung Reviewed-by: Simon Horman Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 8e0acd197c43..6af946d16d24 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -9,6 +9,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -62,6 +63,17 @@ * need a custom accessor. */ +static unsigned long global_flags; +/* + * Workaround for avoiding to use RX DMAC by multiple channels. + * On R-Car H3 ES1.* and M3-W ES1.0, when multiple SDHI channels use + * RX DMAC simultaneously, sometimes hundreds of bytes data are not + * stored into the system memory even if the DMAC interrupt happened. + * So, this driver then uses one RX DMAC channel only. + */ +#define SDHI_INTERNAL_DMAC_ONE_RX_ONLY 0 +#define SDHI_INTERNAL_DMAC_RX_IN_USE 1 + /* Definitions for sampling clocks */ static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = { { @@ -126,6 +138,9 @@ renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host) { renesas_sdhi_internal_dmac_dm_write(host, DM_CM_RST, RST_RESERVED_BITS | val); + if (host->data && host->data->flags & MMC_DATA_READ) + clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags); + renesas_sdhi_internal_dmac_enable_dma(host, true); } @@ -155,6 +170,9 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host, if (data->flags & MMC_DATA_READ) { dtran_mode |= DTRAN_MODE_CH_NUM_CH1; dir = DMA_FROM_DEVICE; + if (test_bit(SDHI_INTERNAL_DMAC_ONE_RX_ONLY, &global_flags) && + test_and_set_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags)) + goto force_pio; } else { dtran_mode |= DTRAN_MODE_CH_NUM_CH0; dir = DMA_TO_DEVICE; @@ -208,6 +226,9 @@ static void renesas_sdhi_internal_dmac_complete_tasklet_fn(unsigned long arg) renesas_sdhi_internal_dmac_enable_dma(host, false); dma_unmap_sg(&host->pdev->dev, host->sg_ptr, host->sg_len, dir); + if (dir == DMA_FROM_DEVICE) + clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags); + tmio_mmc_do_data_irq(host); out: spin_unlock_irq(&host->lock); @@ -251,18 +272,24 @@ static const struct tmio_mmc_dma_ops renesas_sdhi_internal_dmac_dma_ops = { * implementation as others may use a different implementation. */ static const struct soc_device_attribute gen3_soc_whitelist[] = { - { .soc_id = "r8a7795", .revision = "ES1.*" }, - { .soc_id = "r8a7795", .revision = "ES2.0" }, - { .soc_id = "r8a7796", .revision = "ES1.0" }, - { .soc_id = "r8a77995", .revision = "ES1.0" }, - { /* sentinel */ } + { .soc_id = "r8a7795", .revision = "ES1.*", + .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) }, + { .soc_id = "r8a7795", .revision = "ES2.0" }, + { .soc_id = "r8a7796", .revision = "ES1.0", + .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) }, + { .soc_id = "r8a77995", .revision = "ES1.0" }, + { /* sentinel */ } }; static int renesas_sdhi_internal_dmac_probe(struct platform_device *pdev) { - if (!soc_device_match(gen3_soc_whitelist)) + const struct soc_device_attribute *soc = soc_device_match(gen3_soc_whitelist); + + if (!soc) return -ENODEV; + global_flags |= (unsigned long)soc->data; + return renesas_sdhi_probe(pdev, &renesas_sdhi_internal_dmac_dma_ops); } From a6bb80f8efe04be2b2e2dcd1e5cd8fbfdaa35ab0 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Mon, 16 Apr 2018 19:05:30 -0700 Subject: [PATCH 325/660] drm/msm/dsi: check return value for video done waits Check for the return value of wait for video done waits and print appropriate error message. Signed-off-by: Abhinav Kumar Signed-off-by: Sean Paul Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 7a03a9489708..93a3cdde72ef 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -986,13 +986,19 @@ static void dsi_set_tx_power_mode(int mode, struct msm_dsi_host *msm_host) static void dsi_wait4video_done(struct msm_dsi_host *msm_host) { + u32 ret = 0; + struct device *dev = &msm_host->pdev->dev; + dsi_intr_ctrl(msm_host, DSI_IRQ_MASK_VIDEO_DONE, 1); reinit_completion(&msm_host->video_comp); - wait_for_completion_timeout(&msm_host->video_comp, + ret = wait_for_completion_timeout(&msm_host->video_comp, msecs_to_jiffies(70)); + if (ret <= 0) + dev_err(dev, "wait for video done timed out\n"); + dsi_intr_ctrl(msm_host, DSI_IRQ_MASK_VIDEO_DONE, 0); } From a56896c56e861464ff4de1e45a8e31c146eeeec1 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 17 Apr 2018 13:50:18 -0700 Subject: [PATCH 326/660] drm/msm/dsi: check video mode engine status before waiting Make sure the video mode engine is on before waiting for the video done interrupt. Changes in v4: - Move setting enabled to false earlier Changes in v3: - Move the return value check to another patch Changes in v2: - Replace pr_err with dev_err - Changed error message Signed-off-by: Abhinav Kumar Signed-off-by: Sean Paul Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 93a3cdde72ef..2f770386ab4a 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -173,6 +173,7 @@ struct msm_dsi_host { bool registered; bool power_on; + bool enabled; int irq; }; @@ -1007,7 +1008,7 @@ static void dsi_wait4video_eng_busy(struct msm_dsi_host *msm_host) if (!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO)) return; - if (msm_host->power_on) { + if (msm_host->power_on && msm_host->enabled) { dsi_wait4video_done(msm_host); /* delay 4 ms to skip BLLP */ usleep_range(2000, 4000); @@ -2209,7 +2210,7 @@ int msm_dsi_host_enable(struct mipi_dsi_host *host) * pm_runtime_put_autosuspend(&msm_host->pdev->dev); * } */ - + msm_host->enabled = true; return 0; } @@ -2217,6 +2218,7 @@ int msm_dsi_host_disable(struct mipi_dsi_host *host) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); + msm_host->enabled = false; dsi_op_mode_config(msm_host, !!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO), false); From f1fa7ff4405697603f5e3901623533c54091f130 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 17 Apr 2018 13:50:19 -0700 Subject: [PATCH 327/660] drm/msm/dsi: implement auto PHY timing calculator for 10nm PHY Currently the DSI PHY timings are hard-coded for a specific panel for the 10nm PHY. Replace this with the auto PHY timing calculator which can calculate the PHY timings for any panel. Changes in v4: - None Changes in v3: - None Changes in v2: - None Reviewed-by: Sean Paul Reviewed-by: Archit Taneja Signed-off-by: Abhinav Kumar Signed-off-by: Sean Paul Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 109 +++++++++++++++++++++ drivers/gpu/drm/msm/dsi/phy/dsi_phy.h | 2 + drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c | 28 ------ 3 files changed, 111 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 8e9d5c255820..9a9fa0c75a13 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -265,6 +265,115 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, return 0; } +int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, + struct msm_dsi_phy_clk_request *clk_req) +{ + const unsigned long bit_rate = clk_req->bitclk_rate; + const unsigned long esc_rate = clk_req->escclk_rate; + s32 ui, ui_x8, lpx; + s32 tmax, tmin; + s32 pcnt0 = 50; + s32 pcnt1 = 50; + s32 pcnt2 = 10; + s32 pcnt3 = 30; + s32 pcnt4 = 10; + s32 pcnt5 = 2; + s32 coeff = 1000; /* Precision, should avoid overflow */ + s32 hb_en, hb_en_ckln; + s32 temp; + + if (!bit_rate || !esc_rate) + return -EINVAL; + + timing->hs_halfbyte_en = 0; + hb_en = 0; + timing->hs_halfbyte_en_ckln = 0; + hb_en_ckln = 0; + + ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); + ui_x8 = ui << 3; + lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000); + + temp = S_DIV_ROUND_UP(38 * coeff, ui_x8); + tmin = max_t(s32, temp, 0); + temp = (95 * coeff) / ui_x8; + tmax = max_t(s32, temp, 0); + timing->clk_prepare = linear_inter(tmax, tmin, pcnt0, 0, false); + + temp = 300 * coeff - (timing->clk_prepare << 3) * ui; + tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1; + tmax = (tmin > 255) ? 511 : 255; + timing->clk_zero = linear_inter(tmax, tmin, pcnt5, 0, false); + + tmin = DIV_ROUND_UP(60 * coeff + 3 * ui, ui_x8); + temp = 105 * coeff + 12 * ui - 20 * coeff; + tmax = (temp + 3 * ui) / ui_x8; + timing->clk_trail = linear_inter(tmax, tmin, pcnt3, 0, false); + + temp = S_DIV_ROUND_UP(40 * coeff + 4 * ui, ui_x8); + tmin = max_t(s32, temp, 0); + temp = (85 * coeff + 6 * ui) / ui_x8; + tmax = max_t(s32, temp, 0); + timing->hs_prepare = linear_inter(tmax, tmin, pcnt1, 0, false); + + temp = 145 * coeff + 10 * ui - (timing->hs_prepare << 3) * ui; + tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1; + tmax = 255; + timing->hs_zero = linear_inter(tmax, tmin, pcnt4, 0, false); + + tmin = DIV_ROUND_UP(60 * coeff + 4 * ui, ui_x8) - 1; + temp = 105 * coeff + 12 * ui - 20 * coeff; + tmax = (temp / ui_x8) - 1; + timing->hs_trail = linear_inter(tmax, tmin, pcnt3, 0, false); + + temp = 50 * coeff + ((hb_en << 2) - 8) * ui; + timing->hs_rqst = S_DIV_ROUND_UP(temp, ui_x8); + + tmin = DIV_ROUND_UP(100 * coeff, ui_x8) - 1; + tmax = 255; + timing->hs_exit = linear_inter(tmax, tmin, pcnt2, 0, false); + + temp = 50 * coeff + ((hb_en_ckln << 2) - 8) * ui; + timing->hs_rqst_ckln = S_DIV_ROUND_UP(temp, ui_x8); + + temp = 60 * coeff + 52 * ui - 43 * ui; + tmin = DIV_ROUND_UP(temp, ui_x8) - 1; + tmax = 63; + timing->shared_timings.clk_post = + linear_inter(tmax, tmin, pcnt2, 0, false); + + temp = 8 * ui + (timing->clk_prepare << 3) * ui; + temp += (((timing->clk_zero + 3) << 3) + 11) * ui; + temp += hb_en_ckln ? (((timing->hs_rqst_ckln << 3) + 4) * ui) : + (((timing->hs_rqst_ckln << 3) + 8) * ui); + tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1; + tmax = 63; + if (tmin > tmax) { + temp = linear_inter(tmax << 1, tmin, pcnt2, 0, false); + timing->shared_timings.clk_pre = temp >> 1; + timing->shared_timings.clk_pre_inc_by_2 = 1; + } else { + timing->shared_timings.clk_pre = + linear_inter(tmax, tmin, pcnt2, 0, false); + timing->shared_timings.clk_pre_inc_by_2 = 0; + } + + timing->ta_go = 3; + timing->ta_sure = 0; + timing->ta_get = 4; + + DBG("%d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d", + timing->shared_timings.clk_pre, timing->shared_timings.clk_post, + timing->shared_timings.clk_pre_inc_by_2, timing->clk_zero, + timing->clk_trail, timing->clk_prepare, timing->hs_exit, + timing->hs_zero, timing->hs_prepare, timing->hs_trail, + timing->hs_rqst, timing->hs_rqst_ckln, timing->hs_halfbyte_en, + timing->hs_halfbyte_en_ckln, timing->hs_prep_dly, + timing->hs_prep_dly_ckln); + + return 0; +} + void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 reg, u32 bit_mask) { diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c56268cbdb3d..a24ab80994a3 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -101,6 +101,8 @@ int msm_dsi_dphy_timing_calc(struct msm_dsi_dphy_timing *timing, struct msm_dsi_phy_clk_request *clk_req); int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, struct msm_dsi_phy_clk_request *clk_req); +int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, + struct msm_dsi_phy_clk_request *clk_req); void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 reg, u32 bit_mask); int msm_dsi_phy_init_common(struct msm_dsi_phy *phy); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index 0af951aaeea1..b3fffc8dbb2a 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -79,34 +79,6 @@ static void dsi_phy_hw_v3_0_lane_settings(struct msm_dsi_phy *phy) dsi_phy_write(lane_base + REG_DSI_10nm_PHY_LN_TX_DCTRL(3), 0x04); } -static int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, - struct msm_dsi_phy_clk_request *clk_req) -{ - /* - * TODO: These params need to be computed, they're currently hardcoded - * for a 1440x2560@60Hz panel with a byteclk of 100.618 Mhz, and a - * default escape clock of 19.2 Mhz. - */ - - timing->hs_halfbyte_en = 0; - timing->clk_zero = 0x1c; - timing->clk_prepare = 0x07; - timing->clk_trail = 0x07; - timing->hs_exit = 0x23; - timing->hs_zero = 0x21; - timing->hs_prepare = 0x07; - timing->hs_trail = 0x07; - timing->hs_rqst = 0x05; - timing->ta_sure = 0x00; - timing->ta_go = 0x03; - timing->ta_get = 0x04; - - timing->shared_timings.clk_pre = 0x2d; - timing->shared_timings.clk_post = 0x0d; - - return 0; -} - static int dsi_10nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, struct msm_dsi_phy_clk_request *clk_req) { From 78b32d49c49f7162f9ab5884a1b7228f6bfa2632 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 28 Feb 2018 14:19:00 -0500 Subject: [PATCH 328/660] drm/msm: Mark the crtc->state->event consumed Don't leave the event != NULL once it's consumed, this is used a signal to the atomic helpers that the event will be handled by the driver. Changes in v2: - None Changes in v3: - Rebased on Archit's private_obj set Changes in v4: - None Cc: Jeykumar Sankaran Reviewed-by: Archit Taneja Signed-off-by: Sean Paul Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c | 1 + drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c index 6e5e1aa54ce1..b001699297c4 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c @@ -351,6 +351,7 @@ static void mdp4_crtc_atomic_flush(struct drm_crtc *crtc, spin_lock_irqsave(&dev->event_lock, flags); mdp4_crtc->event = crtc->state->event; + crtc->state->event = NULL; spin_unlock_irqrestore(&dev->event_lock, flags); blend_setup(crtc); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 9893e43ba6c5..76b96081916f 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -708,6 +708,7 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc, spin_lock_irqsave(&dev->event_lock, flags); mdp5_crtc->event = crtc->state->event; + crtc->state->event = NULL; spin_unlock_irqrestore(&dev->event_lock, flags); /* From f2f3df0aa8522b5641110bbabe54c79922601476 Mon Sep 17 00:00:00 2001 From: Jeykumar Sankaran Date: Tue, 13 Feb 2018 12:42:44 -0500 Subject: [PATCH 329/660] drm/msm: Add modifier to mdp_get_format arguments This change plumbs the new fb modifier through the various mdp/disp get_format hooks. Signed-off-by: Jeykumar Sankaran [seanpaul pimped out commit message a bit] Signed-off-by: Sean Paul Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/mdp_format.c | 3 ++- drivers/gpu/drm/msm/disp/mdp_kms.h | 2 +- drivers/gpu/drm/msm/msm_fb.c | 3 ++- drivers/gpu/drm/msm/msm_kms.h | 5 ++++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/mdp_format.c b/drivers/gpu/drm/msm/disp/mdp_format.c index b4a8aa4490ee..005760bee708 100644 --- a/drivers/gpu/drm/msm/disp/mdp_format.c +++ b/drivers/gpu/drm/msm/disp/mdp_format.c @@ -171,7 +171,8 @@ uint32_t mdp_get_formats(uint32_t *pixel_formats, uint32_t max_formats, return i; } -const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format) +const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, + uint64_t modifier) { int i; for (i = 0; i < ARRAY_SIZE(formats); i++) { diff --git a/drivers/gpu/drm/msm/disp/mdp_kms.h b/drivers/gpu/drm/msm/disp/mdp_kms.h index 1185487e7e5e..4fa8dbe4e165 100644 --- a/drivers/gpu/drm/msm/disp/mdp_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp_kms.h @@ -98,7 +98,7 @@ struct mdp_format { #define MDP_FORMAT_IS_YUV(mdp_format) ((mdp_format)->is_yuv) uint32_t mdp_get_formats(uint32_t *formats, uint32_t max_formats, bool rgb_only); -const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format); +const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, uint64_t modifier); /* MDP capabilities */ #define MDP_CAP_SMP BIT(0) /* Shared Memory Pool */ diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index 0e0c87252ab0..7a16242bf8bf 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -183,7 +183,8 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, hsub = drm_format_horz_chroma_subsampling(mode_cmd->pixel_format); vsub = drm_format_vert_chroma_subsampling(mode_cmd->pixel_format); - format = kms->funcs->get_format(kms, mode_cmd->pixel_format); + format = kms->funcs->get_format(kms, mode_cmd->pixel_format, + mode_cmd->modifier[0]); if (!format) { dev_err(dev->dev, "unsupported pixel format: %4.4s\n", (char *)&mode_cmd->pixel_format); diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index 17d5824417ad..aaa329dc020e 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -48,8 +48,11 @@ struct msm_kms_funcs { /* functions to wait for atomic commit completed on each CRTC */ void (*wait_for_crtc_commit_done)(struct msm_kms *kms, struct drm_crtc *crtc); + /* get msm_format w/ optional format modifiers from drm_mode_fb_cmd2 */ + const struct msm_format *(*get_format)(struct msm_kms *kms, + const uint32_t format, + const uint64_t modifiers); /* misc: */ - const struct msm_format *(*get_format)(struct msm_kms *kms, uint32_t format); long (*round_pixclk)(struct msm_kms *kms, unsigned long rate, struct drm_encoder *encoder); int (*set_split_display)(struct msm_kms *kms, From 3976626ea3d2011f8fd3f3a47070a8b792018253 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 3 Apr 2018 23:38:45 +0100 Subject: [PATCH 330/660] drm/msm: Fix possible null dereference on failure of get_pages() Commit 62e3a3e342af changed get_pages() to initialise msm_gem_object::pages before trying to initialise msm_gem_object::sgt, so that put_pages() would properly clean up pages in the failure case. However, this means that put_pages() now needs to check that msm_gem_object::sgt is not null before trying to clean it up, and this check was only applied to part of the cleanup code. Move it all into the conditional block. (Strictly speaking we don't need to make the kfree() conditional, but since we can't avoid checking for null ourselves we may as well do so.) Fixes: 62e3a3e342af ("drm/msm: fix leak in failed get_pages") Signed-off-by: Ben Hutchings Reviewed-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 95196479f651..f583bb4222f9 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -132,17 +132,19 @@ static void put_pages(struct drm_gem_object *obj) struct msm_gem_object *msm_obj = to_msm_bo(obj); if (msm_obj->pages) { - /* For non-cached buffers, ensure the new pages are clean - * because display controller, GPU, etc. are not coherent: - */ - if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + if (msm_obj->sgt) { + /* For non-cached buffers, ensure the new + * pages are clean because display controller, + * GPU, etc. are not coherent: + */ + if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) + dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, + DMA_BIDIRECTIONAL); - if (msm_obj->sgt) sg_free_table(msm_obj->sgt); - - kfree(msm_obj->sgt); + kfree(msm_obj->sgt); + } if (use_pages(obj)) drm_gem_put_pages(obj, msm_obj->pages, true, false); From a4af89286f8fc382459308764ea05935dc477cdc Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 19 Mar 2018 22:26:32 +0100 Subject: [PATCH 331/660] drm/msm/dsi: use correct enum in dsi_get_cmd_fmt The function dsi_get_cmd_fmt returns enum dsi_cmd_dst_format, use the correct enum value also for MIPI_DSI_FMT_RGB666/_PACKED. This has been discovered using clang: drivers/gpu/drm/msm/dsi/dsi_host.c:743:35: warning: implicit conversion from enumeration type 'enum dsi_vid_dst_format' to different enumeration type 'enum dsi_cmd_dst_format' [-Wenum-conversion] case MIPI_DSI_FMT_RGB666: return VID_DST_FORMAT_RGB666; ~~~~~~ ^~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Stefan Agner Reviewed-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 2f770386ab4a..8baba30d6c65 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -776,7 +776,7 @@ static inline enum dsi_cmd_dst_format dsi_get_cmd_fmt( switch (mipi_fmt) { case MIPI_DSI_FMT_RGB888: return CMD_DST_FORMAT_RGB888; case MIPI_DSI_FMT_RGB666_PACKED: - case MIPI_DSI_FMT_RGB666: return VID_DST_FORMAT_RGB666; + case MIPI_DSI_FMT_RGB666: return CMD_DST_FORMAT_RGB666; case MIPI_DSI_FMT_RGB565: return CMD_DST_FORMAT_RGB565; default: return CMD_DST_FORMAT_RGB888; } From 789d4c300e10eb2096ee83c3497118e67ccc951e Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 28 Mar 2018 17:22:16 +0100 Subject: [PATCH 332/660] drm/msm: don't deref error pointer in the msm_fbdev_create error path Currently the error pointer returned by msm_alloc_stolen_fb gets passed to drm_framebuffer_remove. The latter handles only NULL pointers, thus a nasty crash will occur. Drop the unnecessary fail label and the associated checks - both err and fb will be set at this stage. Cc: Rob Clark Cc: linux-arm-msm@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: freedreno@lists.freedesktop.org Signed-off-by: Emil Velikov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_fbdev.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index c178563fcd4d..456622b46335 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -92,8 +92,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, if (IS_ERR(fb)) { dev_err(dev->dev, "failed to allocate fb\n"); - ret = PTR_ERR(fb); - goto fail; + return PTR_ERR(fb); } bo = msm_framebuffer_bo(fb, 0); @@ -151,13 +150,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, fail_unlock: mutex_unlock(&dev->struct_mutex); -fail: - - if (ret) { - if (fb) - drm_framebuffer_remove(fb); - } - + drm_framebuffer_remove(fb); return ret; } From 5a786232eb69a1f870ddc0cfd69d5bdef241a2ea Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 19 Apr 2018 16:17:14 +0200 Subject: [PATCH 333/660] netfilter: xt_connmark: do not cast xt_connmark_tginfo1 to xt_connmark_tginfo2 These structures have different layout, fill xt_connmark_tginfo2 with old fields in xt_connmark_tginfo1. Based on patch from Jack Ma. Fixes: 472a73e00757 ("netfilter: xt_conntrack: Support bit-shifting for CONNMARK & MARK targets.") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connmark.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 4b424e6caf3e..94df000abb92 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -36,9 +36,7 @@ MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); static unsigned int -connmark_tg_shift(struct sk_buff *skb, - const struct xt_connmark_tginfo1 *info, - u8 shift_bits, u8 shift_dir) +connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) { enum ip_conntrack_info ctinfo; u_int32_t new_targetmark; @@ -52,10 +50,11 @@ connmark_tg_shift(struct sk_buff *skb, switch (info->mode) { case XT_CONNMARK_SET: newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; - if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + if (info->shift_dir == D_SHIFT_RIGHT) + newmark >>= info->shift_bits; else - newmark <<= shift_bits; + newmark <<= info->shift_bits; + if (ct->mark != newmark) { ct->mark = newmark; nf_conntrack_event_cache(IPCT_MARK, ct); @@ -63,10 +62,11 @@ connmark_tg_shift(struct sk_buff *skb, break; case XT_CONNMARK_SAVE: new_targetmark = (skb->mark & info->nfmask); - if (shift_dir == D_SHIFT_RIGHT) - new_targetmark >>= shift_bits; + if (info->shift_dir == D_SHIFT_RIGHT) + new_targetmark >>= info->shift_bits; else - new_targetmark <<= shift_bits; + new_targetmark <<= info->shift_bits; + newmark = (ct->mark & ~info->ctmask) ^ new_targetmark; if (ct->mark != newmark) { @@ -76,10 +76,11 @@ connmark_tg_shift(struct sk_buff *skb, break; case XT_CONNMARK_RESTORE: new_targetmark = (ct->mark & info->ctmask); - if (shift_dir == D_SHIFT_RIGHT) - new_targetmark >>= shift_bits; + if (info->shift_dir == D_SHIFT_RIGHT) + new_targetmark >>= info->shift_bits; else - new_targetmark <<= shift_bits; + new_targetmark <<= info->shift_bits; + newmark = (skb->mark & ~info->nfmask) ^ new_targetmark; skb->mark = newmark; @@ -92,8 +93,14 @@ static unsigned int connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo1 *info = par->targinfo; + const struct xt_connmark_tginfo2 info2 = { + .ctmark = info->ctmark, + .ctmask = info->ctmask, + .nfmask = info->nfmask, + .mode = info->mode, + }; - return connmark_tg_shift(skb, info, 0, 0); + return connmark_tg_shift(skb, &info2); } static unsigned int @@ -101,8 +108,7 @@ connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo2 *info = par->targinfo; - return connmark_tg_shift(skb, (const struct xt_connmark_tginfo1 *)info, - info->shift_bits, info->shift_dir); + return connmark_tg_shift(skb, info); } static int connmark_tg_check(const struct xt_tgchk_param *par) From bea548831b8cee347181132eacd8b9711dfced92 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Thu, 19 Apr 2018 12:04:26 +0800 Subject: [PATCH 334/660] blkcg: small fix on comment in blkcg_init_queue The comment before blkg_create() in blkcg_init_queue() was moved from blkcg_activate_policy() by commit ec13b1d6f0a0457312e615, but it does not suit for the new context. Signed-off-by: Jiang Biao Signed-off-by: Wen Yang CC: Tejun Heo CC: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 21bc449d01c0..79da2a723b68 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1177,11 +1177,7 @@ int blkcg_init_queue(struct request_queue *q) preloaded = !radix_tree_preload(GFP_KERNEL); - /* - * Make sure the root blkg exists and count the existing blkgs. As - * @q is bypassing at this point, blkg_lookup_create() can't be - * used. Open code insertion. - */ + /* Make sure the root blkg exists. */ rcu_read_lock(); spin_lock_irq(q->queue_lock); blkg = blkg_create(&blkcg_root, q, new_blkg); From 901932a3f9b2b80352896be946c6d577c0a9652c Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Thu, 19 Apr 2018 12:06:09 +0800 Subject: [PATCH 335/660] blkcg: init root blkcg_gq under lock The initializing of q->root_blkg is currently outside of queue lock and rcu, so the blkg may be destroied before the initializing, which may cause dangling/null references. On the other side, the destroys of blkg are protected by queue lock or rcu. Put the initializing inside the queue lock and rcu to make it safer. Signed-off-by: Jiang Biao Signed-off-by: Wen Yang CC: Tejun Heo CC: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 79da2a723b68..eb85cb87c40f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1181,18 +1181,16 @@ int blkcg_init_queue(struct request_queue *q) rcu_read_lock(); spin_lock_irq(q->queue_lock); blkg = blkg_create(&blkcg_root, q, new_blkg); + if (IS_ERR(blkg)) + goto err_unlock; + q->root_blkg = blkg; + q->root_rl.blkg = blkg; spin_unlock_irq(q->queue_lock); rcu_read_unlock(); if (preloaded) radix_tree_preload_end(); - if (IS_ERR(blkg)) - return PTR_ERR(blkg); - - q->root_blkg = blkg; - q->root_rl.blkg = blkg; - ret = blk_throtl_init(q); if (ret) { spin_lock_irq(q->queue_lock); @@ -1200,6 +1198,13 @@ int blkcg_init_queue(struct request_queue *q) spin_unlock_irq(q->queue_lock); } return ret; + +err_unlock: + spin_unlock_irq(q->queue_lock); + rcu_read_unlock(); + if (preloaded) + radix_tree_preload_end(); + return PTR_ERR(blkg); } /** From 1cdae042fc63dd987f39ffb3258e54fdac8b9852 Mon Sep 17 00:00:00 2001 From: Ahbong Chang Date: Mon, 16 Apr 2018 10:36:25 +0800 Subject: [PATCH 336/660] tracing: Add missing forward declaration Without this forward declaration compile may fail if this header is included only for registering other probe event without struct pool_workqueue. Link: http://lkml.kernel.org/r/20180416023626.139915-1-cwahbong@google.com Reviewed-by: Todd Poynor Signed-off-by: Ahbong Chang Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/workqueue.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 2f057a494d93..9a761bc6a251 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -25,6 +25,8 @@ DECLARE_EVENT_CLASS(workqueue_work, TP_printk("work struct %p", __entry->work) ); +struct pool_workqueue; + /** * workqueue_queue_work - called when a work gets queued * @req_cpu: the requested cpu From d78fd7255881645fc645e23145d469385227170d Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Thu, 12 Apr 2018 16:37:09 -0400 Subject: [PATCH 337/660] drm/amd/display: Don't program bypass on linear regamma LUT Even though this is required for degamma since DCE HW only supports a couple predefined LUTs we can just program the LUT directly for regamma. This fixes dark screens which occurs when we program regamma to bypass while degamma is using srgb LUT. Signed-off-by: Harry Wentland Reviewed-by: Leo Li Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index f6cb502c303f..25f064c01038 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -138,13 +138,6 @@ int amdgpu_dm_set_regamma_lut(struct dm_crtc_state *crtc) lut = (struct drm_color_lut *)blob->data; lut_size = blob->length / sizeof(struct drm_color_lut); - if (__is_lut_linear(lut, lut_size)) { - /* Set to bypass if lut is set to linear */ - stream->out_transfer_func->type = TF_TYPE_BYPASS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - return 0; - } - gamma = dc_create_gamma(); if (!gamma) return -ENOMEM; From 84f8508f717268c333de8e472f351d6a7a487e51 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 17 Apr 2018 17:26:26 +0800 Subject: [PATCH 338/660] drm/amd/pp: Fix bug voltage can't be OD separately on VI Make sure to update the MCLK and SCLK flags when setting the VDDC flags due to dependencies. Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index add90675fd2a..26fbeafc3c96 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -4743,23 +4743,27 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr) for (i=0; i < dep_table->count; i++) { if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; - break; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK; + return; } } - if (i == dep_table->count) + if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + } dep_table = table_info->vdd_dep_on_sclk; odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk); for (i=0; i < dep_table->count; i++) { if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; - break; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK; + return; } } - if (i == dep_table->count) + if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + } } static int smu7_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, From cc9e992dfb5bb48f59f3fbc1268d3f38d2c86ef3 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 17 Apr 2018 21:49:51 +0800 Subject: [PATCH 339/660] drm/amd/powerplay: header file interface to SMU update update vega12 smu interface. Signed-off-by: Kenneth Feng Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h index fb696e3d06cf..2f8a3b983cce 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h @@ -412,8 +412,10 @@ typedef struct { QuadraticInt_t ReservedEquation2; QuadraticInt_t ReservedEquation3; + uint16_t MinVoltageUlvGfx; + uint16_t MinVoltageUlvSoc; - uint32_t Reserved[15]; + uint32_t Reserved[14]; From 8a9fd8323087e794f1d3cd4850b393ced048bc73 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 18 Apr 2018 16:05:18 -0600 Subject: [PATCH 340/660] coresight: Move to SPDX identifier Move CoreSight headers to the SPDX identifier. Signed-off-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1524089118-27595-1-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/coresight-pmu.h | 13 +------------ tools/include/linux/coresight-pmu.h | 13 +------------ tools/perf/arch/arm/util/auxtrace.c | 13 +------------ tools/perf/arch/arm/util/cs-etm.c | 13 +------------ tools/perf/arch/arm/util/cs-etm.h | 13 +------------ tools/perf/arch/arm/util/pmu.c | 13 +------------ tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 3 +-- tools/perf/util/cs-etm.c | 3 +-- tools/perf/util/cs-etm.h | 13 +------------ 9 files changed, 9 insertions(+), 88 deletions(-) diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index edfeaba95429..a1a959ba24ff 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . */ #ifndef _LINUX_CORESIGHT_PMU_H diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index edfeaba95429..a1a959ba24ff 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . */ #ifndef _LINUX_CORESIGHT_PMU_H diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index fa639e3e52ac..1ce6bdbda561 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . */ #include diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 5c655ad4621e..2f595cd73da6 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . */ #include diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h index 5256741be549..1a12e64f5127 100644 --- a/tools/perf/arch/arm/util/cs-etm.h +++ b/tools/perf/arch/arm/util/cs-etm.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . */ #ifndef INCLUDE__PERF_CS_ETM_H__ diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index ac4dffc807b8..e047571e6080 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . */ #include diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 640af88331b4..c8b98fa22997 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright(C) 2015-2018 Linaro Limited. * * Author: Tor Jeremiassen diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1b0d422373be..40020b1ca54f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright(C) 2015-2018 Linaro Limited. * * Author: Tor Jeremiassen diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 5864d5dca616..37f8d48179ca 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . */ #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ From 8a56ef4f3ffba9ebf4967b61ef600b0a7ba10f11 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Apr 2018 18:16:15 +0200 Subject: [PATCH 341/660] ALSA: rawmidi: Fix missing input substream checks in compat ioctls Some rawmidi compat ioctls lack of the input substream checks (although they do check only for rfile->output). This many eventually lead to an Oops as NULL substream is passed to the rawmidi core functions. Fix it by adding the proper checks before each function call. The bug was spotted by syzkaller. Reported-by: syzbot+f7a0348affc3b67bc617@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/rawmidi_compat.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c index f69764d7cdd7..e30e30ba6e39 100644 --- a/sound/core/rawmidi_compat.c +++ b/sound/core/rawmidi_compat.c @@ -36,8 +36,6 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile, struct snd_rawmidi_params params; unsigned int val; - if (rfile->output == NULL) - return -EINVAL; if (get_user(params.stream, &src->stream) || get_user(params.buffer_size, &src->buffer_size) || get_user(params.avail_min, &src->avail_min) || @@ -46,8 +44,12 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile, params.no_active_sensing = val; switch (params.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; return snd_rawmidi_output_params(rfile->output, ¶ms); case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; return snd_rawmidi_input_params(rfile->input, ¶ms); } return -EINVAL; @@ -67,16 +69,18 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile, int err; struct snd_rawmidi_status status; - if (rfile->output == NULL) - return -EINVAL; if (get_user(status.stream, &src->stream)) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: @@ -112,16 +116,18 @@ static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile, int err; struct snd_rawmidi_status status; - if (rfile->output == NULL) - return -EINVAL; if (get_user(status.stream, &src->stream)) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: From bb9aaaa1849eed763c6b7f20227a8a03300d4421 Mon Sep 17 00:00:00 2001 From: sunlianwen Date: Wed, 18 Apr 2018 09:22:39 +0800 Subject: [PATCH 342/660] net: change the comment of dev_mc_init The comment of dev_mc_init() is wrong. which use dev_mc_flush instead of dev_mc_init. Signed-off-by: Lianwen Sun --- net/core/dev_addr_lists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index e3e6a3e2ca22..d884d8f5f0e5 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -839,7 +839,7 @@ void dev_mc_flush(struct net_device *dev) EXPORT_SYMBOL(dev_mc_flush); /** - * dev_mc_flush - Init multicast address list + * dev_mc_init - Init multicast address list * @dev: device * * Init multicast address list. From da42bb271305d68df6cbf99eed90542f1f1ee1c9 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 18 Apr 2018 11:14:44 +0200 Subject: [PATCH 343/660] net: mvpp2: Fix DMA address mask size PPv2 TX/RX descriptors uses 40bits DMA addresses, but 41 bits masks were used (GENMASK_ULL(40, 0)). This commit fixes that by using the correct mask. Fixes: e7c5359f2eed ("net: mvpp2: introduce PPv2.2 HW descriptors and adapt accessors") Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 9deb79b6dcc8..4202f9b5b966 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -916,6 +916,8 @@ static struct { #define MVPP2_MIB_COUNTERS_STATS_DELAY (1 * HZ) +#define MVPP2_DESC_DMA_MASK DMA_BIT_MASK(40) + /* Definitions */ /* Shared Packet Processor resources */ @@ -1429,7 +1431,7 @@ static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port, if (port->priv->hw_version == MVPP21) return tx_desc->pp21.buf_dma_addr; else - return tx_desc->pp22.buf_dma_addr_ptp & GENMASK_ULL(40, 0); + return tx_desc->pp22.buf_dma_addr_ptp & MVPP2_DESC_DMA_MASK; } static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port, @@ -1447,7 +1449,7 @@ static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port, } else { u64 val = (u64)addr; - tx_desc->pp22.buf_dma_addr_ptp &= ~GENMASK_ULL(40, 0); + tx_desc->pp22.buf_dma_addr_ptp &= ~MVPP2_DESC_DMA_MASK; tx_desc->pp22.buf_dma_addr_ptp |= val; tx_desc->pp22.packet_offset = offset; } @@ -1507,7 +1509,7 @@ static dma_addr_t mvpp2_rxdesc_dma_addr_get(struct mvpp2_port *port, if (port->priv->hw_version == MVPP21) return rx_desc->pp21.buf_dma_addr; else - return rx_desc->pp22.buf_dma_addr_key_hash & GENMASK_ULL(40, 0); + return rx_desc->pp22.buf_dma_addr_key_hash & MVPP2_DESC_DMA_MASK; } static unsigned long mvpp2_rxdesc_cookie_get(struct mvpp2_port *port, @@ -1516,7 +1518,7 @@ static unsigned long mvpp2_rxdesc_cookie_get(struct mvpp2_port *port, if (port->priv->hw_version == MVPP21) return rx_desc->pp21.buf_cookie; else - return rx_desc->pp22.buf_cookie_misc & GENMASK_ULL(40, 0); + return rx_desc->pp22.buf_cookie_misc & MVPP2_DESC_DMA_MASK; } static size_t mvpp2_rxdesc_size_get(struct mvpp2_port *port, @@ -8789,7 +8791,7 @@ static int mvpp2_probe(struct platform_device *pdev) } if (priv->hw_version == MVPP22) { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(40)); + err = dma_set_mask(&pdev->dev, MVPP2_DESC_DMA_MASK); if (err) goto err_mg_clk; /* Sadly, the BM pools all share the same register to From 565020aaeebfa7c8b3ec077bee38f4c15acc9905 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 18 Apr 2018 10:57:55 +0100 Subject: [PATCH 344/660] net: stmmac: Disable ACS Feature for GMAC >= 4 ACS Feature is currently enabled for GMAC >= 4 but the llc_snap status is never checked in descriptor rx_status callback. This will cause stmmac to always strip packets even that ACS feature is already stripping them. Lets be safe and disable the ACS feature for GMAC >= 4 and always strip the packets for this GMAC version. Fixes: 477286b53f55 ("stmmac: add GMAC4 core support") Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 7 ------- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++++- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index c7bff596c665..dedd40613090 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -347,7 +347,7 @@ enum power_event { #define MTL_RX_OVERFLOW_INT BIT(16) /* Default operating mode of the MAC */ -#define GMAC_CORE_INIT (GMAC_CONFIG_JD | GMAC_CONFIG_PS | GMAC_CONFIG_ACS | \ +#define GMAC_CORE_INIT (GMAC_CONFIG_JD | GMAC_CONFIG_PS | \ GMAC_CONFIG_BE | GMAC_CONFIG_DCRS) /* To dump the core regs excluding the Address Registers */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index a3af92ebbca8..517b1f6736a8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -31,13 +31,6 @@ static void dwmac4_core_init(struct mac_device_info *hw, value |= GMAC_CORE_INIT; - /* Clear ACS bit because Ethernet switch tagging formats such as - * Broadcom tags can look like invalid LLC/SNAP packets and cause the - * hardware to truncate packets on reception. - */ - if (netdev_uses_dsa(dev)) - value &= ~GMAC_CONFIG_ACS; - if (mtu > 1500) value |= GMAC_CONFIG_2K; if (mtu > 2000) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9a16931ce39d..b65e2d144698 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3495,8 +3495,13 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 * Type frames (LLC/LLC-SNAP) + * + * llc_snap is never checked in GMAC >= 4, so this ACS + * feature is always disabled and packets need to be + * stripped manually. */ - if (unlikely(status != llc_snap)) + if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00) || + unlikely(status != llc_snap)) frame_len -= ETH_FCS_LEN; if (netif_msg_rx_status(priv)) { From 5e84b38b07e676fcd3ab6e296780b4f77a29d09f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 18 Apr 2018 12:00:08 +0100 Subject: [PATCH 345/660] net: caif: fix spelling mistake "UKNOWN" -> "UNKNOWN" Trivial fix to spelling mistake Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 53ecda10b790..13e2ae6be620 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -174,7 +174,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" : flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" : flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? - "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND"); + "REMOTE_SHUTDOWN" : "UNKNOWN CTRL COMMAND"); From 4ec7eb3ff6eb5c9af3a84288a8d808a857fbc22b Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Wed, 18 Apr 2018 16:03:24 +0200 Subject: [PATCH 346/660] net: qmi_wwan: add Wistron Neweb D19Q1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This modem is embedded on dlink dwr-960 router. The oem configuration states: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1435 ProdID=d191 Rev=ff.ff S: Manufacturer=Android S: Product=Android S: SerialNumber=0123456789ABCDEF C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Tested on openwrt distribution Signed-off-by: Pawel Dembicki Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index ca066b785e9f..c853e7410f5a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1107,6 +1107,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */ + {QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ {QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */ {QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */ From f3335545b34315fc42cc03a83165bdd26d956584 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 18 Apr 2018 16:55:05 +0100 Subject: [PATCH 347/660] atm: iphase: fix spelling mistake: "Tansmit" -> "Transmit" Trivial fix to spelling mistake in message text. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/iphase.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 44abb8a0a5e5..be076606d30e 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -671,7 +671,7 @@ static void ia_tx_poll (IADEV *iadev) { if ((vcc->pop) && (skb1->len != 0)) { vcc->pop(vcc, skb1); - IF_EVENT(printk("Tansmit Done - skb 0x%lx return\n", + IF_EVENT(printk("Transmit Done - skb 0x%lx return\n", (long)skb1);) } else @@ -1665,7 +1665,7 @@ static void tx_intr(struct atm_dev *dev) status = readl(iadev->seg_reg+SEG_INTR_STATUS_REG); if (status & TRANSMIT_DONE){ - IF_EVENT(printk("Tansmit Done Intr logic run\n");) + IF_EVENT(printk("Transmit Done Intr logic run\n");) spin_lock_irqsave(&iadev->tx_lock, flags); ia_tx_poll(iadev); spin_unlock_irqrestore(&iadev->tx_lock, flags); From 02b94fc70ffe320a7799c35e09372809e40b7131 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 18 Apr 2018 10:14:13 -0600 Subject: [PATCH 348/660] MAINTAINERS: Direct networking documentation changes to netdev Networking docs changes go through the networking tree, so patch the MAINTAINERS file to direct authors to the right place. Signed-off-by: Jonathan Corbet Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b60179d948bb..1624f7d92364 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9751,6 +9751,7 @@ F: include/uapi/linux/net_namespace.h F: tools/testing/selftests/net/ F: lib/net_utils.c F: lib/random32.c +F: Documentation/networking/ NETWORKING [IPSEC] M: Steffen Klassert From f7e43672683b097bb074a8fe7af9bc600a23f231 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 18 Apr 2018 11:51:56 -0700 Subject: [PATCH 349/660] llc: hold llc_sap before release_sock() syzbot reported we still access llc->sap in llc_backlog_rcv() after it is freed in llc_sap_remove_socket(): Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 llc_conn_ac_send_sabme_cmd_p_set_x+0x3a8/0x460 net/llc/llc_c_ac.c:785 llc_exec_conn_trans_actions net/llc/llc_conn.c:475 [inline] llc_conn_service net/llc/llc_conn.c:400 [inline] llc_conn_state_process+0x4e1/0x13a0 net/llc/llc_conn.c:75 llc_backlog_rcv+0x195/0x1e0 net/llc/llc_conn.c:891 sk_backlog_rcv include/net/sock.h:909 [inline] __release_sock+0x12f/0x3a0 net/core/sock.c:2335 release_sock+0xa4/0x2b0 net/core/sock.c:2850 llc_ui_release+0xc8/0x220 net/llc/af_llc.c:204 llc->sap is refcount'ed and llc_sap_remove_socket() is paired with llc_sap_add_socket(). This can be amended by holding its refcount before llc_sap_remove_socket() and releasing it after release_sock(). Reported-by: Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/llc/af_llc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 01dcc0823d1f..6d29b2b94e84 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -189,6 +189,7 @@ static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; struct llc_sock *llc; + struct llc_sap *sap; if (unlikely(sk == NULL)) goto out; @@ -199,9 +200,15 @@ static int llc_ui_release(struct socket *sock) llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); + sap = llc->sap; + /* Hold this for release_sock(), so that llc_backlog_rcv() could still + * use it. + */ + llc_sap_hold(sap); if (!sock_flag(sk, SOCK_ZAPPED)) llc_sap_remove_socket(llc->sap, sk); release_sock(sk); + llc_sap_put(sap); if (llc->dev) dev_put(llc->dev); sock_put(sk); From 65ec0bd1c7c14522670a5294de35710fb577a7fd Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Wed, 18 Apr 2018 12:48:04 -0700 Subject: [PATCH 350/660] vmxnet3: fix incorrect dereference when rxvlan is disabled vmxnet3_get_hdr_len() is used to calculate the header length which in turn is used to calculate the gso_size for skb. When rxvlan offload is disabled, vlan tag is present in the header and the function references ip header from sizeof(ethhdr) and leads to incorrect pointer reference. This patch fixes this issue by taking sizeof(vlan_ethhdr) into account if vlan tag is present and correctly references the ip hdr. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Acked-by: Louis Luo Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 17 +++++++++++++---- drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index e04937f44f33..9ebe2a689966 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1218,6 +1218,7 @@ vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb, union { void *ptr; struct ethhdr *eth; + struct vlan_ethhdr *veth; struct iphdr *ipv4; struct ipv6hdr *ipv6; struct tcphdr *tcp; @@ -1228,16 +1229,24 @@ vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb, if (unlikely(sizeof(struct iphdr) + sizeof(struct tcphdr) > maplen)) return 0; + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || + skb->protocol == cpu_to_be16(ETH_P_8021AD)) + hlen = sizeof(struct vlan_ethhdr); + else + hlen = sizeof(struct ethhdr); + hdr.eth = eth_hdr(skb); if (gdesc->rcd.v4) { - BUG_ON(hdr.eth->h_proto != htons(ETH_P_IP)); - hdr.ptr += sizeof(struct ethhdr); + BUG_ON(hdr.eth->h_proto != htons(ETH_P_IP) && + hdr.veth->h_vlan_encapsulated_proto != htons(ETH_P_IP)); + hdr.ptr += hlen; BUG_ON(hdr.ipv4->protocol != IPPROTO_TCP); hlen = hdr.ipv4->ihl << 2; hdr.ptr += hdr.ipv4->ihl << 2; } else if (gdesc->rcd.v6) { - BUG_ON(hdr.eth->h_proto != htons(ETH_P_IPV6)); - hdr.ptr += sizeof(struct ethhdr); + BUG_ON(hdr.eth->h_proto != htons(ETH_P_IPV6) && + hdr.veth->h_vlan_encapsulated_proto != htons(ETH_P_IPV6)); + hdr.ptr += hlen; /* Use an estimated value, since we also need to handle * TSO case. */ diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 59ec34052a65..a3326463b71f 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.13.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.14.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040d00 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040e00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ From ab913455dd59b81204b6a0d387a44697b0e0bd85 Mon Sep 17 00:00:00 2001 From: Olivier Gayot Date: Wed, 18 Apr 2018 22:03:06 +0200 Subject: [PATCH 351/660] docs: ip-sysctl.txt: fix name of some ipv6 variables The name of the following proc/sysctl entries were incorrectly documented: /proc/sys/net/ipv6/conf//max_dst_opts_number /proc/sys/net/ipv6/conf//max_hbt_opts_number /proc/sys/net/ipv6/conf//max_dst_opts_length /proc/sys/net/ipv6/conf//max_hbt_length Their name was set to the name of the symbol in the .data field of the control table instead of their .proc name. Signed-off-by: Olivier Gayot Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5dc1a040a2f1..b583a73cf95f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1390,26 +1390,26 @@ mld_qrv - INTEGER Default: 2 (as specified by RFC3810 9.1) Minimum: 1 (as specified by RFC6636 4.5) -max_dst_opts_cnt - INTEGER +max_dst_opts_number - INTEGER Maximum number of non-padding TLVs allowed in a Destination options extension header. If this value is less than zero then unknown options are disallowed and the number of known TLVs allowed is the absolute value of this number. Default: 8 -max_hbh_opts_cnt - INTEGER +max_hbh_opts_number - INTEGER Maximum number of non-padding TLVs allowed in a Hop-by-Hop options extension header. If this value is less than zero then unknown options are disallowed and the number of known TLVs allowed is the absolute value of this number. Default: 8 -max dst_opts_len - INTEGER +max_dst_opts_length - INTEGER Maximum length allowed for a Destination options extension header. Default: INT_MAX (unlimited) -max hbh_opts_len - INTEGER +max_hbh_length - INTEGER Maximum length allowed for a Hop-by-Hop options extension header. Default: INT_MAX (unlimited) From d90a10e2444ba5a351fa695917258ff4c5709fa5 Mon Sep 17 00:00:00 2001 From: Robert Kolchmeyer Date: Thu, 19 Apr 2018 10:44:33 -0700 Subject: [PATCH 352/660] fsnotify: Fix fsnotify_mark_connector race fsnotify() acquires a reference to a fsnotify_mark_connector through the SRCU-protected pointer to_tell->i_fsnotify_marks. However, it appears that no precautions are taken in fsnotify_put_mark() to ensure that fsnotify() drops its reference to this fsnotify_mark_connector before assigning a value to its 'destroy_next' field. This can result in fsnotify_put_mark() assigning a value to a connector's 'destroy_next' field right before fsnotify() tries to traverse the linked list referenced by the connector's 'list' field. Since these two fields are members of the same union, this behavior results in a kernel panic. This issue is resolved by moving the connector's 'destroy_next' field into the object pointer union. This should work since the object pointer access is protected by both a spinlock and the value of the 'flags' field, and the 'flags' field is cleared while holding the spinlock in fsnotify_put_mark() before 'destroy_next' is updated. It shouldn't be possible for another thread to accidentally read from the object pointer after the 'destroy_next' field is updated. The offending behavior here is extremely unlikely; since fsnotify_put_mark() removes references to a connector (specifically, it ensures that the connector is unreachable from the inode it was formerly attached to) before updating its 'destroy_next' field, a sizeable chunk of code in fsnotify_put_mark() has to execute in the short window between when fsnotify() acquires the connector reference and saves the value of its 'list' field. On the HEAD kernel, I've only been able to reproduce this by inserting a udelay(1) in fsnotify(). However, I've been able to reproduce this issue without inserting a udelay(1) anywhere on older unmodified release kernels, so I believe it's worth fixing at HEAD. References: https://bugzilla.kernel.org/show_bug.cgi?id=199437 Fixes: 08991e83b7286635167bab40927665a90fb00d81 CC: stable@vger.kernel.org Signed-off-by: Robert Kolchmeyer Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9f1edb92c97e..a3d13d874fd1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -217,12 +217,10 @@ struct fsnotify_mark_connector { union { /* Object pointer [lock] */ struct inode *inode; struct vfsmount *mnt; - }; - union { - struct hlist_head list; /* Used listing heads to free after srcu period expires */ struct fsnotify_mark_connector *destroy_next; }; + struct hlist_head list; }; /* From f4ea89110df237da6fbcaab76af431e85f07d904 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Wed, 18 Apr 2018 21:55:41 -0600 Subject: [PATCH 353/660] net: hns: Avoid action name truncation When longer interface names are used, the action names exposed in /proc/interrupts and /proc/irq/* maybe truncated. For example, when using the predictable name algorithm in systemd on a HiSilicon D05, I see: ubuntu@d05-3:~$ grep enahisic2i0-tx /proc/interrupts | sed 's/.* //' enahisic2i0-tx0 enahisic2i0-tx1 [...] enahisic2i0-tx8 enahisic2i0-tx9 enahisic2i0-tx1 enahisic2i0-tx1 enahisic2i0-tx1 enahisic2i0-tx1 enahisic2i0-tx1 enahisic2i0-tx1 Increase the max ring name length to allow for an interface name of IFNAMSIZE. After this change, I now see: $ grep enahisic2i0-tx /proc/interrupts | sed 's/.* //' enahisic2i0-tx0 enahisic2i0-tx1 enahisic2i0-tx2 [...] enahisic2i0-tx8 enahisic2i0-tx9 enahisic2i0-tx10 enahisic2i0-tx11 enahisic2i0-tx12 enahisic2i0-tx13 enahisic2i0-tx14 enahisic2i0-tx15 Signed-off-by: dann frazier Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 3e62692af011..fa5b30f547f6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -87,7 +87,7 @@ do { \ #define HNAE_AE_REGISTER 0x1 -#define RCB_RING_NAME_LEN 16 +#define RCB_RING_NAME_LEN (IFNAMSIZ + 4) #define HNAE_LOWEST_LATENCY_COAL_PARAM 30 #define HNAE_LOW_LATENCY_COAL_PARAM 80 From 12e571693837d6164bda61e316b1944972ee0d97 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 19 Apr 2018 08:30:48 +0300 Subject: [PATCH 354/660] virtio_net: split out ctrl buffer When sending control commands, virtio net sets up several buffers for DMA. The buffers are all part of the net device which means it's actually allocated by kvmalloc so it's in theory (on extreme memory pressure) possible to get a vmalloc'ed buffer which on some platforms means we can't DMA there. Fix up by moving the DMA buffers into a separate structure. Reported-by: Mikulas Patocka Suggested-by: Eric Dumazet Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 68 +++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index dfe78308044d..9a675250107f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -147,6 +147,17 @@ struct receive_queue { struct xdp_rxq_info xdp_rxq; }; +/* Control VQ buffers: protected by the rtnl lock */ +struct control_buf { + struct virtio_net_ctrl_hdr hdr; + virtio_net_ctrl_ack status; + struct virtio_net_ctrl_mq mq; + u8 promisc; + u8 allmulti; + u16 vid; + u64 offloads; +}; + struct virtnet_info { struct virtio_device *vdev; struct virtqueue *cvq; @@ -192,14 +203,7 @@ struct virtnet_info { struct hlist_node node; struct hlist_node node_dead; - /* Control VQ buffers: protected by the rtnl lock */ - struct virtio_net_ctrl_hdr ctrl_hdr; - virtio_net_ctrl_ack ctrl_status; - struct virtio_net_ctrl_mq ctrl_mq; - u8 ctrl_promisc; - u8 ctrl_allmulti; - u16 ctrl_vid; - u64 ctrl_offloads; + struct control_buf *ctrl; /* Ethtool settings */ u8 duplex; @@ -1461,25 +1465,25 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, /* Caller should know better */ BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); - vi->ctrl_status = ~0; - vi->ctrl_hdr.class = class; - vi->ctrl_hdr.cmd = cmd; + vi->ctrl->status = ~0; + vi->ctrl->hdr.class = class; + vi->ctrl->hdr.cmd = cmd; /* Add header */ - sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr)); + sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); sgs[out_num++] = &hdr; if (out) sgs[out_num++] = out; /* Add return status. */ - sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status)); + sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); sgs[out_num] = &stat; BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC); if (unlikely(!virtqueue_kick(vi->cvq))) - return vi->ctrl_status == VIRTIO_NET_OK; + return vi->ctrl->status == VIRTIO_NET_OK; /* Spin for a response, the kick causes an ioport write, trapping * into the hypervisor, so the request should be handled immediately. @@ -1488,7 +1492,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, !virtqueue_is_broken(vi->cvq)) cpu_relax(); - return vi->ctrl_status == VIRTIO_NET_OK; + return vi->ctrl->status == VIRTIO_NET_OK; } static int virtnet_set_mac_address(struct net_device *dev, void *p) @@ -1600,8 +1604,8 @@ static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) return 0; - vi->ctrl_mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); - sg_init_one(&sg, &vi->ctrl_mq, sizeof(vi->ctrl_mq)); + vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); + sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { @@ -1660,22 +1664,22 @@ static void virtnet_set_rx_mode(struct net_device *dev) if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) return; - vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0); - vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0); + vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); + vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); - sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc)); + sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC, sg)) dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", - vi->ctrl_promisc ? "en" : "dis"); + vi->ctrl->promisc ? "en" : "dis"); - sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti)); + sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", - vi->ctrl_allmulti ? "en" : "dis"); + vi->ctrl->allmulti ? "en" : "dis"); uc_count = netdev_uc_count(dev); mc_count = netdev_mc_count(dev); @@ -1721,8 +1725,8 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev, struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; - vi->ctrl_vid = vid; - sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid)); + vi->ctrl->vid = vid; + sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_ADD, &sg)) @@ -1736,8 +1740,8 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; - vi->ctrl_vid = vid; - sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid)); + vi->ctrl->vid = vid; + sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_DEL, &sg)) @@ -2133,9 +2137,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) { struct scatterlist sg; - vi->ctrl_offloads = cpu_to_virtio64(vi->vdev, offloads); + vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads); - sg_init_one(&sg, &vi->ctrl_offloads, sizeof(vi->ctrl_offloads)); + sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { @@ -2358,6 +2362,7 @@ static void virtnet_free_queues(struct virtnet_info *vi) kfree(vi->rq); kfree(vi->sq); + kfree(vi->ctrl); } static void _free_receive_bufs(struct virtnet_info *vi) @@ -2550,6 +2555,9 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) { int i; + vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); + if (!vi->ctrl) + goto err_ctrl; vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL); if (!vi->sq) goto err_sq; @@ -2578,6 +2586,8 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) err_rq: kfree(vi->sq); err_sq: + kfree(vi->ctrl); +err_ctrl: return -ENOMEM; } From d7fad4c840f33a6bd333dd7fbb3006edbcf0017a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 19 Apr 2018 08:30:49 +0300 Subject: [PATCH 355/660] virtio_net: fix adding vids on big-endian Programming vids (adding or removing them) still passes guest-endian values in the DMA buffer. That's wrong if guest is big-endian and when virtio 1 is enabled. Note: this is on top of a previous patch: virtio_net: split out ctrl buffer Fixes: 9465a7a6f ("virtio_net: enable v1.0 support") Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9a675250107f..16b0c7db431b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -154,7 +154,7 @@ struct control_buf { struct virtio_net_ctrl_mq mq; u8 promisc; u8 allmulti; - u16 vid; + __virtio16 vid; u64 offloads; }; @@ -1725,7 +1725,7 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev, struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; - vi->ctrl->vid = vid; + vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, @@ -1740,7 +1740,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; - vi->ctrl->vid = vid; + vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, From f4ee703ace847f299da00944d57db7ff91786d0b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 19 Apr 2018 08:30:50 +0300 Subject: [PATCH 356/660] virtio_net: sparse annotation fix offloads is a buffer in virtio format, should use the __virtio64 tag. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 16b0c7db431b..770422e953f7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -155,7 +155,7 @@ struct control_buf { u8 promisc; u8 allmulti; __virtio16 vid; - u64 offloads; + __virtio64 offloads; }; struct virtnet_info { From a60faa60da891e311e19fd3e88d611863f431130 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 19 Apr 2018 03:16:16 -0400 Subject: [PATCH 357/660] bnxt_en: Fix memory fault in bnxt_ethtool_init() In some firmware images, the length of BNX_DIR_TYPE_PKG_LOG nvram type could be greater than the fixed buffer length of 4096 bytes allocated by the driver. This was causing HWRM_NVM_READ to copy more data to the buffer than the allocated size, causing general protection fault. Fix the issue by allocating the exact buffer length returned by HWRM_NVM_FIND_DIR_ENTRY, instead of 4096. Move the kzalloc() call into the bnxt_get_pkgver() function. Fixes: 3ebf6f0a09a2 ("bnxt_en: Add installed-package firmware version reporting via Ethtool GDRVINFO") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 49 ++++++++++--------- .../ethernet/broadcom/bnxt/bnxt_nvm_defs.h | 2 - 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1f622ca2a64f..8ba14ae00e8f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1927,22 +1927,39 @@ static char *bnxt_parse_pkglog(int desired_field, u8 *data, size_t datalen) return retval; } -static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen) +static void bnxt_get_pkgver(struct net_device *dev) { + struct bnxt *bp = netdev_priv(dev); u16 index = 0; - u32 datalen; + char *pkgver; + u32 pkglen; + u8 *pkgbuf; + int len; if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_PKG_LOG, BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE, - &index, NULL, &datalen) != 0) - return NULL; + &index, NULL, &pkglen) != 0) + return; - memset(buf, 0, buflen); - if (bnxt_get_nvram_item(dev, index, 0, datalen, buf) != 0) - return NULL; + pkgbuf = kzalloc(pkglen, GFP_KERNEL); + if (!pkgbuf) { + dev_err(&bp->pdev->dev, "Unable to allocate memory for pkg version, length = %u\n", + pkglen); + return; + } - return bnxt_parse_pkglog(BNX_PKG_LOG_FIELD_IDX_PKG_VERSION, buf, - datalen); + if (bnxt_get_nvram_item(dev, index, 0, pkglen, pkgbuf)) + goto err; + + pkgver = bnxt_parse_pkglog(BNX_PKG_LOG_FIELD_IDX_PKG_VERSION, pkgbuf, + pkglen); + if (pkgver && *pkgver != 0 && isdigit(*pkgver)) { + len = strlen(bp->fw_ver_str); + snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1, + "/pkg %s", pkgver); + } +err: + kfree(pkgbuf); } static int bnxt_get_eeprom(struct net_device *dev, @@ -2615,22 +2632,10 @@ void bnxt_ethtool_init(struct bnxt *bp) struct hwrm_selftest_qlist_input req = {0}; struct bnxt_test_info *test_info; struct net_device *dev = bp->dev; - char *pkglog; int i, rc; - pkglog = kzalloc(BNX_PKG_LOG_MAX_LENGTH, GFP_KERNEL); - if (pkglog) { - char *pkgver; - int len; + bnxt_get_pkgver(dev); - pkgver = bnxt_get_pkgver(dev, pkglog, BNX_PKG_LOG_MAX_LENGTH); - if (pkgver && *pkgver != 0 && isdigit(*pkgver)) { - len = strlen(bp->fw_ver_str); - snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1, - "/pkg %s", pkgver); - } - kfree(pkglog); - } if (bp->hwrm_spec_code < 0x10704 || !BNXT_SINGLE_PF(bp)) return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h index 73f2249555b5..83444811d3c6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h @@ -59,8 +59,6 @@ enum bnxt_nvm_directory_type { #define BNX_DIR_ATTR_NO_CHKSUM (1 << 0) #define BNX_DIR_ATTR_PROP_STREAM (1 << 1) -#define BNX_PKG_LOG_MAX_LENGTH 4096 - enum bnxnvm_pkglog_field_index { BNX_PKG_LOG_FIELD_IDX_INSTALLED_TIMESTAMP = 0, BNX_PKG_LOG_FIELD_IDX_PKG_DESCRIPTION = 1, From 1255fcb2a655f05e02f3a74675a6d6525f187afd Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 19 Apr 2018 15:56:40 +0200 Subject: [PATCH 358/660] net/smc: fix shutdown in state SMC_LISTEN Calling shutdown with SHUT_RD and SHUT_RDWR for a listening SMC socket crashes, because commit 127f49705823 ("net/smc: release clcsock from tcp_listen_worker") releases the internal clcsock in smc_close_active() and sets smc->clcsock to NULL. For SHUT_RD the smc_close_active() call is removed. For SHUT_RDWR the kernel_sock_shutdown() call is omitted, since the clcsock is already released. Fixes: 127f49705823 ("net/smc: release clcsock from tcp_listen_worker") Signed-off-by: Ursula Braun Reported-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/smc/af_smc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5f8046c62d90..f5d4b69dbabc 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1259,14 +1259,12 @@ static int smc_shutdown(struct socket *sock, int how) rc = smc_close_shutdown_write(smc); break; case SHUT_RD: - if (sk->sk_state == SMC_LISTEN) - rc = smc_close_active(smc); - else - rc = 0; - /* nothing more to do because peer is not involved */ + rc = 0; + /* nothing more to do because peer is not involved */ break; } - rc1 = kernel_sock_shutdown(smc->clcsock, how); + if (smc->clcsock) + rc1 = kernel_sock_shutdown(smc->clcsock, how); /* map sock_shutdown_cmd constants to sk_shutdown value range */ sk->sk_shutdown |= how + 1; From df3f126482dba8e00cdbfc8fc44a05a5a35b1704 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 16 Apr 2018 11:58:16 -0500 Subject: [PATCH 359/660] libnvdimm, of_pmem: use dev_to_node() instead of of_node_to_nid() Remove the direct dependency on of_node_to_nid() by using dev_to_node() instead. Any DT platform device will have its NUMA node id set when the device is created. With this, commit 291717b6fbdb ("libnvdimm, of_pmem: workaround OF_NUMA=n build error") can be reverted. Fixes: 717197608952 ("libnvdimm: Add device-tree based driver") Cc: Dan Williams Cc: Oliver O'Halloran Cc: linux-nvdimm@lists.01.org Signed-off-by: Rob Herring Signed-off-by: Dan Williams --- drivers/nvdimm/of_pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 85013bad35de..0a701837dfc0 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -67,7 +67,7 @@ static int of_pmem_region_probe(struct platform_device *pdev) */ memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = of_node_to_nid(np); + ndr_desc.numa_node = dev_to_node(&pdev->dev); ndr_desc.res = &pdev->resource[i]; ndr_desc.of_node = np; set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); From f22acf82746dd3d75e06d7c5801926d3b9c50169 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Apr 2018 15:07:42 -0700 Subject: [PATCH 360/660] Revert "libnvdimm, of_pmem: workaround OF_NUMA=n build error" With commit df3f126482db ("libnvdimm, of_pmem: use dev_to_node() instead of of_node_to_nid()") it is now possible to allow of_pmem to be built as a module as originally implemented. Signed-off-by: Dan Williams --- drivers/nvdimm/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 85997184e047..9d36473dc2a2 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -103,8 +103,7 @@ config NVDIMM_DAX Select Y if unsure config OF_PMEM - # FIXME: make tristate once OF_NUMA dependency removed - bool "Device-tree support for persistent memory regions" + tristate "Device-tree support for persistent memory regions" depends on OF default LIBNVDIMM help From ef8423022324cf79bd1b41d8707c766461e7e555 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 19 Apr 2018 13:39:43 -0700 Subject: [PATCH 361/660] device-dax: allow MAP_SYNC to succeed MAP_SYNC is a nop for device-dax. Allow MAP_SYNC to succeed on device-dax to eliminate special casing between device-dax and fs-dax as to when the flag can be specified. Device-dax users already implicitly assume that they do not need to call fsync(), and this enables them to explicitly check for this capability. Cc: Fixes: b6fb293f2497 ("mm: Define MAP_SYNC and VM_SYNC flags") Signed-off-by: Dave Jiang Reviewed-by: Dan Williams Signed-off-by: Dan Williams --- drivers/dax/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index be8606457f27..aff2c1594220 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "dax-private.h" #include "dax.h" @@ -540,6 +541,7 @@ static const struct file_operations dax_fops = { .release = dax_release, .get_unmapped_area = dax_get_unmapped_area, .mmap = dax_mmap, + .mmap_supported_flags = MAP_SYNC, }; static void dev_dax_release(struct device *dev) From c5794510d7b5f210f05531ff9e82432cf7244367 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 13 Apr 2018 13:47:40 -0700 Subject: [PATCH 362/660] MAINTAINERS: Add backup maintainers for libnvdimm and DAX Adding additional maintainers to libnvdimm related code and DAX. Signed-off-by: Dave Jiang Acked-by: Ross Zwisler Acked-by: Vishal Verma Signed-off-by: Dan Williams --- MAINTAINERS | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0a1410d5a621..60f89cdc565f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4245,6 +4245,9 @@ F: include/trace/events/fs_dax.h DEVICE DIRECT ACCESS (DAX) M: Dan Williams +M: Dave Jiang +M: Ross Zwisler +M: Vishal Verma L: linux-nvdimm@lists.01.org S: Supported F: drivers/dax/ @@ -8048,6 +8051,9 @@ F: tools/lib/lockdep/ LIBNVDIMM BLK: MMIO-APERTURE DRIVER M: Ross Zwisler +M: Dan Williams +M: Vishal Verma +M: Dave Jiang L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported @@ -8056,6 +8062,9 @@ F: drivers/nvdimm/region_devs.c LIBNVDIMM BTT: BLOCK TRANSLATION TABLE M: Vishal Verma +M: Dan Williams +M: Ross Zwisler +M: Dave Jiang L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported @@ -8063,6 +8072,9 @@ F: drivers/nvdimm/btt* LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER M: Ross Zwisler +M: Dan Williams +M: Vishal Verma +M: Dave Jiang L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported @@ -8078,6 +8090,9 @@ F: Documentation/devicetree/bindings/pmem/pmem-region.txt LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM M: Dan Williams +M: Ross Zwisler +M: Vishal Verma +M: Dave Jiang L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git From 16a34adb9392b2fe4195267475ab5b472e55292c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Apr 2018 22:03:08 -0400 Subject: [PATCH 363/660] Don't leak MNT_INTERNAL away from internal mounts We want it only for the stuff created by SB_KERNMOUNT mounts, *not* for their copies. As it is, creating a deep stack of bindings of /proc/*/ns/* somewhere in a new namespace and exiting yields a stack overflow. Cc: stable@kernel.org Reported-by: Alexander Aring Bisected-by: Kirill Tkhai Tested-by: Kirill Tkhai Tested-by: Alexander Aring Signed-off-by: Al Viro --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index e398f32d7541..8634d565b858 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1089,7 +1089,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, goto out_free; } - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); + mnt->mnt.mnt_flags = old->mnt.mnt_flags; + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); /* Don't allow unprivileged users to change mount flags */ if (flag & CL_UNPRIVILEGED) { mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; From 05189820da23fc87ee2a7d87c20257f298af27f4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 19 Apr 2018 20:26:00 +0200 Subject: [PATCH 364/660] x86/power/64: Fix page-table setup for temporary text mapping On a system with 4-level page-tables there is no p4d, so the pud in the pgd should be mapped. The old code before commit fb43d6cb91ef already did that. The change from above commit causes an invalid page-table which causes undefined behavior. In one report it caused triple faults. Fix it by changing the p4d back to pud. Fixes: fb43d6cb91ef ('x86/mm: Do not auto-massage page protections') Reported-by: Borislav Petkov Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Tested-by: Michal Kubecek Tested-by: Borislav Petkov Cc: linux-pm@vger.kernel.org Cc: rjw@rjwysocki.net Cc: pavel@ucw.cz Cc: hpa@zytor.com Cc: Dave Hansen Link: https://lkml.kernel.org/r/1524162360-26179-1-git-send-email-joro@8bytes.org --- arch/x86/power/hibernate_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 48b14b534897..ccf4a49bb065 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -98,7 +98,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) set_pgd(pgd + pgd_index(restore_jump_address), new_pgd); } else { /* No p4d for 4-level paging: point the pgd to the pud page table */ - pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot)); + pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot)); set_pgd(pgd + pgd_index(restore_jump_address), new_pgd); } From d7717587ac6deae00e0b66c0113a046be2c6fb1c Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 23 Mar 2018 09:11:29 -0400 Subject: [PATCH 365/660] perf/x86/intel/uncore: Revert "Remove SBOX support for Broadwell server" This reverts commit 3b94a891667c ("perf/x86/intel/uncore: Remove SBOX support for Broadwell server") Revert because there exists a proper workaround for Broadwell-EP servers without SBOX now. Note that BDX-DE does not have a SBOX. Signed-off-by: Stephane Eranian Signed-off-by: Thomas Gleixner Reviewed-by: Kan Liang Acked-by: Peter Zijlstra Cc: ak@linux.intel.com Cc: osk@google.com Cc: mark@voidzero.net Link: https://lkml.kernel.org/r/1521810690-2576-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index c98b943e58b4..5bbbbee11879 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3028,10 +3028,27 @@ static struct intel_uncore_type bdx_uncore_cbox = { .format_group = &hswep_uncore_cbox_format_group, }; +static struct intel_uncore_type bdx_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_S0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_SBOX_MSR_OFFSET, + .ops = &hswep_uncore_sbox_msr_ops, + .format_group = &hswep_uncore_sbox_format_group, +}; + +#define BDX_MSR_UNCORE_SBOX 3 + static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, &hswep_uncore_pcu, + &bdx_uncore_sbox, NULL, }; @@ -3047,6 +3064,10 @@ void bdx_uncore_cpu_init(void) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; + /* BDX-DE doesn't have SBOX */ + if (boot_cpu_data.x86_model == 86) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; + hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; } From 15a3e845b01ce2342cf187dc123c92c44c3c8170 Mon Sep 17 00:00:00 2001 From: Oskar Senft Date: Fri, 23 Mar 2018 09:11:30 -0400 Subject: [PATCH 366/660] perf/x86/intel/uncore: Fix SBOX support for Broadwell CPUs SBOX on some Broadwell CPUs is broken because it's enabled unconditionally despite the fact that there are no SBOXes available. Check the Power Control Unit CAPID4 register to determine the number of available SBOXes on the particular CPU before trying to enable them. If there are none, nullify the SBOX descriptor so it isn't tried to be initialized. Signed-off-by: Oskar Senft Signed-off-by: Thomas Gleixner Tested-by: Mark van Dijk Reviewed-by: Kan Liang Acked-by: Peter Zijlstra Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: eranian@google.com Link: https://lkml.kernel.org/r/1521810690-2576-2-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 5bbbbee11879..77076a102e34 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3060,14 +3060,25 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { + int pkg = topology_phys_to_logical_pkg(0); + if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; /* BDX-DE doesn't have SBOX */ - if (boot_cpu_data.x86_model == 86) + if (boot_cpu_data.x86_model == 86) { uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; + /* Detect systems with no SBOXes */ + } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { + struct pci_dev *pdev; + u32 capid4; + pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]; + pci_read_config_dword(pdev, 0x94, &capid4); + if (((capid4 >> 6) & 0x3) == 0) + bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; + } hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; } @@ -3285,6 +3296,11 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46), .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2), }, + { /* PCU.3 (for Capability registers) */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + HSWEP_PCI_PCU_3), + }, { /* end: all zeroes */ } }; From 621faf4f6a181b6e012c1d1865213f36f4159b7f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 20 Apr 2018 16:52:50 +0300 Subject: [PATCH 367/660] xhci: Fix USB ports for Dell Inspiron 5775 The Dell Inspiron 5775 is a Raven Ridge. The Enable Slot command timed out when a USB device gets plugged: [ 212.156326] xhci_hcd 0000:03:00.3: Error while assigning device slot ID [ 212.156340] xhci_hcd 0000:03:00.3: Max number of devices this xHCI host supports is 64. [ 212.156348] usb usb2-port3: couldn't allocate usb_device AMD suggests that a delay before xHC suspends can fix the issue. I can confirm it fixes the issue, so use the suspend delay quirk for Raven Ridge's xHC. Cc: stable@vger.kernel.org Signed-off-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index f17b7eab66cf..85ffda85f8ab 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -126,7 +126,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info()) xhci->quirks |= XHCI_AMD_PLL_FIX; - if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x43bb) + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == 0x15e0 || + pdev->device == 0x15e1 || + pdev->device == 0x43bb)) xhci->quirks |= XHCI_SUSPEND_DELAY; if (pdev->vendor == PCI_VENDOR_ID_AMD) From 2d79609bf21eedb2142f9dff7d4af9919cd7399a Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 20 Apr 2018 16:52:51 +0300 Subject: [PATCH 368/660] usb: host: xhci-plat: Remove useless test before clk_disable_unprepare clk_disable_unprepare() already checks that the clock pointer is valid. No need to test it before calling it. Signed-off-by: Gregory CLEMENT Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index df327dcc2bac..f0231fea524e 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -320,8 +320,7 @@ put_usb3_hcd: usb_put_hcd(xhci->shared_hcd); disable_clk: - if (!IS_ERR(clk)) - clk_disable_unprepare(clk); + clk_disable_unprepare(clk); put_hcd: usb_put_hcd(hcd); @@ -347,8 +346,7 @@ static int xhci_plat_remove(struct platform_device *dev) usb_remove_hcd(hcd); usb_put_hcd(xhci->shared_hcd); - if (!IS_ERR(clk)) - clk_disable_unprepare(clk); + clk_disable_unprepare(clk); usb_put_hcd(hcd); pm_runtime_set_suspended(&dev->dev); From 3ae2da7b28b393d4f6faef3d384cc725ef39716b Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 20 Apr 2018 16:52:52 +0300 Subject: [PATCH 369/660] usb: host: xhci-plat: Fix clock resource by adding a register clock On Armada 7K/8K we need to explicitly enable the register clock. This clock is optional because not all the SoCs using this IP need it but at least for Armada 7K/8K it is actually mandatory. The change was done at xhci-plat level and not at a xhci-mvebu.c because, it is expected that other SoC would have this kind of constraint. The binding documentation is updating accordingly. Signed-off-by: Gregory CLEMENT Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/usb-xhci.txt | 5 +++- drivers/usb/host/xhci-plat.c | 25 ++++++++++++++++--- drivers/usb/host/xhci.h | 3 ++- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt index c4c00dff4b56..bd1dd316fb23 100644 --- a/Documentation/devicetree/bindings/usb/usb-xhci.txt +++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt @@ -28,7 +28,10 @@ Required properties: - interrupts: one XHCI interrupt should be described here. Optional properties: - - clocks: reference to a clock + - clocks: reference to the clocks + - clock-names: mandatory if there is a second clock, in this case + the name must be "core" for the first clock and "reg" for the + second one - usb2-lpm-disable: indicate if we don't want to enable USB2 HW LPM - usb3-lpm-capable: determines if platform is USB3 LPM capable - quirk-broken-port-ped: set if the controller has broken port disable mechanism diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index f0231fea524e..596e7a71b666 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -157,6 +157,7 @@ static int xhci_plat_probe(struct platform_device *pdev) struct resource *res; struct usb_hcd *hcd; struct clk *clk; + struct clk *reg_clk; int ret; int irq; @@ -226,17 +227,27 @@ static int xhci_plat_probe(struct platform_device *pdev) hcd->rsrc_len = resource_size(res); /* - * Not all platforms have a clk so it is not an error if the - * clock does not exists. + * Not all platforms have clks so it is not an error if the + * clock do not exist. */ + reg_clk = devm_clk_get(&pdev->dev, "reg"); + if (!IS_ERR(reg_clk)) { + ret = clk_prepare_enable(reg_clk); + if (ret) + goto put_hcd; + } else if (PTR_ERR(reg_clk) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto put_hcd; + } + clk = devm_clk_get(&pdev->dev, NULL); if (!IS_ERR(clk)) { ret = clk_prepare_enable(clk); if (ret) - goto put_hcd; + goto disable_reg_clk; } else if (PTR_ERR(clk) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; - goto put_hcd; + goto disable_reg_clk; } xhci = hcd_to_xhci(hcd); @@ -252,6 +263,7 @@ static int xhci_plat_probe(struct platform_device *pdev) device_wakeup_enable(hcd->self.controller); xhci->clk = clk; + xhci->reg_clk = reg_clk; xhci->main_hcd = hcd; xhci->shared_hcd = __usb_create_hcd(driver, sysdev, &pdev->dev, dev_name(&pdev->dev), hcd); @@ -322,6 +334,9 @@ put_usb3_hcd: disable_clk: clk_disable_unprepare(clk); +disable_reg_clk: + clk_disable_unprepare(reg_clk); + put_hcd: usb_put_hcd(hcd); @@ -337,6 +352,7 @@ static int xhci_plat_remove(struct platform_device *dev) struct usb_hcd *hcd = platform_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct clk *clk = xhci->clk; + struct clk *reg_clk = xhci->reg_clk; xhci->xhc_state |= XHCI_STATE_REMOVING; @@ -347,6 +363,7 @@ static int xhci_plat_remove(struct platform_device *dev) usb_put_hcd(xhci->shared_hcd); clk_disable_unprepare(clk); + clk_disable_unprepare(reg_clk); usb_put_hcd(hcd); pm_runtime_set_suspended(&dev->dev); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 05c909b04f14..6dfc4867dbcf 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1729,8 +1729,9 @@ struct xhci_hcd { int page_shift; /* msi-x vectors */ int msix_count; - /* optional clock */ + /* optional clocks */ struct clk *clk; + struct clk *reg_clk; /* data structures */ struct xhci_device_context_array *dcbaa; struct xhci_ring *cmd_ring; From 85bd0ba1ff9875798fad94218b627ea9f768f3c3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 21 Jan 2018 16:42:56 +0000 Subject: [PATCH 370/660] arm/arm64: KVM: Add PSCI version selection API Although we've implemented PSCI 0.1, 0.2 and 1.0, we expose either 0.1 or 1.0 to a guest, defaulting to the latest version of the PSCI implementation that is compatible with the requested version. This is no different from doing a firmware upgrade on KVM. But in order to give a chance to hypothetical badly implemented guests that would have a fit by discovering something other than PSCI 0.2, let's provide a new API that allows userspace to pick one particular version of the API. This is implemented as a new class of "firmware" registers, where we expose the PSCI version. This allows the PSCI version to be save/restored as part of a guest migration, and also set to any supported version if the guest requires it. Cc: stable@vger.kernel.org #4.16 Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 9 +++- Documentation/virtual/kvm/arm/psci.txt | 30 +++++++++++++ arch/arm/include/asm/kvm_host.h | 3 ++ arch/arm/include/uapi/asm/kvm.h | 6 +++ arch/arm/kvm/guest.c | 13 ++++++ arch/arm64/include/asm/kvm_host.h | 3 ++ arch/arm64/include/uapi/asm/kvm.h | 6 +++ arch/arm64/kvm/guest.c | 14 +++++- include/kvm/arm_psci.h | 16 ++++++- virt/kvm/arm/psci.c | 60 ++++++++++++++++++++++++++ 10 files changed, 156 insertions(+), 4 deletions(-) create mode 100644 Documentation/virtual/kvm/arm/psci.txt diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 1c7958b57fe9..758bf403a169 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1960,6 +1960,9 @@ ARM 32-bit VFP control registers have the following id bit patterns: ARM 64-bit FP registers have the following id bit patterns: 0x4030 0000 0012 0 +ARM firmware pseudo-registers have the following bit pattern: + 0x4030 0000 0014 + arm64 registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: @@ -1976,6 +1979,9 @@ arm64 CCSIDR registers are demultiplexed by CSSELR value: arm64 system registers have the following id bit patterns: 0x6030 0000 0013 +arm64 firmware pseudo-registers have the following bit pattern: + 0x6030 0000 0014 + MIPS registers are mapped using the lower 32 bits. The upper 16 of that is the register group type: @@ -2510,7 +2516,8 @@ Possible features: and execute guest code when KVM_RUN is called. - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). - - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. + - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 (or a future revision + backward compatible with v0.2) for the CPU. Depends on KVM_CAP_ARM_PSCI_0_2. - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU. Depends on KVM_CAP_ARM_PMU_V3. diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virtual/kvm/arm/psci.txt new file mode 100644 index 000000000000..aafdab887b04 --- /dev/null +++ b/Documentation/virtual/kvm/arm/psci.txt @@ -0,0 +1,30 @@ +KVM implements the PSCI (Power State Coordination Interface) +specification in order to provide services such as CPU on/off, reset +and power-off to the guest. + +The PSCI specification is regularly updated to provide new features, +and KVM implements these updates if they make sense from a virtualization +point of view. + +This means that a guest booted on two different versions of KVM can +observe two different "firmware" revisions. This could cause issues if +a given guest is tied to a particular PSCI revision (unlikely), or if +a migration causes a different PSCI version to be exposed out of the +blue to an unsuspecting guest. + +In order to remedy this situation, KVM exposes a set of "firmware +pseudo-registers" that can be manipulated using the GET/SET_ONE_REG +interface. These registers can be saved/restored by userspace, and set +to a convenient value if required. + +The following register is defined: + +* KVM_REG_ARM_PSCI_VERSION: + + - Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set + (and thus has already been initialized) + - Returns the current PSCI version on GET_ONE_REG (defaulting to the + highest PSCI version implemented by KVM and compatible with v0.2) + - Allows any PSCI version implemented by KVM and compatible with + v0.2 to be set with SET_ONE_REG + - Affects the whole VM (even if the register view is per-vcpu) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c6a749568dd6..c7c28c885a19 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -77,6 +77,9 @@ struct kvm_arch { /* Interrupt controller */ struct vgic_dist vgic; int max_vcpus; + + /* Mandated version of PSCI */ + u32 psci_version; }; #define KVM_NR_MEM_OBJS 40 diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 2ba95d6fe852..caae4843cb70 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -195,6 +195,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 1e0784ebbfd6..a18f33edc471 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -176,6 +177,7 @@ static unsigned long num_core_regs(void) unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { return num_core_regs() + kvm_arm_num_coproc_regs(vcpu) + + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS; } @@ -196,6 +198,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices); + if (ret) + return ret; + uindices += kvm_arm_get_fw_num_regs(vcpu); + ret = copy_timer_indices(vcpu, uindices); if (ret) return ret; @@ -214,6 +221,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) + return kvm_arm_get_fw_reg(vcpu, reg); + if (is_timer_reg(reg->id)) return get_timer_reg(vcpu, reg); @@ -230,6 +240,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) + return kvm_arm_set_fw_reg(vcpu, reg); + if (is_timer_reg(reg->id)) return set_timer_reg(vcpu, reg); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ab46bc70add6..469de8acd06f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -75,6 +75,9 @@ struct kvm_arch { /* Interrupt controller */ struct vgic_dist vgic; + + /* Mandated version of PSCI */ + u32 psci_version; }; #define KVM_NR_MEM_OBJS 40 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 9abbf3044654..04b3256f8e6d 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -206,6 +206,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 959e50d2588c..56a0260ceb11 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -205,7 +206,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) - + NUM_TIMER_REGS; + + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS; } /** @@ -225,6 +226,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices); + if (ret) + return ret; + uindices += kvm_arm_get_fw_num_regs(vcpu); + ret = copy_timer_indices(vcpu, uindices); if (ret) return ret; @@ -243,6 +249,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) + return kvm_arm_get_fw_reg(vcpu, reg); + if (is_timer_reg(reg->id)) return get_timer_reg(vcpu, reg); @@ -259,6 +268,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) + return kvm_arm_set_fw_reg(vcpu, reg); + if (is_timer_reg(reg->id)) return set_timer_reg(vcpu, reg); diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index e518e4e3dfb5..4b1548129fa2 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -37,10 +37,15 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) * Our PSCI implementation stays the same across versions from * v0.2 onward, only adding the few mandatory functions (such * as FEATURES with 1.0) that are required by newer - * revisions. It is thus safe to return the latest. + * revisions. It is thus safe to return the latest, unless + * userspace has instructed us otherwise. */ - if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) { + if (vcpu->kvm->arch.psci_version) + return vcpu->kvm->arch.psci_version; + return KVM_ARM_PSCI_LATEST; + } return KVM_ARM_PSCI_0_1; } @@ -48,4 +53,11 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); +struct kvm_one_reg; + +int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + #endif /* __KVM_ARM_PSCI_H__ */ diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 6919352cbf15..c4762bef13c6 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -427,3 +428,62 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) smccc_set_retval(vcpu, val, 0, 0, 0); return 1; } + +int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) +{ + return 1; /* PSCI version */ +} + +int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices)) + return -EFAULT; + + return 0; +} + +int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + if (reg->id == KVM_REG_ARM_PSCI_VERSION) { + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_psci_version(vcpu, vcpu->kvm); + if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; + } + + return -EINVAL; +} + +int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + if (reg->id == KVM_REG_ARM_PSCI_VERSION) { + void __user *uaddr = (void __user *)(long)reg->addr; + bool wants_02; + u64 val; + + if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); + + switch (val) { + case KVM_ARM_PSCI_0_1: + if (wants_02) + return -EINVAL; + vcpu->kvm->arch.psci_version = val; + return 0; + case KVM_ARM_PSCI_0_2: + case KVM_ARM_PSCI_1_0: + if (!wants_02) + return -EINVAL; + vcpu->kvm->arch.psci_version = val; + return 0; + } + } + + return -EINVAL; +} From b346e492d7127e4332d5a9989b844b2095cc4fcd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 16 Apr 2018 16:59:13 -0700 Subject: [PATCH 371/660] crypto: api - fix finding algorithm currently being tested Commit eb02c38f0197 ("crypto: api - Keep failed instances alive") is making allocating crypto transforms sometimes fail with ELIBBAD, when multiple processes try to access encrypted files with fscrypt for the first time since boot. The problem is that the "request larval" for the algorithm is being mistaken for an algorithm which failed its tests. Fix it by only returning ELIBBAD for "non-larval" algorithms. Also don't leak a reference to the algorithm. Fixes: eb02c38f0197 ("crypto: api - Keep failed instances alive") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/api.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 1d5290c67108..0ee632bba064 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -204,9 +204,14 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, down_read(&crypto_alg_sem); alg = __crypto_alg_lookup(name, type | test, mask | test); - if (!alg && test) - alg = __crypto_alg_lookup(name, type, mask) ? - ERR_PTR(-ELIBBAD) : NULL; + if (!alg && test) { + alg = __crypto_alg_lookup(name, type, mask); + if (alg && !crypto_is_larval(alg)) { + /* Test failed */ + crypto_mod_put(alg); + alg = ERR_PTR(-ELIBBAD); + } + } up_read(&crypto_alg_sem); return alg; From eea0d3ea7546961f69f55b26714ac8fd71c7c020 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 12 Apr 2018 08:40:55 +0200 Subject: [PATCH 372/660] crypto: drbg - set freed buffers to NULL During freeing of the internal buffers used by the DRBG, set the pointer to NULL. It is possible that the context with the freed buffers is reused. In case of an error during initialization where the pointers do not yet point to allocated memory, the NULL value prevents a double free. Cc: stable@vger.kernel.org Fixes: 3cfc3b9721123 ("crypto: drbg - use aligned buffers") Signed-off-by: Stephan Mueller Reported-by: syzbot+75397ee3df5c70164154@syzkaller.appspotmail.com Signed-off-by: Herbert Xu --- crypto/drbg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/drbg.c b/crypto/drbg.c index 4faa2781c964..466a112a4446 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1134,8 +1134,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) if (!drbg) return; kzfree(drbg->Vbuf); + drbg->Vbuf = NULL; drbg->V = NULL; kzfree(drbg->Cbuf); + drbg->Cbuf = NULL; drbg->C = NULL; kzfree(drbg->scratchpadbuf); drbg->scratchpadbuf = NULL; From 660625922b3d9fcb376e5870299bc5c1086e1d32 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Apr 2018 09:38:34 +0100 Subject: [PATCH 373/660] afs: Fix server record deletion AFS server records get removed from the net->fs_servers tree when they're deleted, but not from the net->fs_addresses{4,6} lists, which can lead to an oops in afs_find_server() when a server record has been removed, for instance during rmmod. Fix this by deleting the record from the by-address lists before posting it for RCU destruction. The reason this hasn't been noticed before is that the fileserver keeps probing the local cache manager, thereby keeping the service record alive, so the oops would only happen when a fileserver eventually gets bored and stops pinging or if the module gets rmmod'd and a call comes in from the fileserver during the window between the server records being destroyed and the socket being closed. The oops looks something like: BUG: unable to handle kernel NULL pointer dereference at 000000000000001c ... Workqueue: kafsd afs_process_async_call [kafs] RIP: 0010:afs_find_server+0x271/0x36f [kafs] ... Call Trace: afs_deliver_cb_init_call_back_state3+0x1f2/0x21f [kafs] afs_deliver_to_call+0x1ee/0x5e8 [kafs] afs_process_async_call+0x5b/0xd0 [kafs] process_one_work+0x2c2/0x504 worker_thread+0x1d4/0x2ac kthread+0x11f/0x127 ret_from_fork+0x24/0x30 Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/server.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/afs/server.c b/fs/afs/server.c index e23be63998a8..629c74986cff 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -428,8 +428,15 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) } write_sequnlock(&net->fs_lock); - if (deleted) + if (deleted) { + write_seqlock(&net->fs_addr_lock); + if (!hlist_unhashed(&server->addr4_link)) + hlist_del_rcu(&server->addr4_link); + if (!hlist_unhashed(&server->addr6_link)) + hlist_del_rcu(&server->addr6_link); + write_sequnlock(&net->fs_addr_lock); afs_destroy_server(net, server); + } } } From a9e5b73288cf1595ac2e05cf1acd1924ceea05fa Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Apr 2018 13:35:02 +0100 Subject: [PATCH 374/660] vfs: Undo an overly zealous MS_RDONLY -> SB_RDONLY conversion In do_mount() when the MS_* flags are being converted to MNT_* flags, MS_RDONLY got accidentally convered to SB_RDONLY. Undo this change. Fixes: e462ec50cb5f ("VFS: Differentiate mount flags (MS_*) from internal superblock flags") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 8634d565b858..5f75969adff1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2815,7 +2815,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); - if (flags & SB_RDONLY) + if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; /* The default atime for remount is preservation */ From 5e388e95815408c27f3612190d089afc0774b870 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 18 Apr 2018 09:41:54 +0300 Subject: [PATCH 375/660] btrfs: Fix race condition between delayed refs and blockgroup removal When the delayed refs for a head are all run, eventually cleanup_ref_head is called which (in case of deletion) obtains a reference for the relevant btrfs_space_info struct by querying the bg for the range. This is problematic because when the last extent of a bg is deleted a race window emerges between removal of that bg and the subsequent invocation of cleanup_ref_head. This can result in cache being null and either a null pointer dereference or assertion failure. task: ffff8d04d31ed080 task.stack: ffff9e5dc10cc000 RIP: 0010:assfail.constprop.78+0x18/0x1a [btrfs] RSP: 0018:ffff9e5dc10cfbe8 EFLAGS: 00010292 RAX: 0000000000000044 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8d04ffc1f868 RSI: ffff8d04ffc178c8 RDI: ffff8d04ffc178c8 RBP: ffff8d04d29e5ea0 R08: 00000000000001f0 R09: 0000000000000001 R10: ffff9e5dc0507d58 R11: 0000000000000001 R12: ffff8d04d29e5ea0 R13: ffff8d04d29e5f08 R14: ffff8d04efe29b40 R15: ffff8d04efe203e0 FS: 00007fbf58ead500(0000) GS:ffff8d04ffc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe6c6975648 CR3: 0000000013b2a000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __btrfs_run_delayed_refs+0x10e7/0x12c0 [btrfs] btrfs_run_delayed_refs+0x68/0x250 [btrfs] btrfs_should_end_transaction+0x42/0x60 [btrfs] btrfs_truncate_inode_items+0xaac/0xfc0 [btrfs] btrfs_evict_inode+0x4c6/0x5c0 [btrfs] evict+0xc6/0x190 do_unlinkat+0x19c/0x300 do_syscall_64+0x74/0x140 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7fbf589c57a7 To fix this, introduce a new flag "is_system" to head_ref structs, which is populated at insertion time. This allows to decouple the querying for the spaceinfo from querying the possibly deleted bg. Fixes: d7eae3403f46 ("Btrfs: rework delayed ref total_bytes_pinned accounting") CC: stable@vger.kernel.org # 4.14+ Suggested-by: Omar Sandoval Signed-off-by: Nikolay Borisov Reviewed-by: Omar Sandoval Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 19 ++++++++++++++----- fs/btrfs/delayed-ref.h | 1 + fs/btrfs/extent-tree.c | 16 +++++++++++----- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 9e98295de7ce..e1b0651686f7 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_qgroup_extent_record *qrecord, u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, - int action, int is_data, int *qrecord_inserted_ret, + int action, int is_data, int is_system, + int *qrecord_inserted_ret, int *old_ref_mod, int *new_ref_mod) + { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_root *delayed_refs; @@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref->ref_mod = count_mod; head_ref->must_insert_reserved = must_insert_reserved; head_ref->is_data = is_data; + head_ref->is_system = is_system; head_ref->ref_tree = RB_ROOT; INIT_LIST_HEAD(&head_ref->ref_add_list); RB_CLEAR_NODE(&head_ref->href_node); @@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; int qrecord_inserted; + int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID); BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); @@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, bytenr, num_bytes, 0, 0, action, 0, - &qrecord_inserted, old_ref_mod, - new_ref_mod); + is_system, &qrecord_inserted, + old_ref_mod, new_ref_mod); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, level, action); @@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, bytenr, num_bytes, ref_root, reserved, - action, 1, &qrecord_inserted, + action, 1, 0, &qrecord_inserted, old_ref_mod, new_ref_mod); add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, @@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); + /* + * extent_ops just modify the flags of an extent and they don't result + * in ref count changes, hence it's safe to pass false/0 for is_system + * argument + */ add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr, num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, - extent_op->is_data, NULL, NULL, NULL); + extent_op->is_data, 0, NULL, NULL, NULL); spin_unlock(&delayed_refs->lock); return 0; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 741869dbc316..7f00db50bd24 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head { */ unsigned int must_insert_reserved:1; unsigned int is_data:1; + unsigned int is_system:1; unsigned int processing:1; }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 055494ddcace..f99102063366 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, trace_run_delayed_ref_head(fs_info, head, 0); if (head->total_ref_mod < 0) { - struct btrfs_block_group_cache *cache; + struct btrfs_space_info *space_info; + u64 flags; - cache = btrfs_lookup_block_group(fs_info, head->bytenr); - ASSERT(cache); - percpu_counter_add(&cache->space_info->total_bytes_pinned, + if (head->is_data) + flags = BTRFS_BLOCK_GROUP_DATA; + else if (head->is_system) + flags = BTRFS_BLOCK_GROUP_SYSTEM; + else + flags = BTRFS_BLOCK_GROUP_METADATA; + space_info = __find_space_info(fs_info, flags); + ASSERT(space_info); + percpu_counter_add(&space_info->total_bytes_pinned, -head->num_bytes); - btrfs_put_block_group(cache); if (head->is_data) { spin_lock(&delayed_refs->lock); From c0872323746e11fc79344e3738b283a8cda86654 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 11 Apr 2018 17:08:12 +0800 Subject: [PATCH 376/660] btrfs: print-tree: debugging output enhancement This patch enhances the following things: - tree block header * add generation and owner output for node and leaf - node pointer generation output - allow btrfs_print_tree() to not follow nodes * just like btrfs-progs Please note that, although function btrfs_print_tree() is not called by anyone right now, it's still a pretty useful function to debug kernel. So that function is still kept for later use. Signed-off-by: Qu Wenruo Reviewed-by: Lu Fengqi Signed-off-by: David Sterba --- fs/btrfs/print-tree.c | 25 +++++++++++++++---------- fs/btrfs/print-tree.h | 2 +- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 124276bba8cf..21a831d3d087 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -189,9 +189,10 @@ void btrfs_print_leaf(struct extent_buffer *l) fs_info = l->fs_info; nr = btrfs_header_nritems(l); - btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d", - btrfs_header_bytenr(l), nr, - btrfs_leaf_free_space(fs_info, l)); + btrfs_info(fs_info, + "leaf %llu gen %llu total ptrs %d free space %d owner %llu", + btrfs_header_bytenr(l), btrfs_header_generation(l), nr, + btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(i); btrfs_item_key_to_cpu(l, &key, i); @@ -325,7 +326,7 @@ void btrfs_print_leaf(struct extent_buffer *l) } } -void btrfs_print_tree(struct extent_buffer *c) +void btrfs_print_tree(struct extent_buffer *c, bool follow) { struct btrfs_fs_info *fs_info; int i; u32 nr; @@ -342,15 +343,19 @@ void btrfs_print_tree(struct extent_buffer *c) return; } btrfs_info(fs_info, - "node %llu level %d total ptrs %d free spc %u", - btrfs_header_bytenr(c), level, nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr); + "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu", + btrfs_header_bytenr(c), level, btrfs_header_generation(c), + nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr, + btrfs_header_owner(c)); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); - pr_info("\tkey %d (%llu %u %llu) block %llu\n", + pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n", i, key.objectid, key.type, key.offset, - btrfs_node_blockptr(c, i)); + btrfs_node_blockptr(c, i), + btrfs_node_ptr_generation(c, i)); } + if (!follow) + return; for (i = 0; i < nr; i++) { struct btrfs_key first_key; struct extent_buffer *next; @@ -372,7 +377,7 @@ void btrfs_print_tree(struct extent_buffer *c) if (btrfs_header_level(next) != level - 1) BUG(); - btrfs_print_tree(next); + btrfs_print_tree(next, follow); free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 4a98481688f4..e6bb38fd75ad 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -7,6 +7,6 @@ #define BTRFS_PRINT_TREE_H void btrfs_print_leaf(struct extent_buffer *l); -void btrfs_print_tree(struct extent_buffer *c); +void btrfs_print_tree(struct extent_buffer *c, bool follow); #endif From ff30b89e0ab71115cbad6ae10a58bd83fe18b41f Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Apr 2018 12:17:10 -0700 Subject: [PATCH 377/660] cifs: smbd: Dump SMB packet when configured When sending through SMB Direct, also dump the packet in SMB send path. Also fixed a typo in debug message. Signed-off-by: Long Li Cc: stable@vger.kernel.org Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/smbdirect.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index d611ed0537fd..87817ddcc096 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1028,7 +1028,7 @@ static int smbd_post_send(struct smbd_connection *info, for (i = 0; i < request->num_sge; i++) { log_rdma_send(INFO, "rdma_request sge[%d] addr=%llu length=%u\n", - i, request->sge[0].addr, request->sge[0].length); + i, request->sge[i].addr, request->sge[i].length); ib_dma_sync_single_for_device( info->id->device, request->sge[i].addr, @@ -2139,6 +2139,10 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) goto done; } + cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen); + for (i = 0; i < rqst->rq_nvec-1; i++) + dump_smb(iov[i].iov_base, iov[i].iov_len); + remaining_data_length = buflen; log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d " From 0cf22d6b317ce0103b7d5a47878aa5ef94240433 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 Apr 2018 12:56:36 -0500 Subject: [PATCH 378/660] PCI: Add "PCIe" to pcie_print_link_status() messages Currently the pcie_print_link_status() will print PCIe bandwidth and link width information but does not mention it is pertaining to the PCIe. Since this and related functions are used exclusively by networking drivers today users may get confused into thinking that it's the NIC bandwidth that is being talked about. Insert a "PCIe" into the messages. Signed-off-by: Jakub Kicinski Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e597655a5643..a04197ce767d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5273,11 +5273,11 @@ void pcie_print_link_status(struct pci_dev *dev) bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, &width); if (bw_avail >= bw_cap) - pci_info(dev, "%u.%03u Gb/s available bandwidth (%s x%d link)\n", + pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth (%s x%d link)\n", bw_cap / 1000, bw_cap % 1000, PCIE_SPEED2STR(speed_cap), width_cap); else - pci_info(dev, "%u.%03u Gb/s available bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n", + pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n", bw_avail / 1000, bw_avail % 1000, PCIE_SPEED2STR(speed), width, limiting_dev ? pci_name(limiting_dev) : "", From 1d0cffa674cfa7d185a302c8c6850fc50b893bed Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 20 Apr 2018 12:19:07 -0500 Subject: [PATCH 379/660] cifs: do not allow creating sockets except with SMB1 posix exensions RHBZ: 1453123 Since at least the 3.10 kernel and likely a lot earlier we have not been able to create unix domain sockets in a cifs share when mounted using the SFU mount option (except when mounted with the cifs unix extensions to Samba e.g.) Trying to create a socket, for example using the af_unix command from xfstests will cause : BUG: unable to handle kernel NULL pointer dereference at 00000000 00000040 Since no one uses or depends on being able to create unix domains sockets on a cifs share the easiest fix to stop this vulnerability is to simply not allow creation of any other special files than char or block devices when sfu is used. Added update to Ronnie's patch to handle a tcon link leak, and to address a buf leak noticed by Gustavo and Colin. Acked-by: Gustavo A. R. Silva CC: Colin Ian King Reviewed-by: Pavel Shilovsky Reported-by: Eryu Guan Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Cc: stable@vger.kernel.org --- fs/cifs/dir.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 81ba6e0d88d8..925844343038 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -684,6 +684,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, goto mknod_out; } + if (!S_ISCHR(mode) && !S_ISBLK(mode)) + goto mknod_out; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) goto mknod_out; @@ -692,10 +695,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { - kfree(full_path); rc = -ENOMEM; - free_xid(xid); - return rc; + goto mknod_out; } if (backup_cred(cifs_sb)) @@ -742,7 +743,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, pdev->minor = cpu_to_le64(MINOR(device_number)); rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, &bytes_written, iov, 1); - } /* else if (S_ISFIFO) */ + } tcon->ses->server->ops->close(xid, tcon, &fid); d_drop(direntry); From 596632de0440baecaccc9d4347329c64661c400f Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 19 Apr 2018 10:44:20 +0200 Subject: [PATCH 380/660] CIFS: fix typo in cifs_dbg Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reported-by: Long Li --- fs/cifs/cifs_debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index fe5567655662..0e74690d11bc 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -54,7 +54,7 @@ do { \ pr_debug_ ## ratefunc("%s: " \ fmt, __FILE__, ##__VA_ARGS__); \ } else if ((type) & VFS) { \ - pr_err_ ## ratefunc("CuIFS VFS: " \ + pr_err_ ## ratefunc("CIFS VFS: " \ fmt, ##__VA_ARGS__); \ } else if ((type) & NOISY && (NOISY != 0)) { \ pr_debug_ ## ratefunc(fmt, ##__VA_ARGS__); \ From 6ab690aa439803347743c0d899ac422774fdd5e7 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 20 Apr 2018 18:16:30 +0200 Subject: [PATCH 381/660] bpf: sockmap remove dead check Remove dead code that bails on `attr->value_size > KMALLOC_MAX_SIZE` - the previous check already bails on `attr->value_size != 4`. Signed-off-by: Jann Horn Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 8dd9210d7db7..a3b21385e947 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1442,9 +1442,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); - if (attr->value_size > KMALLOC_MAX_SIZE) - return ERR_PTR(-E2BIG); - err = bpf_tcp_ulp_register(); if (err && err != -EEXIST) return ERR_PTR(err); From 007b4e8b705a4eff184d567c5a8b496622f9e116 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Thu, 5 Apr 2018 14:57:59 +0200 Subject: [PATCH 382/660] mtd: rawnand: tango: Fix struct clk memory leak Use devm_clk_get() to let Linux manage struct clk memory. Fixes: 6956e2385a16 ("add tango NAND flash controller support") Cc: stable@vger.kernel.org Reported-by: Xidong Wang Signed-off-by: Marc Gonzalez Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/tango_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c index f54518ffb36a..f2052fae21c7 100644 --- a/drivers/mtd/nand/raw/tango_nand.c +++ b/drivers/mtd/nand/raw/tango_nand.c @@ -645,7 +645,7 @@ static int tango_nand_probe(struct platform_device *pdev) writel_relaxed(MODE_RAW, nfc->pbus_base + PBUS_PAD_MODE); - clk = clk_get(&pdev->dev, NULL); + clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); From e01e80634ecdde1dd113ac43b3adad21b47f3957 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 20 Apr 2018 14:55:31 -0700 Subject: [PATCH 383/660] fork: unconditionally clear stack on fork One of the classes of kernel stack content leaks[1] is exposing the contents of prior heap or stack contents when a new process stack is allocated. Normally, those stacks are not zeroed, and the old contents remain in place. In the face of stack content exposure flaws, those contents can leak to userspace. Fixing this will make the kernel no longer vulnerable to these flaws, as the stack will be wiped each time a stack is assigned to a new process. There's not a meaningful change in runtime performance; it almost looks like it provides a benefit. Performing back-to-back kernel builds before: Run times: 157.86 157.09 158.90 160.94 160.80 Mean: 159.12 Std Dev: 1.54 and after: Run times: 159.31 157.34 156.71 158.15 160.81 Mean: 158.46 Std Dev: 1.46 Instead of making this a build or runtime config, Andy Lutomirski recommended this just be enabled by default. [1] A noisy search for many kinds of stack content leaks can be seen here: https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=linux+kernel+stack+leak I did some more with perf and cycle counts on running 100,000 execs of /bin/true. before: Cycles: 218858861551 218853036130 214727610969 227656844122 224980542841 Mean: 221015379122.60 Std Dev: 4662486552.47 after: Cycles: 213868945060 213119275204 211820169456 224426673259 225489986348 Mean: 217745009865.40 Std Dev: 5935559279.99 It continues to look like it's faster, though the deviation is rather wide, but I'm not sure what I could do that would be less noisy. I'm open to ideas! Link: http://lkml.kernel.org/r/20180221021659.GA37073@beast Signed-off-by: Kees Cook Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Andy Lutomirski Cc: Laura Abbott Cc: Rasmus Villemoes Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 6 +----- kernel/fork.c | 3 +-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 34f053a150a9..cf2862bd134a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -43,11 +43,7 @@ enum { #define THREAD_ALIGN THREAD_SIZE #endif -#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) -#else -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT) -#endif +#define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) /* * flag set/clear/test wrappers diff --git a/kernel/fork.c b/kernel/fork.c index 242c8c93d285..a5d21c42acfc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -216,10 +216,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; -#ifdef CONFIG_DEBUG_KMEMLEAK /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE); -#endif + tsk->stack_vm_area = s; return s->addr; } From 8f175cf5c99dc0e3add2aac0ea1cd54e0f9ca87d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 Apr 2018 14:55:35 -0700 Subject: [PATCH 384/660] mm: fix do_pages_move status handling Li Wang has reported that LTP move_pages04 test fails with the current tree: LTP move_pages04: TFAIL : move_pages04.c:143: status[1] is EPERM, expected EFAULT The test allocates an array of two pages, one is present while the other is not (resp. backed by zero page) and it expects EFAULT for the second page as the man page suggests. We are reporting EPERM which doesn't make any sense and this is a result of a bug from cf5f16b23ec9 ("mm: unclutter THP migration"). do_pages_move tries to handle as many pages in one batch as possible so we queue all pages with the same node target together and that corresponds to [start, i] range which is then used to update status array. add_page_for_migration will correctly notice the zero (resp. !present) page and returns with EFAULT which gets written to the status. But if this is the last page in the array we do not update start and so the last store_status after the loop will overwrite the range of the last batch with NUMA_NO_NODE (which corresponds to EPERM). Fix this by simply bailing out from the last flush if the pagelist is empty as there is clearly nothing more to do. Link: http://lkml.kernel.org/r/20180418121255.334-1-mhocko@kernel.org Fixes: cf5f16b23ec9 ("mm: unclutter THP migration") Signed-off-by: Michal Hocko Reported-by: Li Wang Tested-by: Li Wang Cc: Zi Yan Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index f65dd69e1fd1..70ef794cccae 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1622,6 +1622,9 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, current_node = NUMA_NO_NODE; } out_flush: + if (list_empty(&pagelist)) + return err; + /* Make sure we do not overwrite the existing error */ err1 = do_move_pages_to_node(mm, &pagelist, current_node); if (!err1) From 88c28f2469151b031f8cea9b28ed5be1b74a4172 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 20 Apr 2018 14:55:38 -0700 Subject: [PATCH 385/660] mm, pagemap: fix swap offset value for PMD migration entry The swap offset reported by /proc//pagemap may be not correct for PMD migration entries. If addr passed into pagemap_pmd_range() isn't aligned with PMD start address, the swap offset reported doesn't reflect this. And in the loop to report information of each sub-page, the swap offset isn't increased accordingly as that for PFN. This may happen after opening /proc//pagemap and seeking to a page whose address doesn't align with a PMD start address. I have verified this with a simple test program. BTW: migration swap entries have PFN information, do we need to restrict whether to show them? [akpm@linux-foundation.org: fix typo, per Huang, Ying] Link: http://lkml.kernel.org/r/20180408033737.10897-1-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Michal Hocko Cc: "Kirill A. Shutemov" Cc: Andrei Vagin Cc: Dan Williams Cc: "Jerome Glisse" Cc: Daniel Colascione Cc: Zi Yan Cc: Naoya Horiguchi Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 65ae54659833..c486ad4b43f0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1310,9 +1310,11 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { swp_entry_t entry = pmd_to_swp_entry(pmd); + unsigned long offset = swp_offset(entry); + offset += (addr & ~PMD_MASK) >> PAGE_SHIFT; frame = swp_type(entry) | - (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + (offset << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; @@ -1332,6 +1334,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, break; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; + else if (flags & PM_SWAP) + frame += (1 << MAX_SWAPFILES_SHIFT); } spin_unlock(ptl); return err; From 2e898e4c0a3897ccd434adac5abb8330194f527b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 20 Apr 2018 14:55:42 -0700 Subject: [PATCH 386/660] writeback: safer lock nesting lock_page_memcg()/unlock_page_memcg() use spin_lock_irqsave/restore() if the page's memcg is undergoing move accounting, which occurs when a process leaves its memcg for a new one that has memory.move_charge_at_immigrate set. unlocked_inode_to_wb_begin,end() use spin_lock_irq/spin_unlock_irq() if the given inode is switching writeback domains. Switches occur when enough writes are issued from a new domain. This existing pattern is thus suspicious: lock_page_memcg(page); unlocked_inode_to_wb_begin(inode, &locked); ... unlocked_inode_to_wb_end(inode, locked); unlock_page_memcg(page); If both inode switch and process memcg migration are both in-flight then unlocked_inode_to_wb_end() will unconditionally enable interrupts while still holding the lock_page_memcg() irq spinlock. This suggests the possibility of deadlock if an interrupt occurs before unlock_page_memcg(). truncate __cancel_dirty_page lock_page_memcg unlocked_inode_to_wb_begin unlocked_inode_to_wb_end end_page_writeback test_clear_page_writeback lock_page_memcg unlock_page_memcg Due to configuration limitations this deadlock is not currently possible because we don't mix cgroup writeback (a cgroupv2 feature) and memory.move_charge_at_immigrate (a cgroupv1 feature). If the kernel is hacked to always claim inode switching and memcg moving_account, then this script triggers lockup in less than a minute: cd /mnt/cgroup/memory mkdir a b echo 1 > a/memory.move_charge_at_immigrate echo 1 > b/memory.move_charge_at_immigrate ( echo $BASHPID > a/cgroup.procs while true; do dd if=/dev/zero of=/mnt/big bs=1M count=256 done ) & while true; do sync done & sleep 1h & SLEEP=$! while true; do echo $SLEEP > a/cgroup.procs echo $SLEEP > b/cgroup.procs done The deadlock does not seem possible, so it's debatable if there's any reason to modify the kernel. I suggest we should to prevent future surprises. And Wang Long said "this deadlock occurs three times in our environment", so there's more reason to apply this, even to stable. Stable 4.4 has minor conflicts applying this patch. For a clean 4.4 patch see "[PATCH for-4.4] writeback: safer lock nesting" https://lkml.org/lkml/2018/4/11/146 Wang Long said "this deadlock occurs three times in our environment" [gthelen@google.com: v4] Link: http://lkml.kernel.org/r/20180411084653.254724-1-gthelen@google.com [akpm@linux-foundation.org: comment tweaks, struct initialization simplification] Change-Id: Ibb773e8045852978f6207074491d262f1b3fb613 Link: http://lkml.kernel.org/r/20180410005908.167976-1-gthelen@google.com Fixes: 682aa8e1a6a1 ("writeback: implement unlocked_inode_to_wb transaction and use it for stat updates") Signed-off-by: Greg Thelen Reported-by: Wang Long Acked-by: Wang Long Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Johannes Weiner Cc: Tejun Heo Cc: Nicholas Piggin Cc: [v4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 7 ++++--- include/linux/backing-dev-defs.h | 5 +++++ include/linux/backing-dev.h | 30 ++++++++++++++++-------------- mm/page-writeback.c | 18 +++++++++--------- 4 files changed, 34 insertions(+), 26 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4b12ba70a895..47d7c151fcba 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -745,11 +745,12 @@ int inode_congested(struct inode *inode, int cong_bits) */ if (inode && inode_to_wb_is_valid(inode)) { struct bdi_writeback *wb; - bool locked, congested; + struct wb_lock_cookie lock_cookie = {}; + bool congested; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &lock_cookie); congested = wb_congested(wb, cong_bits); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &lock_cookie); return congested; } diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index bfe86b54f6c1..0bd432a4d7bd 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -223,6 +223,11 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync) set_wb_congested(bdi->wb.congested, sync); } +struct wb_lock_cookie { + bool locked; + unsigned long flags; +}; + #ifdef CONFIG_CGROUP_WRITEBACK /** diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f6be4b0b6c18..72ca0f3d39f3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -347,7 +347,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) /** * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction * @inode: target inode - * @lockedp: temp bool output param, to be passed to the end function + * @cookie: output param, to be passed to the end function * * The caller wants to access the wb associated with @inode but isn't * holding inode->i_lock, the i_pages lock or wb->list_lock. This @@ -355,12 +355,12 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) * association doesn't change until the transaction is finished with * unlocked_inode_to_wb_end(). * - * The caller must call unlocked_inode_to_wb_end() with *@lockdep - * afterwards and can't sleep during transaction. IRQ may or may not be - * disabled on return. + * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and + * can't sleep during the transaction. IRQs may or may not be disabled on + * return. */ static inline struct bdi_writeback * -unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { rcu_read_lock(); @@ -368,10 +368,10 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) * Paired with store_release in inode_switch_wb_work_fn() and * ensures that we see the new wb if we see cleared I_WB_SWITCH. */ - *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; + cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; - if (unlikely(*lockedp)) - xa_lock_irq(&inode->i_mapping->i_pages); + if (unlikely(cookie->locked)) + xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags); /* * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages @@ -383,12 +383,13 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) /** * unlocked_inode_to_wb_end - end inode wb access transaction * @inode: target inode - * @locked: *@lockedp from unlocked_inode_to_wb_begin() + * @cookie: @cookie from unlocked_inode_to_wb_begin() */ -static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +static inline void unlocked_inode_to_wb_end(struct inode *inode, + struct wb_lock_cookie *cookie) { - if (unlikely(locked)) - xa_unlock_irq(&inode->i_mapping->i_pages); + if (unlikely(cookie->locked)) + xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags); rcu_read_unlock(); } @@ -435,12 +436,13 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) } static inline struct bdi_writeback * -unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { return inode_to_wb(inode); } -static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +static inline void unlocked_inode_to_wb_end(struct inode *inode, + struct wb_lock_cookie *cookie) { } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5c1a3279e63f..337c6afb3345 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2502,13 +2502,13 @@ void account_page_redirty(struct page *page) if (mapping && mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - bool locked; + struct wb_lock_cookie cookie = {}; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); current->nr_dirtied--; dec_node_page_state(page, NR_DIRTIED); dec_wb_stat(wb, WB_DIRTIED); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); } } EXPORT_SYMBOL(account_page_redirty); @@ -2614,15 +2614,15 @@ void __cancel_dirty_page(struct page *page) if (mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - bool locked; + struct wb_lock_cookie cookie = {}; lock_page_memcg(page); - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); if (TestClearPageDirty(page)) account_page_cleaned(page, mapping, wb); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); unlock_page_memcg(page); } else { ClearPageDirty(page); @@ -2654,7 +2654,7 @@ int clear_page_dirty_for_io(struct page *page) if (mapping && mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - bool locked; + struct wb_lock_cookie cookie = {}; /* * Yes, Virginia, this is indeed insane. @@ -2691,14 +2691,14 @@ int clear_page_dirty_for_io(struct page *page) * always locked coming in here, so we get the desired * exclusion. */ - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); if (TestClearPageDirty(page)) { dec_lruvec_page_state(page, NR_FILE_DIRTY); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); ret = 1; } - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); return ret; } return TestClearPageDirty(page); From e71769ae52609ea0044a9901709042e5634c2306 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 20 Apr 2018 14:55:45 -0700 Subject: [PATCH 387/660] mm: enable thp migration for shmem thp My testing for the latest kernel supporting thp migration showed an infinite loop in offlining the memory block that is filled with shmem thps. We can get out of the loop with a signal, but kernel should return with failure in this case. What happens in the loop is that scan_movable_pages() repeats returning the same pfn without any progress. That's because page migration always fails for shmem thps. In memory offline code, memory blocks containing unmovable pages should be prevented from being offline targets by has_unmovable_pages() inside start_isolate_page_range(). So it's possible to change migratability for non-anonymous thps to avoid the issue, but it introduces more complex and thp-specific handling in migration code, so it might not good. So this patch is suggesting to fix the issue by enabling thp migration for shmem thp. Both of anon/shmem thp are migratable so we don't need precheck about the type of thps. Link: http://lkml.kernel.org/r/20180406030706.GA2434@hori1.linux.bs1.fc.nec.co.jp Fixes: commit 72b39cfc4d75 ("mm, memory_hotplug: do not fail offlining too early") Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: Zi Yan Cc: Vlastimil Babka Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 5 ++++- mm/migrate.c | 19 ++++++++++++++++--- mm/rmap.c | 3 --- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 14ed6ee5e02f..a3a1815f8e11 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2925,7 +2925,10 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) pmde = maybe_pmd_mkwrite(pmde, vma); flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE); - page_add_anon_rmap(new, vma, mmun_start, true); + if (PageAnon(new)) + page_add_anon_rmap(new, vma, mmun_start, true); + else + page_add_file_rmap(new, true); set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); if (vma->vm_flags & VM_LOCKED) mlock_vma_page(new); diff --git a/mm/migrate.c b/mm/migrate.c index 70ef794cccae..568433023831 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -472,7 +472,7 @@ int migrate_page_move_mapping(struct address_space *mapping, pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page)); - expected_count += 1 + page_has_private(page); + expected_count += hpage_nr_pages(page) + page_has_private(page); if (page_count(page) != expected_count || radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) { @@ -505,7 +505,7 @@ int migrate_page_move_mapping(struct address_space *mapping, */ newpage->index = page->index; newpage->mapping = page->mapping; - get_page(newpage); /* add cache reference */ + page_ref_add(newpage, hpage_nr_pages(page)); /* add cache reference */ if (PageSwapBacked(page)) { __SetPageSwapBacked(newpage); if (PageSwapCache(page)) { @@ -524,13 +524,26 @@ int migrate_page_move_mapping(struct address_space *mapping, } radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); + if (PageTransHuge(page)) { + int i; + int index = page_index(page); + + for (i = 0; i < HPAGE_PMD_NR; i++) { + pslot = radix_tree_lookup_slot(&mapping->i_pages, + index + i); + radix_tree_replace_slot(&mapping->i_pages, pslot, + newpage + i); + } + } else { + radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); + } /* * Drop cache reference from old page by unfreezing * to one less reference. * We know this isn't the last reference. */ - page_ref_unfreeze(page, expected_count - 1); + page_ref_unfreeze(page, expected_count - hpage_nr_pages(page)); xa_unlock(&mapping->i_pages); /* Leave irq disabled to prevent preemption while updating stats */ diff --git a/mm/rmap.c b/mm/rmap.c index f0dd4e4565bc..8d5337fed37b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1374,9 +1374,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (!pvmw.pte && (flags & TTU_MIGRATION)) { VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page); - if (!PageAnon(page)) - continue; - set_pmd_migration_entry(&pvmw, page); continue; } From c5157b76869ba98c3a99a1982396437464e131a6 Mon Sep 17 00:00:00 2001 From: Ioan Nicu Date: Fri, 20 Apr 2018 14:55:49 -0700 Subject: [PATCH 388/660] rapidio: fix rio_dma_transfer error handling Some of the mport_dma_req structure members were initialized late inside the do_dma_request() function, just before submitting the request to the dma engine. But we have some error branches before that. In case of such an error, the code would return on the error path and trigger the calling of dma_req_free() with a req structure which is not completely initialized. This causes a NULL pointer dereference in dma_req_free(). This patch fixes these error branches by making sure that all necessary mport_dma_req structure members are initialized in rio_dma_transfer() immediately after the request structure gets allocated. Link: http://lkml.kernel.org/r/20180412150605.GA31409@nokia.com Fixes: bbd876adb8c72 ("rapidio: use a reference count for struct mport_dma_req") Signed-off-by: Ioan Nicu Tested-by: Alexander Sverdlin Acked-by: Alexandre Bounine Cc: Barry Wood Cc: Matt Porter Cc: Christophe JAILLET Cc: Logan Gunthorpe Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Frank Kunz Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/rio_mport_cdev.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 9d27016c899e..0434ab7b6497 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -740,10 +740,7 @@ static int do_dma_request(struct mport_dma_req *req, tx->callback = dma_xfer_callback; tx->callback_param = req; - req->dmach = chan; - req->sync = sync; req->status = DMA_IN_PROGRESS; - init_completion(&req->req_comp); kref_get(&req->refcount); cookie = dmaengine_submit(tx); @@ -831,13 +828,20 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, if (!req) return -ENOMEM; - kref_init(&req->refcount); - ret = get_dma_channel(priv); if (ret) { kfree(req); return ret; } + chan = priv->dmach; + + kref_init(&req->refcount); + init_completion(&req->req_comp); + req->dir = dir; + req->filp = filp; + req->priv = priv; + req->dmach = chan; + req->sync = sync; /* * If parameter loc_addr != NULL, we are transferring data from/to @@ -925,11 +929,6 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, xfer->offset, xfer->length); } - req->dir = dir; - req->filp = filp; - req->priv = priv; - chan = priv->dmach; - nents = dma_map_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir); if (nents == 0) { From 12c8f25a016dff69ee284aa3338bebfd2cfcba33 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 20 Apr 2018 14:55:52 -0700 Subject: [PATCH 389/660] kasan: add no_sanitize attribute for clang builds KASAN uses the __no_sanitize_address macro to disable instrumentation of particular functions. Right now it's defined only for GCC build, which causes false positives when clang is used. This patch adds a definition for clang. Note, that clang's revision 329612 or higher is required. [andreyknvl@google.com: remove redundant #ifdef CONFIG_KASAN check] Link: http://lkml.kernel.org/r/c79aa31a2a2790f6131ed607c58b0dd45dd62a6c.1523967959.git.andreyknvl@google.com Link: http://lkml.kernel.org/r/4ad725cc903f8534f8c8a60f0daade5e3d674f8d.1523554166.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: David Rientjes Cc: Thomas Gleixner Cc: Ingo Molnar Cc: David Woodhouse Cc: Andrey Konovalov Cc: Will Deacon Cc: Greg Kroah-Hartman Cc: Paul Lawrence Cc: Sandipan Das Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ceb96ecab96e..7d98e263e048 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -25,6 +25,9 @@ #define __SANITIZE_ADDRESS__ #endif +#undef __no_sanitize_address +#define __no_sanitize_address __attribute__((no_sanitize("address"))) + /* Clang doesn't have a way to turn it off per-function, yet. */ #ifdef __noretpoline #undef __noretpoline From 1551cf740cffc62221a4f13da1892f15b0d36f1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 Apr 2018 14:55:56 -0700 Subject: [PATCH 390/660] MAINTAINERS: add personal addresses for Sascha and Uwe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The idea behind using kernel@pengutronix.de (i.e. the mail alias for the kernel people at Pengutronix) as email address was to have a backup when a given developer is on vacation or run over by a bus. Make this more explicit by adding the alias as reviewer and use the personal address for Sascha and me. Link: http://lkml.kernel.org/r/20180413083312.11213-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Acked-by: Sascha Hauer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5ae51d05c09b..b7ff5654b8b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1373,7 +1373,8 @@ F: arch/arm/mach-ebsa110/ F: drivers/net/ethernet/amd/am79c961a.* ARM/ENERGY MICRO (SILICON LABS) EFM32 SUPPORT -M: Uwe Kleine-König +M: Uwe Kleine-König +R: Pengutronix Kernel Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained N: efm32 @@ -1401,7 +1402,8 @@ F: arch/arm/mach-footbridge/ ARM/FREESCALE IMX / MXC ARM ARCHITECTURE M: Shawn Guo -M: Sascha Hauer +M: Sascha Hauer +R: Pengutronix Kernel Team R: Fabio Estevam L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -1416,7 +1418,8 @@ F: include/soc/imx/ ARM/FREESCALE VYBRID ARM ARCHITECTURE M: Shawn Guo -M: Sascha Hauer +M: Sascha Hauer +R: Pengutronix Kernel Team R: Stefan Agner L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -5652,7 +5655,8 @@ F: drivers/net/ethernet/freescale/fec.h F: Documentation/devicetree/bindings/net/fsl-fec.txt FREESCALE IMX / MXC FRAMEBUFFER DRIVER -M: Sascha Hauer +M: Sascha Hauer +R: Pengutronix Kernel Team L: linux-fbdev@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -12825,7 +12829,8 @@ F: include/linux/siphash.h SIOX M: Gavin Schenk -M: Uwe Kleine-König +M: Uwe Kleine-König +R: Pengutronix Kernel Team S: Supported F: drivers/siox/* F: include/trace/events/siox.h From 1e6306652ba18723015d1b4967fe9de55f042499 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 20 Apr 2018 14:55:59 -0700 Subject: [PATCH 391/660] autofs: mount point create should honour passed in mode The autofs file system mkdir inode operation blindly sets the created directory mode to S_IFDIR | 0555, ingoring the passed in mode, which can cause selinux dac_override denials. But the function also checks if the caller is the daemon (as no-one else should be able to do anything here) so there's no point in not honouring the passed in mode, allowing the daemon to set appropriate mode when required. Link: http://lkml.kernel.org/r/152361593601.8051.14014139124905996173.stgit@pluto.themaw.net Signed-off-by: Ian Kent Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 82e8f6edfb48..b12e37f27530 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -749,7 +749,7 @@ static int autofs4_dir_mkdir(struct inode *dir, autofs4_del_active(dentry); - inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); + inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode); if (!inode) return -ENOMEM; d_add(dentry, inode); From 2e0ad552f5f8cd0fda02bc45fcd2b89821c62fd1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 20 Apr 2018 14:56:03 -0700 Subject: [PATCH 392/660] proc: revalidate kernel thread inodes to root:root task_dump_owner() has the following code: mm = task->mm; if (mm) { if (get_dumpable(mm) != SUID_DUMP_USER) { uid = ... } } Check for ->mm is buggy -- kernel thread might be borrowing mm and inode will go to some random uid:gid pair. Link: http://lkml.kernel.org/r/20180412220109.GA20978@avx2 Signed-off-by: Alexey Dobriyan Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index eafa39a3a88c..1b2ede6abcdf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1693,6 +1693,12 @@ void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t uid; kgid_t gid; + if (unlikely(task->flags & PF_KTHREAD)) { + *ruid = GLOBAL_ROOT_UID; + *rgid = GLOBAL_ROOT_GID; + return; + } + /* Default to the tasks effective ownership */ rcu_read_lock(); cred = __task_cred(task); From 9a1015b32faa7cebfe19663c886b0cfe90be1d49 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 20 Apr 2018 14:56:06 -0700 Subject: [PATCH 393/660] proc: fix /proc/loadavg regression Commit 95846ecf9dac ("pid: replace pid bitmap implementation with IDR API") changed last field of /proc/loadavg (last pid allocated) to be off by one: # unshare -p -f --mount-proc cat /proc/loadavg 0.00 0.00 0.00 1/60 2 <=== It should be 1 after first fork into pid namespace. This is formally a regression but given how useless this field is I don't think anyone is affected. Bug was found by /proc testsuite! Link: http://lkml.kernel.org/r/20180413175408.GA27246@avx2 Fixes: 95846ecf9dac508 ("pid: replace pid bitmap implementation with IDR API") Signed-off-by: Alexey Dobriyan Cc: "Eric W. Biederman" Cc: Gargi Sharma Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/sched.c | 2 +- fs/proc/loadavg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9033c8194eda..ccc421503363 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) LOAD_INT(c), LOAD_FRAC(c), count_active_contexts(), atomic_read(&nr_spu_contexts), - idr_get_cursor(&task_active_pid_ns(current)->idr)); + idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; } diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index a000d7547479..b572cc865b92 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -24,7 +24,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v) LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), nr_running(), nr_threads, - idr_get_cursor(&task_active_pid_ns(current)->idr)); + idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; } From a841aa83dff0af75c88aa846ba610a8af4c5ee21 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 20 Apr 2018 14:56:10 -0700 Subject: [PATCH 394/660] kexec_file: do not add extra alignment to efi memmap Chun-Yi reported a kernel warning message below: WARNING: CPU: 0 PID: 0 at ../mm/early_ioremap.c:182 early_iounmap+0x4f/0x12c() early_iounmap(ffffffffff200180, 00000118) [0] size not consistent 00000120 The problem is x86 kexec_file_load adds extra alignment to the efi memmap: in bzImage64_load(): efi_map_sz = efi_get_runtime_map_size(); efi_map_sz = ALIGN(efi_map_sz, 16); And __efi_memmap_init maps with the size including the alignment bytes but efi_memmap_unmap use nr_maps * desc_size which does not include the extra bytes. The alignment in kexec code is only needed for the kexec buffer internal use Actually kexec should pass exact size of the efi memmap to 2nd kernel. Link: http://lkml.kernel.org/r/20180417083600.GA1972@dhcp-128-65.nay.redhat.com Signed-off-by: Dave Young Reported-by: joeyli Tested-by: Randy Wright Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/kexec-bzimage64.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 3182908b7e6c..7326078eaa7a 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -398,11 +398,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel, * little bit simple */ efi_map_sz = efi_get_runtime_map_size(); - efi_map_sz = ALIGN(efi_map_sz, 16); params_cmdline_sz = sizeof(struct boot_params) + cmdline_len + MAX_ELFCOREHDR_STR_LEN; params_cmdline_sz = ALIGN(params_cmdline_sz, 16); - kbuf.bufsz = params_cmdline_sz + efi_map_sz + + kbuf.bufsz = params_cmdline_sz + ALIGN(efi_map_sz, 16) + sizeof(struct setup_data) + sizeof(struct efi_setup_data); @@ -410,7 +409,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, if (!params) return ERR_PTR(-ENOMEM); efi_map_offset = params_cmdline_sz; - efi_setup_data_offset = efi_map_offset + efi_map_sz; + efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16); /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; From d23a61ee90af38bb4a65decf76e798e65b401482 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 20 Apr 2018 14:56:13 -0700 Subject: [PATCH 395/660] fs, elf: don't complain MAP_FIXED_NOREPLACE unless -EEXIST error Commit 4ed28639519c ("fs, elf: drop MAP_FIXED usage from elf_map") is printing spurious messages under memory pressure due to map_addr == -ENOMEM. 9794 (a.out): Uhuuh, elf segment at 00007f2e34738000(fffffffffffffff4) requested but the memory is mapped already 14104 (a.out): Uhuuh, elf segment at 00007f34fd76c000(fffffffffffffff4) requested but the memory is mapped already 16843 (a.out): Uhuuh, elf segment at 00007f930ecc7000(fffffffffffffff4) requested but the memory is mapped already Complain only if -EEXIST, and use %px for printing the address. Link: http://lkml.kernel.org/r/201804182307.FAC17665.SFMOFJVFtHOLOQ@I-love.SAKURA.ne.jp Fixes: 4ed28639519c7bad ("fs, elf: drop MAP_FIXED usage from elf_map") is Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Cc: Andrei Vagin Cc: Khalid Aziz Cc: Michael Ellerman Cc: Kees Cook Cc: Abdul Haleem Cc: Joel Stanley Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 41e04183e4ce..4ad6f669fe34 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -377,10 +377,10 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, } else map_addr = vm_mmap(filep, addr, size, prot, type, off); - if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr)) - pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n", - task_pid_nr(current), current->comm, - (void *)addr); + if ((type & MAP_FIXED_NOREPLACE) && + PTR_ERR((void *)map_addr) == -EEXIST) + pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n", + task_pid_nr(current), current->comm, (void *)addr); return(map_addr); } From c892fd82cc0632d425ae011a4dd75eb59e9f84ee Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 20 Apr 2018 14:56:17 -0700 Subject: [PATCH 396/660] mm: memcg: add __GFP_NOWARN in __memcg_schedule_kmem_cache_create() If there is heavy memory pressure, page allocation with __GFP_NOWAIT fails easily although it's order-0 request. I got below warning 9 times for normal boot. : page allocation failure: order:0, mode:0x2200000(GFP_NOWAIT|__GFP_NOTRACK) .. snip .. Call trace: dump_backtrace+0x0/0x4 dump_stack+0xa4/0xc0 warn_alloc+0xd4/0x15c __alloc_pages_nodemask+0xf88/0x10fc alloc_slab_page+0x40/0x18c new_slab+0x2b8/0x2e0 ___slab_alloc+0x25c/0x464 __kmalloc+0x394/0x498 memcg_kmem_get_cache+0x114/0x2b8 kmem_cache_alloc+0x98/0x3e8 mmap_region+0x3bc/0x8c0 do_mmap+0x40c/0x43c vm_mmap_pgoff+0x15c/0x1e4 sys_mmap+0xb0/0xc8 el0_svc_naked+0x24/0x28 Mem-Info: active_anon:17124 inactive_anon:193 isolated_anon:0 active_file:7898 inactive_file:712955 isolated_file:55 unevictable:0 dirty:27 writeback:18 unstable:0 slab_reclaimable:12250 slab_unreclaimable:23334 mapped:19310 shmem:212 pagetables:816 bounce:0 free:36561 free_pcp:1205 free_cma:35615 Node 0 active_anon:68496kB inactive_anon:772kB active_file:31592kB inactive_file:2851820kB unevictable:0kB isolated(anon):0kB isolated(file):220kB mapped:77240kB dirty:108kB writeback:72kB shmem:848kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no DMA free:142188kB min:3056kB low:3820kB high:4584kB active_anon:10052kB inactive_anon:12kB active_file:312kB inactive_file:1412620kB unevictable:0kB writepending:0kB present:1781412kB managed:1604728kB mlocked:0kB slab_reclaimable:3592kB slab_unreclaimable:876kB kernel_stack:400kB pagetables:52kB bounce:0kB free_pcp:1436kB local_pcp:124kB free_cma:142492kB lowmem_reserve[]: 0 1842 1842 Normal free:4056kB min:4172kB low:5212kB high:6252kB active_anon:58376kB inactive_anon:760kB active_file:31348kB inactive_file:1439040kB unevictable:0kB writepending:180kB present:2000636kB managed:1923688kB mlocked:0kB slab_reclaimable:45408kB slab_unreclaimable:92460kB kernel_stack:9680kB pagetables:3212kB bounce:0kB free_pcp:3392kB local_pcp:688kB free_cma:0kB lowmem_reserve[]: 0 0 0 DMA: 0*4kB 0*8kB 1*16kB (C) 0*32kB 0*64kB 0*128kB 1*256kB (C) 1*512kB (C) 0*1024kB 1*2048kB (C) 34*4096kB (C) = 142096kB Normal: 228*4kB (UMEH) 172*8kB (UMH) 23*16kB (UH) 24*32kB (H) 5*64kB (H) 1*128kB (H) 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 3872kB 721350 total pagecache pages 0 pages in swap cache Swap cache stats: add 0, delete 0, find 0/0 Free swap = 0kB Total swap = 0kB 945512 pages RAM 0 pages HighMem/MovableOnly 63408 pages reserved 51200 pages cma reserved __memcg_schedule_kmem_cache_create() tries to create a shadow slab cache and the worker allocation failure is not really critical because we will retry on the next kmem charge. We might miss some charges but that shouldn't be critical. The excessive allocation failure report is not very helpful. [mhocko@kernel.org: changelog update] Link: http://lkml.kernel.org/r/20180418022912.248417-1-minchan@kernel.org Signed-off-by: Minchan Kim Acked-by: Johannes Weiner Reviewed-by: Andrew Morton Cc: Michal Hocko Cc: Vladimir Davydov Cc: Minchan Kim Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e074f7c637aa..2bd3df3d101a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2192,7 +2192,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, { struct memcg_kmem_cache_create_work *cw; - cw = kmalloc(sizeof(*cw), GFP_NOWAIT); + cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN); if (!cw) return; From abc1be13fd113ddef5e2d807a466286b864caed3 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 Apr 2018 14:56:20 -0700 Subject: [PATCH 397/660] mm/filemap.c: fix NULL pointer in page_cache_tree_insert() f2fs specifies the __GFP_ZERO flag for allocating some of its pages. Unfortunately, the page cache also uses the mapping's GFP flags for allocating radix tree nodes. It always masked off the __GFP_HIGHMEM flag, and masks off __GFP_ZERO in some paths, but not all. That causes radix tree nodes to be allocated with a NULL list_head, which causes backtraces like: __list_del_entry+0x30/0xd0 list_lru_del+0xac/0x1ac page_cache_tree_insert+0xd8/0x110 The __GFP_DMA and __GFP_DMA32 flags would also be able to sneak through if they are ever used. Fix them all by using GFP_RECLAIM_MASK at the innermost location, and remove it from earlier in the callchain. Link: http://lkml.kernel.org/r/20180411060320.14458-2-willy@infradead.org Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check") Signed-off-by: Matthew Wilcox Reported-by: Chris Fries Debugged-by: Minchan Kim Acked-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 9276bdb2343c..0604cb02e6f3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -786,7 +786,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) VM_BUG_ON_PAGE(!PageLocked(new), new); VM_BUG_ON_PAGE(new->mapping, new); - error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); + error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK); if (!error) { struct address_space *mapping = old->mapping; void (*freepage)(struct page *); @@ -842,7 +842,7 @@ static int __add_to_page_cache_locked(struct page *page, return error; } - error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); + error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK); if (error) { if (!huge) mem_cgroup_cancel_charge(page, memcg, false); @@ -1585,8 +1585,7 @@ no_page: if (fgp_flags & FGP_ACCESSED) __SetPageReferenced(page); - err = add_to_page_cache_lru(page, mapping, offset, - gfp_mask & GFP_RECLAIM_MASK); + err = add_to_page_cache_lru(page, mapping, offset, gfp_mask); if (unlikely(err)) { put_page(page); page = NULL; @@ -2387,7 +2386,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) if (!page) return -ENOMEM; - ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_KERNEL); + ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask); if (ret == 0) ret = mapping->a_ops->readpage(file, page); else if (ret == -EEXIST) From 83a62c51ba7b3c0bf45150c4eac7aefc6c785e94 Mon Sep 17 00:00:00 2001 From: Ravi Chandra Sadineni Date: Fri, 20 Apr 2018 11:08:21 -0700 Subject: [PATCH 398/660] USB: Increment wakeup count on remote wakeup. On chromebooks we depend on wakeup count to identify the wakeup source. But currently USB devices do not increment the wakeup count when they trigger the remote wake. This patch addresses the same. Resume condition is reported differently on USB 2.0 and USB 3.0 devices. On USB 2.0 devices, a wake capable device, if wake enabled, drives resume signal to indicate a remote wake (USB 2.0 spec section 7.1.7.7). The upstream facing port then sets C_PORT_SUSPEND bit and reports a port change event (USB 2.0 spec section 11.24.2.7.2.3). Thus if a port has resumed before driving the resume signal from the host and C_PORT_SUSPEND is set, then the device attached to the given port might be the reason for the last system wakeup. Increment the wakeup count for the same. On USB 3.0 devices, a function may signal that it wants to exit from device suspend by sending a Function Wake Device Notification to the host (USB3.0 spec section 8.5.6.4) Thus on receiving the Function Wake, increment the wakeup count. Signed-off-by: Ravi Chandra Sadineni Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 1 + drivers/usb/core/hub.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 777036ae6367..00bb8417050f 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2377,6 +2377,7 @@ void usb_hcd_resume_root_hub (struct usb_hcd *hcd) spin_lock_irqsave (&hcd_root_hub_lock, flags); if (hcd->rh_registered) { + pm_wakeup_event(&hcd->self.root_hub->dev, 0); set_bit(HCD_FLAG_WAKEUP_PENDING, &hcd->flags); queue_work(pm_wq, &hcd->wakeup_work); } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f6ea16e9f6bb..aa9968d90a48 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -653,12 +653,17 @@ void usb_wakeup_notification(struct usb_device *hdev, unsigned int portnum) { struct usb_hub *hub; + struct usb_port *port_dev; if (!hdev) return; hub = usb_hub_to_struct_hub(hdev); if (hub) { + port_dev = hub->ports[portnum - 1]; + if (port_dev && port_dev->child) + pm_wakeup_event(&port_dev->child->dev, 0); + set_bit(portnum, hub->wakeup_bits); kick_hub_wq(hub); } @@ -3434,8 +3439,11 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) /* Skip the initial Clear-Suspend step for a remote wakeup */ status = hub_port_status(hub, port1, &portstatus, &portchange); - if (status == 0 && !port_is_suspended(hub, portstatus)) + if (status == 0 && !port_is_suspended(hub, portstatus)) { + if (portchange & USB_PORT_STAT_C_SUSPEND) + pm_wakeup_event(&udev->dev, 0); goto SuspendCleared; + } /* see 7.1.7.7; affects power usage, but not budgeting */ if (hub_is_superspeed(hub->hdev)) From 5b22f676118ff25049382041da0db8012e57c9e8 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 5 Apr 2018 16:31:49 -0600 Subject: [PATCH 399/660] usbip: vhci_hcd: check rhport before using in vhci_hub_control() Validate !rhport < 0 before using it to access port_status array. Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 20e3d4609583..d11f3f8dad40 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -354,6 +354,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh(" ClearHubFeature\n"); break; case ClearPortFeature: + if (rhport < 0) + goto error; switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { @@ -511,11 +513,16 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; } + if (rhport < 0) + goto error; + vhci_hcd->port_status[rhport] |= USB_PORT_STAT_SUSPEND; break; case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_POWER\n"); + if (rhport < 0) + goto error; if (hcd->speed == HCD_USB3) vhci_hcd->port_status[rhport] |= USB_SS_PORT_STAT_POWER; else @@ -524,6 +531,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_BH_PORT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_BH_PORT_RESET\n"); + if (rhport < 0) + goto error; /* Applicable only for USB3.0 hub */ if (hcd->speed != HCD_USB3) { pr_err("USB_PORT_FEAT_BH_PORT_RESET req not " @@ -534,6 +543,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_RESET\n"); + if (rhport < 0) + goto error; /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { vhci_hcd->port_status[rhport] = 0; @@ -554,6 +565,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" SetPortFeature: default %d\n", wValue); + if (rhport < 0) + goto error; if (hcd->speed == HCD_USB3) { if ((vhci_hcd->port_status[rhport] & USB_SS_PORT_STAT_POWER) != 0) { From 9020a7efe537856eb3e826ebebdf38a5d07a7857 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 2 Apr 2018 14:52:32 -0600 Subject: [PATCH 400/660] usbip: vhci_hcd: Fix usb device and sockfd leaks vhci_hcd fails to do reset to put usb device and sockfd in the module remove/stop paths. Fix the leak. Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index 473fb8a87289..bf8afe9b5883 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -243,7 +243,7 @@ enum usbip_side { #define VUDC_EVENT_ERROR_USB (USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE) #define VUDC_EVENT_ERROR_MALLOC (USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE) -#define VDEV_EVENT_REMOVED (USBIP_EH_SHUTDOWN | USBIP_EH_BYE) +#define VDEV_EVENT_REMOVED (USBIP_EH_SHUTDOWN | USBIP_EH_RESET | USBIP_EH_BYE) #define VDEV_EVENT_DOWN (USBIP_EH_SHUTDOWN | USBIP_EH_RESET) #define VDEV_EVENT_ERROR_TCP (USBIP_EH_SHUTDOWN | USBIP_EH_RESET) #define VDEV_EVENT_ERROR_MALLOC (USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE) From 4bfb141bc01312a817d36627cc47c93f801c216d Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 5 Apr 2018 16:29:04 -0600 Subject: [PATCH 401/660] usbip: usbip_host: fix to hold parent lock for device_attach() calls usbip_host calls device_attach() without holding dev->parent lock. Fix it. Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index c31c8402a0c5..d41d0cdeec0f 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -186,7 +186,12 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, if (!bid) return -ENODEV; + /* device_attach() callers should hold parent lock for USB */ + if (bid->udev->dev.parent) + device_lock(bid->udev->dev.parent); ret = device_attach(&bid->udev->dev); + if (bid->udev->dev.parent) + device_unlock(bid->udev->dev.parent); if (ret < 0) { dev_err(&bid->udev->dev, "rebind failed\n"); return ret; From 4c982482341c64f55daf69b6caa5a2bcd9b43824 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 5 Apr 2018 16:29:50 -0600 Subject: [PATCH 402/660] usbip: usbip_event: fix to not print kernel pointer address Fix it to not print kernel pointer address. Remove the conditional and debug message as it isn't very useful. Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_event.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/usbip/usbip_event.c b/drivers/usb/usbip/usbip_event.c index 5b4c0864ad92..5d88917c9631 100644 --- a/drivers/usb/usbip/usbip_event.c +++ b/drivers/usb/usbip/usbip_event.c @@ -91,10 +91,6 @@ static void event_handler(struct work_struct *work) unset_event(ud, USBIP_EH_UNUSABLE); } - /* Stop the error handler. */ - if (ud->event & USBIP_EH_BYE) - usbip_dbg_eh("removed %p\n", ud); - wake_up(&ud->eh_waitq); } } From 2f860691c2d2e3af1404ffeb2d22dd5c3dbca811 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Tue, 10 Apr 2018 10:38:06 +0200 Subject: [PATCH 403/660] usb: typec: ucsi: fix tracepoint related build error There is the following build error with CONFIG_TYPEC_UCSI=m, CONFIG_FTRACE=y and CONFIG_TRACING=n: ERROR: "__tracepoint_ucsi_command" [drivers/usb/typec/ucsi/typec_ucsi.ko] undefined! ERROR: "__tracepoint_ucsi_register_port" [drivers/usb/typec/ucsi/typec_ucsi.ko] undefined! ERROR: "__tracepoint_ucsi_notify" [drivers/usb/typec/ucsi/typec_ucsi.ko] undefined! ERROR: "__tracepoint_ucsi_reset_ppm" [drivers/usb/typec/ucsi/typec_ucsi.ko] undefined! ERROR: "__tracepoint_ucsi_run_command" [drivers/usb/typec/ucsi/typec_ucsi.ko] undefined! ERROR: "__tracepoint_ucsi_ack" [drivers/usb/typec/ucsi/typec_ucsi.ko] undefined! ERROR: "__tracepoint_ucsi_connector_change" [drivers/usb/typec/ucsi/typec_ucsi.ko] undefined! This compination is quite hard to create because CONFIG_TRACING gets selected only in rare cases without CONFIG_FTRACE. The build failure is caused by conditionally compiling trace.c depending on the wrong option CONFIG_FTRACE. Change this to depend on CONFIG_TRACING like other users of tracepoints do. Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface") Signed-off-by: Tobias Regnery Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/Makefile b/drivers/usb/typec/ucsi/Makefile index b57891c1fd31..7afbea512207 100644 --- a/drivers/usb/typec/ucsi/Makefile +++ b/drivers/usb/typec/ucsi/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_TYPEC_UCSI) += typec_ucsi.o typec_ucsi-y := ucsi.o -typec_ucsi-$(CONFIG_FTRACE) += trace.o +typec_ucsi-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_UCSI_ACPI) += ucsi_acpi.o From 1acc85d82f1510f1176a5f873c30d57ab7a36218 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 6 Apr 2018 15:41:22 +0300 Subject: [PATCH 404/660] Documentation: typec.rst: Use literal-block element with ascii art Using reStructuredText literal-block element with ascii-art. That prevents the ascii art from being processed as reStructuredText. Reported-by: Masanari Iida Fixes: bdecb33af34f ("usb: typec: API for controlling USB Type-C Multiplexers") Signed-off-by: Heikki Krogerus Reviewed-by: Jani Nikula Tested-by: Jani Nikula Reported-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/usb/typec.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/usb/typec.rst b/Documentation/driver-api/usb/typec.rst index feb31946490b..48ff58095f11 100644 --- a/Documentation/driver-api/usb/typec.rst +++ b/Documentation/driver-api/usb/typec.rst @@ -210,7 +210,7 @@ If the connector is dual-role capable, there may also be a switch for the data role. USB Type-C Connector Class does not supply separate API for them. The port drivers can use USB Role Class API with those. -Illustration of the muxes behind a connector that supports an alternate mode: +Illustration of the muxes behind a connector that supports an alternate mode:: ------------------------ | Connector | From 3180dabe08e3653bf0a838553905d88f3773f29c Mon Sep 17 00:00:00 2001 From: Kamil Lulko Date: Thu, 19 Apr 2018 16:54:02 -0700 Subject: [PATCH 405/660] usb: core: Add quirk for HP v222w 16GB Mini Add DELAY_INIT quirk to fix the following problem with HP v222w 16GB Mini: usb 1-3: unable to read config index 0 descriptor/start: -110 usb 1-3: can't read configurations, error -110 usb 1-3: can't set config #1, error -110 Signed-off-by: Kamil Lulko Signed-off-by: Kuppuswamy Sathyanarayanan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 920f48a49a87..c55def2f1320 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -186,6 +186,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x03f0, 0x0701), .driver_info = USB_QUIRK_STRING_FETCH_255 }, + /* HP v222w 16GB Mini USB Drive */ + { USB_DEVICE(0x03f0, 0x3f40), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Creative SB Audigy 2 NX */ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME }, From c20f53c58261b121d0989e147368803b9773b413 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 22 Apr 2018 14:31:03 +0200 Subject: [PATCH 406/660] Revert "xhci: plat: Register shutdown for xhci_plat" This reverts commit b07c12517f2aed0add8ce18146bb426b14099392 It is incomplete and causes hangs on devices when shutting down. It needs a much more "complete" fix in order to work properly. As that fix has not been merged, revert this patch for now before it causes any more problems. Cc: Greg Hackmann Cc: Adam Wallis Cc: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 596e7a71b666..c1b22fc64e38 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -435,7 +435,6 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, - .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = &xhci_plat_pm_ops, From b1b59e16075f5e5da2943ce8de724ab96bc3c6c2 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 18 Apr 2018 15:34:10 +0300 Subject: [PATCH 407/660] usb: typec: ucsi: Increase command completion timeout value On some boards, under heavy load, the EC firmware is unable to complete commands even in one second. Increasing the command completion timeout value to five seconds. Reported-by: Quanxian Wang Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface") Cc: Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index bf0977fbd100..bd5cca5632b3 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -28,7 +28,7 @@ * difficult to estimate the time it takes for the system to process the command * before it is actually passed to the PPM. */ -#define UCSI_TIMEOUT_MS 1000 +#define UCSI_TIMEOUT_MS 5000 /* * UCSI_SWAP_TIMEOUT_MS - Timeout for role swap requests From dd40438fc6be7454c906093fe2a1b69ca98946fb Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 18 Apr 2018 21:39:46 +0200 Subject: [PATCH 408/660] usb: core: phy: fix return value of usb_phy_roothub_exit() usb_phy_roothub_exit() should return the error code from the phy_exit() call if exiting the PHY failed. However, since a wrong variable is used usb_phy_roothub_exit() currently always returns 0, even if one of the phy_exit calls returned an error. Clang also reports this bug: kernel/drivers/usb/core/phy.c:114:8: warning: explicitly assigning value of variable of type 'int' to itself [-Wself-assign] error, forbidden warning: phy.c:114 Fix this by assigning the error code from phy_exit() to the "ret" variable to propagate the error correctly. Fixes: 07dbff0ddbd86c ("usb: core: add a wrapper for the USB PHYs on the HCD") Signed-off-by: Martin Blumenstingl Signed-off-by: Rishabh Bhatnagar Tested-by: Keerthy Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/phy.c b/drivers/usb/core/phy.c index 09b7c43c0ea4..f19aaa3c899c 100644 --- a/drivers/usb/core/phy.c +++ b/drivers/usb/core/phy.c @@ -111,7 +111,7 @@ int usb_phy_roothub_exit(struct usb_phy_roothub *phy_roothub) list_for_each_entry(roothub_entry, head, list) { err = phy_exit(roothub_entry->phy); if (err) - ret = ret; + ret = err; } return ret; From 63cb03f5c11eef2c08b5812f4533ba87cf778fa8 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 18 Apr 2018 21:39:47 +0200 Subject: [PATCH 409/660] usb: core: split usb_phy_roothub_{init,alloc} Before this patch usb_phy_roothub_init served two purposes (from a caller's point of view - like hcd.c): - parsing the PHYs and allocating the list entries - calling phy_init on each list entry While this worked so far it has one disadvantage: if we need to call phy_init for each PHY instance then the existing code cannot be re-used. Solve this by splitting off usb_phy_roothub_alloc which only parses the PHYs and allocates the list entries. usb_phy_roothub_init then gets a struct usb_phy_roothub and only calls phy_init on each PHY instance (along with the corresponding cleanup if that failed somewhere). This is a preparation step for adding proper suspend support for some hardware that requires phy_exit to be called during suspend and phy_init to be called during resume. Signed-off-by: Martin Blumenstingl Tested-by: Chunfeng Yun Reviewed-by: Roger Quadros Tested-by: Keerthy Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 10 +++++--- drivers/usb/core/phy.c | 53 +++++++++++++++++++++--------------------- drivers/usb/core/phy.h | 4 +++- 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 00bb8417050f..f0e5f4d875d8 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2759,12 +2759,16 @@ int usb_add_hcd(struct usb_hcd *hcd, } if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) { - hcd->phy_roothub = usb_phy_roothub_init(hcd->self.sysdev); + hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); if (IS_ERR(hcd->phy_roothub)) { retval = PTR_ERR(hcd->phy_roothub); - goto err_phy_roothub_init; + goto err_phy_roothub_alloc; } + retval = usb_phy_roothub_init(hcd->phy_roothub); + if (retval) + goto err_phy_roothub_alloc; + retval = usb_phy_roothub_power_on(hcd->phy_roothub); if (retval) goto err_usb_phy_roothub_power_on; @@ -2937,7 +2941,7 @@ err_create_buf: usb_phy_roothub_power_off(hcd->phy_roothub); err_usb_phy_roothub_power_on: usb_phy_roothub_exit(hcd->phy_roothub); -err_phy_roothub_init: +err_phy_roothub_alloc: if (hcd->remove_phy && hcd->usb_phy) { usb_phy_shutdown(hcd->usb_phy); usb_put_phy(hcd->usb_phy); diff --git a/drivers/usb/core/phy.c b/drivers/usb/core/phy.c index f19aaa3c899c..44f008cda7a8 100644 --- a/drivers/usb/core/phy.c +++ b/drivers/usb/core/phy.c @@ -19,19 +19,6 @@ struct usb_phy_roothub { struct list_head list; }; -static struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev) -{ - struct usb_phy_roothub *roothub_entry; - - roothub_entry = devm_kzalloc(dev, sizeof(*roothub_entry), GFP_KERNEL); - if (!roothub_entry) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&roothub_entry->list); - - return roothub_entry; -} - static int usb_phy_roothub_add_phy(struct device *dev, int index, struct list_head *list) { @@ -45,9 +32,11 @@ static int usb_phy_roothub_add_phy(struct device *dev, int index, return PTR_ERR(phy); } - roothub_entry = usb_phy_roothub_alloc(dev); - if (IS_ERR(roothub_entry)) - return PTR_ERR(roothub_entry); + roothub_entry = devm_kzalloc(dev, sizeof(*roothub_entry), GFP_KERNEL); + if (!roothub_entry) + return -ENOMEM; + + INIT_LIST_HEAD(&roothub_entry->list); roothub_entry->phy = phy; @@ -56,11 +45,9 @@ static int usb_phy_roothub_add_phy(struct device *dev, int index, return 0; } -struct usb_phy_roothub *usb_phy_roothub_init(struct device *dev) +struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev) { struct usb_phy_roothub *phy_roothub; - struct usb_phy_roothub *roothub_entry; - struct list_head *head; int i, num_phys, err; num_phys = of_count_phandle_with_args(dev->of_node, "phys", @@ -68,16 +55,31 @@ struct usb_phy_roothub *usb_phy_roothub_init(struct device *dev) if (num_phys <= 0) return NULL; - phy_roothub = usb_phy_roothub_alloc(dev); - if (IS_ERR(phy_roothub)) - return phy_roothub; + phy_roothub = devm_kzalloc(dev, sizeof(*phy_roothub), GFP_KERNEL); + if (!phy_roothub) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&phy_roothub->list); for (i = 0; i < num_phys; i++) { err = usb_phy_roothub_add_phy(dev, i, &phy_roothub->list); if (err) - goto err_out; + return ERR_PTR(err); } + return phy_roothub; +} +EXPORT_SYMBOL_GPL(usb_phy_roothub_alloc); + +int usb_phy_roothub_init(struct usb_phy_roothub *phy_roothub) +{ + struct usb_phy_roothub *roothub_entry; + struct list_head *head; + int err; + + if (!phy_roothub) + return 0; + head = &phy_roothub->list; list_for_each_entry(roothub_entry, head, list) { @@ -86,14 +88,13 @@ struct usb_phy_roothub *usb_phy_roothub_init(struct device *dev) goto err_exit_phys; } - return phy_roothub; + return 0; err_exit_phys: list_for_each_entry_continue_reverse(roothub_entry, head, list) phy_exit(roothub_entry->phy); -err_out: - return ERR_PTR(err); + return err; } EXPORT_SYMBOL_GPL(usb_phy_roothub_init); diff --git a/drivers/usb/core/phy.h b/drivers/usb/core/phy.h index 6fde59bfbff8..eb31253201ad 100644 --- a/drivers/usb/core/phy.h +++ b/drivers/usb/core/phy.h @@ -1,6 +1,8 @@ struct usb_phy_roothub; -struct usb_phy_roothub *usb_phy_roothub_init(struct device *dev); +struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev); + +int usb_phy_roothub_init(struct usb_phy_roothub *phy_roothub); int usb_phy_roothub_exit(struct usb_phy_roothub *phy_roothub); int usb_phy_roothub_power_on(struct usb_phy_roothub *phy_roothub); From f0e36d478faf37fb26413b2530d04e6b30af3834 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 18 Apr 2018 21:39:48 +0200 Subject: [PATCH 410/660] usb: core: use phy_exit during suspend if wake up is not supported If the USB controller can wake up the system (which is the case for example with the Mediatek USB3 IP) then we must not call phy_exit during suspend to ensure that the USB controller doesn't have to re-enumerate the devices during resume. However, if the USB controller cannot wake up the system (which is the case for example on various TI platforms using a dwc3 controller) then we must call phy_exit during suspend. Otherwise the PHY driver keeps the clocks enabled, which prevents the system from reaching the lowest power levels in the suspend state. Solve this by introducing two new functions in the PHY wrapper which are dedicated to the suspend and resume handling. If the controller can wake up the system the new usb_phy_roothub_suspend function will simply call usb_phy_roothub_power_off. However, if wake up is not supported by the controller it will also call usb_phy_roothub_exit. The also new usb_phy_roothub_resume function takes care of calling usb_phy_roothub_init (if the controller can't wake up the system) in addition to usb_phy_roothub_power_on. Fixes: 07dbff0ddbd86c ("usb: core: add a wrapper for the USB PHYs on the HCD") Fixes: 178a0bce05cbc1 ("usb: core: hcd: integrate the PHY wrapper into the HCD core") Reported-by: Roger Quadros Suggested-by: Roger Quadros Suggested-by: Chunfeng Yun Signed-off-by: Martin Blumenstingl Tested-by: Chunfeng Yun Reviewed-by: Roger Quadros Tested-by: Keerthy Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 +++++--- drivers/usb/core/phy.c | 35 +++++++++++++++++++++++++++++++++++ drivers/usb/core/phy.h | 5 +++++ 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index f0e5f4d875d8..0a42c5df3c0f 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2262,7 +2262,8 @@ int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg) hcd->state = HC_STATE_SUSPENDED; if (!PMSG_IS_AUTO(msg)) - usb_phy_roothub_power_off(hcd->phy_roothub); + usb_phy_roothub_suspend(hcd->self.sysdev, + hcd->phy_roothub); /* Did we race with a root-hub wakeup event? */ if (rhdev->do_remote_wakeup) { @@ -2302,7 +2303,8 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg) } if (!PMSG_IS_AUTO(msg)) { - status = usb_phy_roothub_power_on(hcd->phy_roothub); + status = usb_phy_roothub_resume(hcd->self.sysdev, + hcd->phy_roothub); if (status) return status; } @@ -2344,7 +2346,7 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg) } } else { hcd->state = old_state; - usb_phy_roothub_power_off(hcd->phy_roothub); + usb_phy_roothub_suspend(hcd->self.sysdev, hcd->phy_roothub); dev_dbg(&rhdev->dev, "bus %s fail, err %d\n", "resume", status); if (status != -ESHUTDOWN) diff --git a/drivers/usb/core/phy.c b/drivers/usb/core/phy.c index 44f008cda7a8..a39d9bb26a4f 100644 --- a/drivers/usb/core/phy.c +++ b/drivers/usb/core/phy.c @@ -157,3 +157,38 @@ void usb_phy_roothub_power_off(struct usb_phy_roothub *phy_roothub) phy_power_off(roothub_entry->phy); } EXPORT_SYMBOL_GPL(usb_phy_roothub_power_off); + +int usb_phy_roothub_suspend(struct device *controller_dev, + struct usb_phy_roothub *phy_roothub) +{ + usb_phy_roothub_power_off(phy_roothub); + + /* keep the PHYs initialized so the device can wake up the system */ + if (device_may_wakeup(controller_dev)) + return 0; + + return usb_phy_roothub_exit(phy_roothub); +} +EXPORT_SYMBOL_GPL(usb_phy_roothub_suspend); + +int usb_phy_roothub_resume(struct device *controller_dev, + struct usb_phy_roothub *phy_roothub) +{ + int err; + + /* if the device can't wake up the system _exit was called */ + if (!device_may_wakeup(controller_dev)) { + err = usb_phy_roothub_init(phy_roothub); + if (err) + return err; + } + + err = usb_phy_roothub_power_on(phy_roothub); + + /* undo _init if _power_on failed */ + if (err && !device_may_wakeup(controller_dev)) + usb_phy_roothub_exit(phy_roothub); + + return err; +} +EXPORT_SYMBOL_GPL(usb_phy_roothub_resume); diff --git a/drivers/usb/core/phy.h b/drivers/usb/core/phy.h index eb31253201ad..605555901d44 100644 --- a/drivers/usb/core/phy.h +++ b/drivers/usb/core/phy.h @@ -7,3 +7,8 @@ int usb_phy_roothub_exit(struct usb_phy_roothub *phy_roothub); int usb_phy_roothub_power_on(struct usb_phy_roothub *phy_roothub); void usb_phy_roothub_power_off(struct usb_phy_roothub *phy_roothub); + +int usb_phy_roothub_suspend(struct device *controller_dev, + struct usb_phy_roothub *phy_roothub); +int usb_phy_roothub_resume(struct device *controller_dev, + struct usb_phy_roothub *phy_roothub); From fec94445db7777c65b0a681b8dc0ed2532304d2a Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 18 Apr 2018 21:39:49 +0200 Subject: [PATCH 411/660] usb: core: phy: make it a no-op if CONFIG_GENERIC_PHY is disabled If the generic PHY support is disabled the stub of devm_of_phy_get_by_index returns ENOSYS. This corner case isn't handled properly by usb_phy_roothub_add_phy and at least breaks USB support on Raspberry Pi (bcm2835_defconfig): dwc2 20980000.usb: dwc2_hcd_init() FAILED, returning -38 dwc2: probe of 20980000.usb failed with error -38 Let usb_phy_roothub_alloc() return in case CONFIG_GENERIC_PHY is disabled to fix this issue (compilers might even be smart enough to optimize away most of the code within usb_phy_roothub_alloc and usb_phy_roothub_add_phy if CONFIG_GENERIC_PHY is disabled). All existing usb_phy_roothub_* functions are already NULL-safe, so no special handling is required there. Fixes: 07dbff0ddbd8 ("usb: core: add a wrapper for the USB PHYs on the HCD") Reported-by: Stefan Wahren Signed-off-by: Martin Blumenstingl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/phy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/phy.c b/drivers/usb/core/phy.c index a39d9bb26a4f..9879767452a2 100644 --- a/drivers/usb/core/phy.c +++ b/drivers/usb/core/phy.c @@ -50,6 +50,9 @@ struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev) struct usb_phy_roothub *phy_roothub; int i, num_phys, err; + if (!IS_ENABLED(CONFIG_GENERIC_PHY)) + return NULL; + num_phys = of_count_phandle_with_args(dev->of_node, "phys", "#phy-cells"); if (num_phys <= 0) From 9d3cd19be3e2675f5b30172f3305f79ddcb91023 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 18 Apr 2018 21:39:50 +0200 Subject: [PATCH 412/660] usb: core: phy: add missing forward declaration for "struct device" Currently hcd.c is the only consumer of the usb_phy_roothub logic. This already includes the required header files so struct device is known. However, future consumers might not know about struct device. Add a forward declaration for struct device to fix potential future consumers which don't include any of the struct device API headers. Fixes: 07dbff0ddbd86c ("usb: core: add a wrapper for the USB PHYs on the HCD") Suggested-by: Masahiro Yamada Signed-off-by: Martin Blumenstingl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/phy.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/core/phy.h b/drivers/usb/core/phy.h index 605555901d44..bbc969383074 100644 --- a/drivers/usb/core/phy.h +++ b/drivers/usb/core/phy.h @@ -1,3 +1,4 @@ +struct device; struct usb_phy_roothub; struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev); From 602f4fb71d2b4aa5a447d4ed25a58fd098df9058 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Apr 2018 14:21:53 +0200 Subject: [PATCH 413/660] usb: select USB_COMMON for usb role switch config The new axp288 extcon driver has no dependency on USB itself but calls the usb role switch helper functions. This causes a link error when USB_COMMON is disabled, as that subdirectory never gets entered: drivers/extcon/extcon-axp288.o: In function `axp288_usb_role_work': extcon-axp288.c:(.text+0x47b): undefined reference to `usb_role_switch_set_role' extcon-axp288.c:(.text+0x498): undefined reference to `usb_role_switch_get_role' drivers/extcon/extcon-axp288.o: In function `axp288_extcon_probe': extcon-axp288.c:(.text+0x675): undefined reference to `usb_role_switch_get' extcon-axp288.c:(.text+0x6d1): undefined reference to `usb_role_switch_put' drivers/extcon/extcon-axp288.o: In function `axp288_put_role_sw': extcon-axp288.c:(.text+0x1c): undefined reference to `usb_role_switch_put' There are multiple ways of fixing this, I chose to 'select USB_COMMON', since that is how we solved the same problem for other helpers like USB_LED_TRIG or PHY drivers. Fixes: d54f063cdbe4 ("extcon: axp288: Set USB role where necessary") Signed-off-by: Arnd Bergmann Reviewed-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 75f7fb151f71..987fc5ba6321 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -207,5 +207,6 @@ config USB_ULPI_BUS config USB_ROLE_SWITCH tristate + select USB_COMMON endif # USB_SUPPORT From 7fc65d4c2ba9e5006c629669146c6876b65aa233 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Fri, 13 Apr 2018 15:55:34 +0300 Subject: [PATCH 414/660] xhci: Fix Kernel oops in xhci dbgtty tty_unregister_driver may be called more than 1 time in some hotplug cases,it will cause the kernel oops. This patch checked dbc_tty_driver to make sure it is unregistered only 1 time. [ 175.741404] BUG: unable to handle kernel NULL pointer dereference at 0000000000000034 [ 175.742309] IP: tty_unregister_driver+0x9/0x70 [ 175.743148] PGD 0 P4D 0 [ 175.743981] Oops: 0000 [#1] SMP PTI [ 175.753904] RIP: 0010:tty_unregister_driver+0x9/0x70 [ 175.754817] RSP: 0018:ffffa8ff831d3bb0 EFLAGS: 00010246 [ 175.755753] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 175.756685] RDX: ffff92089c616000 RSI: ffffe64fe1b26080 RDI: 0000000000000000 [ 175.757608] RBP: ffff92086c988230 R08: 000000006c982701 R09: 00000001801e0016 [ 175.758533] R10: ffffa8ff831d3b48 R11: ffff92086c982100 R12: ffff92086c98827c [ 175.759462] R13: ffff92086c988398 R14: 0000000000000060 R15: ffff92089c5e9b40 [ 175.760401] FS: 0000000000000000(0000) GS:ffff9208a0100000(0000) knlGS:0000000000000000 [ 175.761334] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 175.762270] CR2: 0000000000000034 CR3: 000000011800a003 CR4: 00000000003606e0 [ 175.763225] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 175.764164] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 175.765091] Call Trace: [ 175.766014] xhci_dbc_tty_unregister_driver+0x11/0x30 [ 175.766960] xhci_dbc_exit+0x2a/0x40 [ 175.767889] xhci_stop+0x57/0x1c0 [ 175.768824] usb_remove_hcd+0x100/0x250 [ 175.769708] usb_hcd_pci_remove+0x68/0x130 [ 175.770574] pci_device_remove+0x3b/0xc0 [ 175.771435] device_release_driver_internal+0x157/0x230 [ 175.772343] pci_stop_bus_device+0x74/0xa0 [ 175.773205] pci_stop_bus_device+0x2b/0xa0 [ 175.774061] pci_stop_bus_device+0x2b/0xa0 [ 175.774907] pci_stop_bus_device+0x2b/0xa0 [ 175.775741] pci_stop_bus_device+0x2b/0xa0 [ 175.776618] pci_stop_bus_device+0x2b/0xa0 [ 175.777452] pci_stop_bus_device+0x2b/0xa0 [ 175.778273] pci_stop_bus_device+0x2b/0xa0 [ 175.779092] pci_stop_bus_device+0x2b/0xa0 [ 175.779908] pci_stop_bus_device+0x2b/0xa0 [ 175.780750] pci_stop_bus_device+0x2b/0xa0 [ 175.781543] pci_stop_and_remove_bus_device+0xe/0x20 [ 175.782338] pciehp_unconfigure_device+0xb8/0x160 [ 175.783128] pciehp_disable_slot+0x4f/0xd0 [ 175.783920] pciehp_power_thread+0x82/0xa0 [ 175.784766] process_one_work+0x147/0x3c0 [ 175.785564] worker_thread+0x4a/0x440 [ 175.786376] kthread+0xf8/0x130 [ 175.787174] ? rescuer_thread+0x360/0x360 [ 175.787964] ? kthread_associate_blkcg+0x90/0x90 [ 175.788798] ret_from_fork+0x35/0x40 Cc: # 4.16 Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver") Signed-off-by: Zhengjun Xing Tested-by: Christian Kellner Reviewed-by: Christian Kellner Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgtty.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index 48779c44c361..eb494ec547e8 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -320,9 +320,11 @@ int xhci_dbc_tty_register_driver(struct xhci_hcd *xhci) void xhci_dbc_tty_unregister_driver(void) { - tty_unregister_driver(dbc_tty_driver); - put_tty_driver(dbc_tty_driver); - dbc_tty_driver = NULL; + if (dbc_tty_driver) { + tty_unregister_driver(dbc_tty_driver); + put_tty_driver(dbc_tty_driver); + dbc_tty_driver = NULL; + } } static void dbc_rx_push(unsigned long _port) From e9ec22547986dd32c5c70da78107ce35dbff1344 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 7 Apr 2018 10:19:50 -0700 Subject: [PATCH 415/660] tty: n_gsm: Fix long delays with control frame timeouts in ADM mode Commit ea3d8465ab9b ("tty: n_gsm: Allow ADM response in addition to UA for control dlci") added support for DLCI to stay in Asynchronous Disconnected Mode (ADM). But we still get long delays waiting for commands to other DLCI to complete: --> 5) C: SABM(P) Q> 0) C: UIH(F) Q> 0) C: UIH(F) Q> 0) C: UIH(F) ... This happens because gsm_control_send() sets cretries timer to T2 that is by default set to 34. This will cause resend for T2 times for the control frame. In ADM mode, we will never get a response so the control frame, so retries are just delaying all the commands. Let's fix the issue by setting DLCI_MODE_ADM flag after detecting the ADM mode for the control DLCI. Then we can use that in gsm_control_send() to set retries to 1. This means the control frame will be sent once allowing the other end at an opportunity to switch from ADM to ABM mode. Note that retries will be decremented in gsm_control_retransmit() so we don't want to set it to 0 here. Fixes: ea3d8465ab9b ("tty: n_gsm: Allow ADM response in addition to UA for control dlci") Cc: linux-serial@vger.kernel.org Cc: Alan Cox Cc: Dan Williams Cc: Jiri Prchal Cc: Jiri Slaby Cc: Marcel Partap Cc: Merlijn Wajer Cc: Michael Nazzareno Trimarchi Cc: Michael Scott Cc: Pavel Machek Cc: Peter Hurley Cc: Russ Gorby Cc: Sascha Hauer Cc: Sebastian Reichel Signed-off-by: Tony Lindgren Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 3b3e1f6632d7..7b1f8636f8e9 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -121,6 +121,9 @@ struct gsm_dlci { struct mutex mutex; /* Link layer */ + int mode; +#define DLCI_MODE_ABM 0 /* Normal Asynchronous Balanced Mode */ +#define DLCI_MODE_ADM 1 /* Asynchronous Disconnected Mode */ spinlock_t lock; /* Protects the internal state */ struct timer_list t1; /* Retransmit timer for SABM and UA */ int retries; @@ -1364,7 +1367,13 @@ retry: ctrl->data = data; ctrl->len = clen; gsm->pending_cmd = ctrl; - gsm->cretries = gsm->n2; + + /* If DLCI0 is in ADM mode skip retries, it won't respond */ + if (gsm->dlci[0]->mode == DLCI_MODE_ADM) + gsm->cretries = 1; + else + gsm->cretries = gsm->n2; + mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100); gsm_control_transmit(gsm, ctrl); spin_unlock_irqrestore(&gsm->control_lock, flags); @@ -1472,6 +1481,7 @@ static void gsm_dlci_t1(struct timer_list *t) if (debug & 8) pr_info("DLCI %d opening in ADM mode.\n", dlci->addr); + dlci->mode = DLCI_MODE_ADM; gsm_dlci_open(dlci); } else { gsm_dlci_close(dlci); From b2d89ad9c9682e795ed6eeb9ed455789ad6cedf1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 7 Apr 2018 10:19:51 -0700 Subject: [PATCH 416/660] tty: n_gsm: Fix DLCI handling for ADM mode if debug & 2 is not set At least on droid 4 with control channel in ADM mode, there is no response to Modem Status Command (MSC). Currently gsmtty_modem_update() expects to have data in dlci->modem_rx unless debug & 2 is set. This means that on droid 4, things only work if debug & 2 is set. Let's fix the issue by ignoring empty dlci->modem_rx for ADM mode. In the AMD mode, CMD_MSC will never respond and gsm_process_modem() won't get called to set dlci->modem_rx. And according to ts_127010v140000p.pdf, MSC is only relevant if basic option is chosen, so let's test for that too. Fixes: ea3d8465ab9b ("tty: n_gsm: Allow ADM response in addition to UA for control dlci") Cc: linux-serial@vger.kernel.org Cc: Alan Cox Cc: Dan Williams Cc: Jiri Prchal Cc: Jiri Slaby Cc: Marcel Partap Cc: Merlijn Wajer Cc: Michael Nazzareno Trimarchi Cc: Michael Scott Cc: Pavel Machek Cc: Peter Hurley Cc: Russ Gorby Cc: Sascha Hauer Cc: Sebastian Reichel Signed-off-by: Tony Lindgren Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 7b1f8636f8e9..1dbe27c9946c 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2871,11 +2871,22 @@ static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) static int gsm_carrier_raised(struct tty_port *port) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); + struct gsm_mux *gsm = dlci->gsm; + /* Not yet open so no carrier info */ if (dlci->state != DLCI_OPEN) return 0; if (debug & 2) return 1; + + /* + * Basic mode with control channel in ADM mode may not respond + * to CMD_MSC at all and modem_rx is empty. + */ + if (gsm->encoding == 0 && gsm->dlci[0]->mode == DLCI_MODE_ADM && + !dlci->modem_rx) + return 1; + return dlci->modem_rx & TIOCM_CD; } From 46c6975a1fd9794ed979565235d24b2f5004e014 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 22 Apr 2018 13:33:46 +0100 Subject: [PATCH 417/660] serial: mvebu-uart: Fix local flags handling on termios update Commit 68a0db1d7da2 reworked the baud rate selection, but also added a (not so) subtle change in the way the local flags (c_lflag in the termios structure) are handled, forcing the new flags to always be the same as the old ones. The reason for that particular change is both obscure and undocumented. It also completely breaks userspace. Something as trivial as getty is unusable: Debian GNU/Linux 9 sy-borg ttyMV0 sy-borg login: root root [timeout] Debian GNU/Linux 9 sy-borg ttyMV0 which is quite obvious in retrospect: getty cannot get in control of the echo mode, is stuck in canonical mode, and times out without ever seeing anything valid. It also begs the question of how this change was ever tested. The fix is pretty obvious: stop messing with c_lflag, and the world will be a happier place. Cc: stable@vger.kernel.org # 4.15+ Fixes: 68a0db1d7da2 ("serial: mvebu-uart: add function to change baudrate") Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 750e5645dc85..f503fab1e268 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -495,7 +495,6 @@ static void mvebu_uart_set_termios(struct uart_port *port, termios->c_iflag |= old->c_iflag & ~(INPCK | IGNPAR); termios->c_cflag &= CREAD | CBAUD; termios->c_cflag |= old->c_cflag & ~(CREAD | CBAUD); - termios->c_lflag = old->c_lflag; } spin_unlock_irqrestore(&port->lock, flags); From 23566c3798f315058862564ab3d01882316054eb Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 10 Apr 2018 17:15:13 +0200 Subject: [PATCH 418/660] mtd: nand: Fix nanddev_mtd_erase() Commit e7bfb3fdbde3 ("mtd: Stop updating erase_info->state and calling mtd_erase_callback()") removed the einfo->state field and the MTD_ERASE_XXX macros. At the same time, the generic NAND layer was added and made sure to update the erase info state. It did not result in a build failure after merging the nand/for-4.17 branch in mtd/next because the generic NAND layer is not selected yet. Let's fix that before a config option starts selecting MTD_NAND_CORE. Fixes: e7bfb3fdbde3 ("mtd: Stop updating erase_info->state and calling mtd_erase_callback()") Signed-off-by: Boris Brezillon --- drivers/mtd/nand/core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mtd/nand/core.c b/drivers/mtd/nand/core.c index d0cd6f8635d7..9c9f8936b63b 100644 --- a/drivers/mtd/nand/core.c +++ b/drivers/mtd/nand/core.c @@ -162,7 +162,6 @@ int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo) ret = nanddev_erase(nand, &pos); if (ret) { einfo->fail_addr = nanddev_pos_to_offs(nand, &pos); - einfo->state = MTD_ERASE_FAILED; return ret; } @@ -170,8 +169,6 @@ int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo) nanddev_pos_next_eraseblock(nand, &pos); } - einfo->state = MTD_ERASE_DONE; - return 0; } EXPORT_SYMBOL_GPL(nanddev_mtd_erase); From 686c97ee29c886ee07d17987d0059874c5c3b5af Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Apr 2018 12:52:06 +0200 Subject: [PATCH 419/660] s390/qeth: fix error handling in adapter command callbacks Make sure to check both return code fields before(!) processing the command response. Otherwise we risk operating on invalid data. This matches an earlier fix for SETASSPARMS commands, see commit ad3cbf613329 ("s390/qeth: fix error handling in checksum cmd callback"). Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 85 ++++++++++++++----------------- 1 file changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 04fefa5bb08d..36bc94088de1 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3033,28 +3033,23 @@ static int qeth_send_startlan(struct qeth_card *card) return rc; } -static int qeth_default_setadapterparms_cb(struct qeth_card *card, - struct qeth_reply *reply, unsigned long data) +static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd) { - struct qeth_ipa_cmd *cmd; - - QETH_CARD_TEXT(card, 4, "defadpcb"); - - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) + if (!cmd->hdr.return_code) cmd->hdr.return_code = cmd->data.setadapterparms.hdr.return_code; - return 0; + return cmd->hdr.return_code; } static int qeth_query_setadapterparms_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 3, "quyadpcb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *) data; if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) { card->info.link_type = cmd->data.setadapterparms.data.query_cmds_supp.lan_type; @@ -3062,7 +3057,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card, } card->options.adp.supported_funcs = cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds; - return qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd); + return 0; } static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card, @@ -3154,22 +3149,20 @@ EXPORT_SYMBOL_GPL(qeth_query_ipassists); static int qeth_query_switch_attributes_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; - struct qeth_switch_info *sw_info; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_query_switch_attributes *attrs; + struct qeth_switch_info *sw_info; QETH_CARD_TEXT(card, 2, "qswiatcb"); - cmd = (struct qeth_ipa_cmd *) data; - sw_info = (struct qeth_switch_info *)reply->param; - if (cmd->data.setadapterparms.hdr.return_code == 0) { - attrs = &cmd->data.setadapterparms.data.query_switch_attributes; - sw_info->capabilities = attrs->capabilities; - sw_info->settings = attrs->settings; - QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities, - sw_info->settings); - } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; + sw_info = (struct qeth_switch_info *)reply->param; + attrs = &cmd->data.setadapterparms.data.query_switch_attributes; + sw_info->capabilities = attrs->capabilities; + sw_info->settings = attrs->settings; + QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities, + sw_info->settings); return 0; } @@ -4207,16 +4200,13 @@ EXPORT_SYMBOL_GPL(qeth_do_send_packet); static int qeth_setadp_promisc_mode_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_ipacmd_setadpparms *setparms; QETH_CARD_TEXT(card, 4, "prmadpcb"); - cmd = (struct qeth_ipa_cmd *) data; setparms = &(cmd->data.setadapterparms); - - qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd); - if (cmd->hdr.return_code) { + if (qeth_setadpparms_inspect_rc(cmd)) { QETH_CARD_TEXT_(card, 4, "prmrc%x", cmd->hdr.return_code); setparms->data.mode = SET_PROMISC_MODE_OFF; } @@ -4286,18 +4276,18 @@ EXPORT_SYMBOL_GPL(qeth_get_stats); static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 4, "chgmaccb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *) data; if (!card->options.layer2 || !(card->info.mac_bits & QETH_LAYER2_MAC_READ)) { ether_addr_copy(card->dev->dev_addr, cmd->data.setadapterparms.data.change_addr.addr); card->info.mac_bits |= QETH_LAYER2_MAC_READ; } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); return 0; } @@ -4328,13 +4318,15 @@ EXPORT_SYMBOL_GPL(qeth_setadpparms_change_macaddr); static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_set_access_ctrl *access_ctrl_req; int fallback = *(int *)reply->param; QETH_CARD_TEXT(card, 4, "setaccb"); + if (cmd->hdr.return_code) + return 0; + qeth_setadpparms_inspect_rc(cmd); - cmd = (struct qeth_ipa_cmd *) data; access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl; QETH_DBF_TEXT_(SETUP, 2, "setaccb"); QETH_DBF_TEXT_(SETUP, 2, "%s", card->gdev->dev.kobj.name); @@ -4407,7 +4399,6 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, card->options.isolation = card->options.prev_isolation; break; } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); return 0; } @@ -4695,14 +4686,15 @@ out: static int qeth_setadpparms_query_oat_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; struct qeth_qoat_priv *priv; char *resdata; int resdatalen; QETH_CARD_TEXT(card, 3, "qoatcb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *)data; priv = (struct qeth_qoat_priv *)reply->param; resdatalen = cmd->data.setadapterparms.hdr.cmdlength; resdata = (char *)data + 28; @@ -4796,21 +4788,18 @@ out: static int qeth_query_card_info_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct carrier_info *carrier_info = (struct carrier_info *)reply->param; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; struct qeth_query_card_info *card_info; - struct carrier_info *carrier_info; QETH_CARD_TEXT(card, 2, "qcrdincb"); - carrier_info = (struct carrier_info *)reply->param; - cmd = (struct qeth_ipa_cmd *)data; - card_info = &cmd->data.setadapterparms.data.card_info; - if (cmd->data.setadapterparms.hdr.return_code == 0) { - carrier_info->card_type = card_info->card_type; - carrier_info->port_mode = card_info->port_mode; - carrier_info->port_speed = card_info->port_speed; - } + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); + card_info = &cmd->data.setadapterparms.data.card_info; + carrier_info->card_type = card_info->card_type; + carrier_info->port_mode = card_info->port_mode; + carrier_info->port_speed = card_info->port_speed; return 0; } From 901e3f49facbd31b2b3d1786637b4a35e1022e9b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Apr 2018 12:52:07 +0200 Subject: [PATCH 420/660] s390/qeth: avoid control IO completion stalls For control IO, qeth currently tracks the index of the buffer that it expects to complete the next IO on each qeth_channel. If the channel presents an IRQ while this buffer has not yet completed, no completion processing for _any_ completed buffer takes place. So if the 'next buffer' is skipped for any sort of reason* (eg. when it is released due to error conditions, before the IO is started), the buffer obviously won't switch to PROCESSED until it is eventually allocated for a _different_ IO and completes. Until this happens, all completion processing on that channel stalls and pending requests possibly time out. As a fix, remove the whole 'next buffer' logic and simply process any IO buffer right when it completes. A channel will never have more than one IO pending, so there's no risk of processing out-of-sequence. *Note: currently just one location in the code really handles this problem, by advancing the 'next' index manually. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 2 -- drivers/s390/net/qeth_core_main.c | 22 +++++----------------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 4326715dc13e..78b98b3e7efa 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -557,7 +557,6 @@ enum qeth_prot_versions { enum qeth_cmd_buffer_state { BUF_STATE_FREE, BUF_STATE_LOCKED, - BUF_STATE_PROCESSED, }; enum qeth_cq { @@ -601,7 +600,6 @@ struct qeth_channel { struct qeth_cmd_buffer iob[QETH_CMD_BUFFER_NO]; atomic_t irq_pending; int io_buf_no; - int buf_no; }; /** diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 36bc94088de1..5ec47c6ebaa6 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -818,7 +818,6 @@ void qeth_clear_cmd_buffers(struct qeth_channel *channel) for (cnt = 0; cnt < QETH_CMD_BUFFER_NO; cnt++) qeth_release_buffer(channel, &channel->iob[cnt]); - channel->buf_no = 0; channel->io_buf_no = 0; } EXPORT_SYMBOL_GPL(qeth_clear_cmd_buffers); @@ -924,7 +923,6 @@ static int qeth_setup_channel(struct qeth_channel *channel) kfree(channel->iob[cnt].data); return -ENOMEM; } - channel->buf_no = 0; channel->io_buf_no = 0; atomic_set(&channel->irq_pending, 0); spin_lock_init(&channel->iob_lock); @@ -1100,11 +1098,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, { int rc; int cstat, dstat; - struct qeth_cmd_buffer *buffer; struct qeth_channel *channel; struct qeth_card *card; struct qeth_cmd_buffer *iob; - __u8 index; if (__qeth_check_irb_error(cdev, intparm, irb)) return; @@ -1182,25 +1178,18 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, channel->state = CH_STATE_RCD_DONE; goto out; } - if (intparm) { - buffer = (struct qeth_cmd_buffer *) __va((addr_t)intparm); - buffer->state = BUF_STATE_PROCESSED; - } if (channel == &card->data) return; if (channel == &card->read && channel->state == CH_STATE_UP) __qeth_issue_next_read(card); - iob = channel->iob; - index = channel->buf_no; - while (iob[index].state == BUF_STATE_PROCESSED) { - if (iob[index].callback != NULL) - iob[index].callback(channel, iob + index); - - index = (index + 1) % QETH_CMD_BUFFER_NO; + if (intparm) { + iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm); + if (iob->callback) + iob->callback(iob->channel, iob); } - channel->buf_no = index; + out: wake_up(&card->wait_q); return; @@ -2214,7 +2203,6 @@ time_err: error: atomic_set(&card->write.irq_pending, 0); qeth_release_buffer(iob->channel, iob); - card->write.buf_no = (card->write.buf_no + 1) % QETH_CMD_BUFFER_NO; rc = reply->rc; qeth_put_reply(reply); return rc; From a936b1ef37ce1e996533878f4b23944f9444dcdf Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Apr 2018 12:52:08 +0200 Subject: [PATCH 421/660] s390/qeth: handle failure on workqueue creation Creating the global workqueue during driver init may fail, deal with it. Also, destroy the created workqueue on any subsequent error. Fixes: 0f54761d167f ("qeth: Support VEPA mode") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 5ec47c6ebaa6..9a08b545d018 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6540,10 +6540,14 @@ static int __init qeth_core_init(void) mutex_init(&qeth_mod_mutex); qeth_wq = create_singlethread_workqueue("qeth_wq"); + if (!qeth_wq) { + rc = -ENOMEM; + goto out_err; + } rc = qeth_register_dbf_views(); if (rc) - goto out_err; + goto dbf_err; qeth_core_root_dev = root_device_register("qeth"); rc = PTR_ERR_OR_ZERO(qeth_core_root_dev); if (rc) @@ -6580,6 +6584,8 @@ slab_err: root_device_unregister(qeth_core_root_dev); register_err: qeth_unregister_dbf_views(); +dbf_err: + destroy_workqueue(qeth_wq); out_err: pr_err("Initializing the qeth device driver failed\n"); return rc; From bcacfcbc82b4235d280ed9b067aa4567f4a0c756 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Apr 2018 12:52:09 +0200 Subject: [PATCH 422/660] s390/qeth: fix MAC address update sequence When changing the MAC address on a L2 qeth device, current code first unregisters the old address, then registers the new one. If HW rejects the new address (or the IO fails), the device ends up with no operable address at all. Re-order the code flow so that the old address only gets dropped if the new address was registered successfully. While at it, add logic to catch some corner-cases. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 55 +++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2ad6f12f3d49..36f9b74848fe 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -121,13 +121,10 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) QETH_CARD_TEXT(card, 2, "L2Setmac"); rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_SETVMAC); if (rc == 0) { - card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; - ether_addr_copy(card->dev->dev_addr, mac); dev_info(&card->gdev->dev, - "MAC address %pM successfully registered on device %s\n", - card->dev->dev_addr, card->dev->name); + "MAC address %pM successfully registered on device %s\n", + mac, card->dev->name); } else { - card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED; switch (rc) { case -EEXIST: dev_warn(&card->gdev->dev, @@ -142,19 +139,6 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) return rc; } -static int qeth_l2_send_delmac(struct qeth_card *card, __u8 *mac) -{ - int rc; - - QETH_CARD_TEXT(card, 2, "L2Delmac"); - if (!(card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) - return 0; - rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_DELVMAC); - if (rc == 0) - card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED; - return rc; -} - static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac) { enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ? @@ -519,6 +503,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) { struct sockaddr *addr = p; struct qeth_card *card = dev->ml_priv; + u8 old_addr[ETH_ALEN]; int rc = 0; QETH_CARD_TEXT(card, 3, "setmac"); @@ -530,14 +515,35 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) return -EOPNOTSUPP; } QETH_CARD_HEX(card, 3, addr->sa_data, ETH_ALEN); + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { QETH_CARD_TEXT(card, 3, "setmcREC"); return -ERESTARTSYS; } - rc = qeth_l2_send_delmac(card, &card->dev->dev_addr[0]); - if (!rc || (rc == -ENOENT)) - rc = qeth_l2_send_setmac(card, addr->sa_data); - return rc ? -EINVAL : 0; + + if (!qeth_card_hw_is_reachable(card)) { + ether_addr_copy(dev->dev_addr, addr->sa_data); + return 0; + } + + /* don't register the same address twice */ + if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) && + (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) + return 0; + + /* add the new address, switch over, drop the old */ + rc = qeth_l2_send_setmac(card, addr->sa_data); + if (rc) + return rc; + ether_addr_copy(old_addr, dev->dev_addr); + ether_addr_copy(dev->dev_addr, addr->sa_data); + + if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) + qeth_l2_remove_mac(card, old_addr); + card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; + return 0; } static void qeth_promisc_to_bridge(struct qeth_card *card) @@ -1067,8 +1073,9 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) goto out_remove; } - if (card->info.type != QETH_CARD_TYPE_OSN) - qeth_l2_send_setmac(card, &card->dev->dev_addr[0]); + if (card->info.type != QETH_CARD_TYPE_OSN && + !qeth_l2_send_setmac(card, card->dev->dev_addr)) + card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) { if (card->info.hwtrap && From db71bbbd11a4d314f0fa3fbf3369b71cf33ce33c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Apr 2018 12:52:10 +0200 Subject: [PATCH 423/660] s390/qeth: fix request-side race during cmd IO timeout Submitting a cmd IO request (usually on the WRITE device, but for IDX also on the READ device) is currently done with ccw_device_start() and a manual timeout in the caller. On timeout, the caller cleans up the related resources (eg. IO buffer). But 1) the IO might still be active and utilize those resources, and 2) when the IO completes, qeth_irq() will attempt to clean up the same resources again. Instead of introducing additional resource locking, switch to ccw_device_start_timeout() to ensure IO termination after timeout, and let the IRQ handler alone deal with cleaning up after a request. This also removes a stray write->irq_pending reset from clear_ipacmd_list(). The routine doesn't terminate any pending IO on the WRITE device, so this should be handled properly via IO timeout in the IRQ handler. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 51 ++++++++++++++++--------------- drivers/s390/net/qeth_core_mpc.h | 12 ++++++++ drivers/s390/net/qeth_l2_main.c | 4 +-- 3 files changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9a08b545d018..9b22d5d496ae 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -706,7 +706,6 @@ void qeth_clear_ipacmd_list(struct qeth_card *card) qeth_put_reply(reply); } spin_unlock_irqrestore(&card->lock, flags); - atomic_set(&card->write.irq_pending, 0); } EXPORT_SYMBOL_GPL(qeth_clear_ipacmd_list); @@ -1098,14 +1097,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, { int rc; int cstat, dstat; + struct qeth_cmd_buffer *iob = NULL; struct qeth_channel *channel; struct qeth_card *card; - struct qeth_cmd_buffer *iob; - - if (__qeth_check_irb_error(cdev, intparm, irb)) - return; - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; card = CARD_FROM_CDEV(cdev); if (!card) @@ -1123,6 +1117,19 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, channel = &card->data; QETH_CARD_TEXT(card, 5, "data"); } + + if (qeth_intparm_is_iob(intparm)) + iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm); + + if (__qeth_check_irb_error(cdev, intparm, irb)) { + /* IO was terminated, free its resources. */ + if (iob) + qeth_release_buffer(iob->channel, iob); + atomic_set(&channel->irq_pending, 0); + wake_up(&card->wait_q); + return; + } + atomic_set(&channel->irq_pending, 0); if (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC)) @@ -1146,6 +1153,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, /* we don't have to handle this further */ intparm = 0; } + + cstat = irb->scsw.cmd.cstat; + dstat = irb->scsw.cmd.dstat; + if ((dstat & DEV_STAT_UNIT_EXCEP) || (dstat & DEV_STAT_UNIT_CHECK) || (cstat)) { @@ -1184,11 +1195,8 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, channel->state == CH_STATE_UP) __qeth_issue_next_read(card); - if (intparm) { - iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm); - if (iob->callback) - iob->callback(iob->channel, iob); - } + if (iob && iob->callback) + iob->callback(iob->channel, iob); out: wake_up(&card->wait_q); @@ -1859,8 +1867,8 @@ static int qeth_idx_activate_get_answer(struct qeth_channel *channel, atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0); QETH_DBF_TEXT(SETUP, 6, "noirqpnd"); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_start(channel->ccwdev, - &channel->ccw, (addr_t) iob, 0, 0); + rc = ccw_device_start_timeout(channel->ccwdev, &channel->ccw, + (addr_t) iob, 0, 0, QETH_TIMEOUT); spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); if (rc) { @@ -1877,7 +1885,6 @@ static int qeth_idx_activate_get_answer(struct qeth_channel *channel, if (channel->state != CH_STATE_UP) { rc = -ETIME; QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc); - qeth_clear_cmd_buffers(channel); } else rc = 0; return rc; @@ -1931,8 +1938,8 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel, atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0); QETH_DBF_TEXT(SETUP, 6, "noirqpnd"); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_start(channel->ccwdev, - &channel->ccw, (addr_t) iob, 0, 0); + rc = ccw_device_start_timeout(channel->ccwdev, &channel->ccw, + (addr_t) iob, 0, 0, QETH_TIMEOUT); spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); if (rc) { @@ -1953,7 +1960,6 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel, QETH_DBF_MESSAGE(2, "%s IDX activate timed out\n", dev_name(&channel->ccwdev->dev)); QETH_DBF_TEXT_(SETUP, 2, "2err%d", -ETIME); - qeth_clear_cmd_buffers(channel); return -ETIME; } return qeth_idx_activate_get_answer(channel, idx_reply_cb); @@ -2155,8 +2161,8 @@ int qeth_send_control_data(struct qeth_card *card, int len, QETH_CARD_TEXT(card, 6, "noirqpnd"); spin_lock_irqsave(get_ccwdev_lock(card->write.ccwdev), flags); - rc = ccw_device_start(card->write.ccwdev, &card->write.ccw, - (addr_t) iob, 0, 0); + rc = ccw_device_start_timeout(CARD_WDEV(card), &card->write.ccw, + (addr_t) iob, 0, 0, event_timeout); spin_unlock_irqrestore(get_ccwdev_lock(card->write.ccwdev), flags); if (rc) { QETH_DBF_MESSAGE(2, "%s qeth_send_control_data: " @@ -2188,8 +2194,6 @@ int qeth_send_control_data(struct qeth_card *card, int len, } } - if (reply->rc == -EIO) - goto error; rc = reply->rc; qeth_put_reply(reply); return rc; @@ -2200,9 +2204,6 @@ time_err: list_del_init(&reply->list); spin_unlock_irqrestore(&reply->card->lock, flags); atomic_inc(&reply->received); -error: - atomic_set(&card->write.irq_pending, 0); - qeth_release_buffer(iob->channel, iob); rc = reply->rc; qeth_put_reply(reply); return rc; diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 619f897b4bb0..f4d1ec0b8f5a 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -35,6 +35,18 @@ extern unsigned char IPA_PDU_HEADER[]; #define QETH_HALT_CHANNEL_PARM -11 #define QETH_RCD_PARM -12 +static inline bool qeth_intparm_is_iob(unsigned long intparm) +{ + switch (intparm) { + case QETH_CLEAR_CHANNEL_PARM: + case QETH_HALT_CHANNEL_PARM: + case QETH_RCD_PARM: + case 0: + return false; + } + return true; +} + /*****************************************************************************/ /* IP Assist related definitions */ /*****************************************************************************/ diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 36f9b74848fe..b8079f2a65b3 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1345,8 +1345,8 @@ static int qeth_osn_send_control_data(struct qeth_card *card, int len, qeth_prepare_control_data(card, len, iob); QETH_CARD_TEXT(card, 6, "osnoirqp"); spin_lock_irqsave(get_ccwdev_lock(card->write.ccwdev), flags); - rc = ccw_device_start(card->write.ccwdev, &card->write.ccw, - (addr_t) iob, 0, 0); + rc = ccw_device_start_timeout(CARD_WDEV(card), &card->write.ccw, + (addr_t) iob, 0, 0, QETH_IPA_TIMEOUT); spin_unlock_irqrestore(get_ccwdev_lock(card->write.ccwdev), flags); if (rc) { QETH_DBF_MESSAGE(2, "qeth_osn_send_control_data: " From b7493e91c11a757cf0f8ab26989642ee4bb2c642 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Apr 2018 12:52:11 +0200 Subject: [PATCH 424/660] s390/qeth: use Read device to query hypervisor for MAC For z/VM NICs, qeth needs to consider which of the three CCW devices in an MPC group it uses for requesting a managed MAC address. On the Base device, the hypervisor returns a default MAC which is pre-assigned when creating the NIC (this MAC is also returned by the READ MAC primitive). Querying any other device results in the allocation of an additional MAC address. For consistency with READ MAC and to avoid using up more addresses than necessary, it is preferable to use the NIC's default MAC. So switch the the diag26c over to using a NIC's Read device, which should always be identical to the Base device. Fixes: ec61bd2fd2a2 ("s390/qeth: use diag26c to get MAC address on L2") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9b22d5d496ae..dffd820731f2 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4835,7 +4835,7 @@ int qeth_vm_request_mac(struct qeth_card *card) goto out; } - ccw_device_get_id(CARD_DDEV(card), &id); + ccw_device_get_id(CARD_RDEV(card), &id); request->resp_buf_len = sizeof(*response); request->resp_version = DIAG26C_VERSION2; request->op_code = DIAG26C_GET_MAC; From 5411b6187adf62909e3b998ac782e722904c7487 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 19 Apr 2018 16:20:48 +0200 Subject: [PATCH 425/660] l2tp: fix {pppol2tp, l2tp_dfs}_seq_stop() in case of seq_file overflow Commit 0e0c3fee3a59 ("l2tp: hold reference on tunnels printed in pppol2tp proc file") assumed that if pppol2tp_seq_stop() was called with non-NULL private data (the 'v' pointer), then pppol2tp_seq_start() would not be called again. It turns out that this isn't guaranteed, and overflowing the seq_file's buffer in pppol2tp_seq_show() is a way to get into this situation. Therefore, pppol2tp_seq_stop() needs to reset pd->tunnel, so that pppol2tp_seq_start() won't drop a reference again if it gets called. We also have to clear pd->session, because the rest of the code expects a non-NULL tunnel when pd->session is set. The l2tp_debugfs module has the same issue. Fix it in the same way. Fixes: 0e0c3fee3a59 ("l2tp: hold reference on tunnels printed in pppol2tp proc file") Fixes: f726214d9b23 ("l2tp: hold reference on tunnels printed in l2tp/tunnels debugfs file") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_debugfs.c | 5 ++++- net/l2tp/l2tp_ppp.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index b8f9d45bfeb1..7f1e842ef05a 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -106,8 +106,11 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v) return; /* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */ - if (pd->tunnel) + if (pd->tunnel) { l2tp_tunnel_dec_refcount(pd->tunnel); + pd->tunnel = NULL; + pd->session = NULL; + } } static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7d0c963680e6..1404bc1c1bb7 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1618,8 +1618,11 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v) return; /* Drop reference taken by last invocation of pppol2tp_next_tunnel() */ - if (pd->tunnel) + if (pd->tunnel) { l2tp_tunnel_dec_refcount(pd->tunnel); + pd->tunnel = NULL; + pd->session = NULL; + } } static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) From b905ef9ab90115d001c1658259af4b1c65088779 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 19 Apr 2018 12:25:38 -0700 Subject: [PATCH 426/660] llc: delete timers synchronously in llc_sk_free() The connection timers of an llc sock could be still flying after we delete them in llc_sk_free(), and even possibly after we free the sock. We could just wait synchronously here in case of troubles. Note, I leave other call paths as they are, since they may not have to wait, at least we can change them to synchronously when needed. Also, move the code to net/llc/llc_conn.c, which is apparently a better place. Reported-by: Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/llc_conn.h | 1 + net/llc/llc_c_ac.c | 9 +-------- net/llc/llc_conn.c | 22 +++++++++++++++++++++- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 5c40f118c0fa..df528a623548 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -97,6 +97,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); +void llc_sk_stop_all_timers(struct sock *sk, bool sync); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 163121192aca..4d78375f9872 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1099,14 +1099,7 @@ int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb) { - struct llc_sock *llc = llc_sk(sk); - - del_timer(&llc->pf_cycle_timer.timer); - del_timer(&llc->ack_timer.timer); - del_timer(&llc->rej_sent_timer.timer); - del_timer(&llc->busy_state_timer.timer); - llc->ack_must_be_send = 0; - llc->ack_pf = 0; + llc_sk_stop_all_timers(sk, false); return 0; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 110e32bcb399..c0ac522b48a1 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -961,6 +961,26 @@ out: return sk; } +void llc_sk_stop_all_timers(struct sock *sk, bool sync) +{ + struct llc_sock *llc = llc_sk(sk); + + if (sync) { + del_timer_sync(&llc->pf_cycle_timer.timer); + del_timer_sync(&llc->ack_timer.timer); + del_timer_sync(&llc->rej_sent_timer.timer); + del_timer_sync(&llc->busy_state_timer.timer); + } else { + del_timer(&llc->pf_cycle_timer.timer); + del_timer(&llc->ack_timer.timer); + del_timer(&llc->rej_sent_timer.timer); + del_timer(&llc->busy_state_timer.timer); + } + + llc->ack_must_be_send = 0; + llc->ack_pf = 0; +} + /** * llc_sk_free - Frees a LLC socket * @sk - socket to free @@ -973,7 +993,7 @@ void llc_sk_free(struct sock *sk) llc->state = LLC_CONN_OUT_OF_SVC; /* Stop all (possibly) running timers */ - llc_conn_ac_stop_all_timers(sk, NULL); + llc_sk_stop_all_timers(sk, true); #ifdef DEBUG_LLC_CONN_ALLOC printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __func__, skb_queue_len(&llc->pdu_unack_q), From 5e391dc5a8d801a2410d0032ad4a428d1d61800c Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 19 Apr 2018 22:49:09 +0300 Subject: [PATCH 427/660] net: ethernet: ti: cpsw: fix tx vlan priority mapping The CPDMA_TX_PRIORITY_MAP in real is vlan pcp field priority mapping register and basically replaces vlan pcp field for tagged packets. So, set it to be 1:1 mapping. Otherwise, it will cause unexpected change of egress vlan tagged packets, like prio 2 -> prio 5. Fixes: e05107e6b747 ("net: ethernet: ti: cpsw: add multi queue support") Reviewed-by: Grygorii Strashko Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 30371274409d..74f828412055 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -129,7 +129,7 @@ do { \ #define RX_PRIORITY_MAPPING 0x76543210 #define TX_PRIORITY_MAPPING 0x33221100 -#define CPDMA_TX_PRIORITY_MAP 0x01234567 +#define CPDMA_TX_PRIORITY_MAP 0x76543210 #define CPSW_VLAN_AWARE BIT(1) #define CPSW_RX_VLAN_ENCAP BIT(2) From 3a04ce7130a7e5dad4e78d45d50313747f8c830f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 19 Apr 2018 21:54:34 -0700 Subject: [PATCH 428/660] llc: fix NULL pointer deref for SOCK_ZAPPED For SOCK_ZAPPED socket, we don't need to care about llc->sap, so we should just skip these refcount functions in this case. Fixes: f7e43672683b ("llc: hold llc_sap before release_sock()") Reported-by: kernel test robot Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/llc/af_llc.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6d29b2b94e84..cb80ebb38311 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -189,7 +189,6 @@ static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; struct llc_sock *llc; - struct llc_sap *sap; if (unlikely(sk == NULL)) goto out; @@ -200,15 +199,19 @@ static int llc_ui_release(struct socket *sock) llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); - sap = llc->sap; - /* Hold this for release_sock(), so that llc_backlog_rcv() could still - * use it. - */ - llc_sap_hold(sap); - if (!sock_flag(sk, SOCK_ZAPPED)) + if (!sock_flag(sk, SOCK_ZAPPED)) { + struct llc_sap *sap = llc->sap; + + /* Hold this for release_sock(), so that llc_backlog_rcv() + * could still use it. + */ + llc_sap_hold(sap); llc_sap_remove_socket(llc->sap, sk); - release_sock(sk); - llc_sap_put(sap); + release_sock(sk); + llc_sap_put(sap); + } else { + release_sock(sk); + } if (llc->dev) dev_put(llc->dev); sock_put(sk); From a957fa190aa9d9168b33d460a5241a6d088c6265 Mon Sep 17 00:00:00 2001 From: Ahmed Abdelsalam Date: Fri, 20 Apr 2018 15:58:05 +0200 Subject: [PATCH 429/660] ipv6: sr: fix NULL pointer dereference in seg6_do_srh_encap()- v4 pkts In case of seg6 in encap mode, seg6_do_srh_encap() calls set_tun_src() in order to set the src addr of outer IPv6 header. The net_device is required for set_tun_src(). However calling ip6_dst_idev() on dst_entry in case of IPv4 traffic results on the following bug. Using just dst->dev should fix this BUG. [ 196.242461] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 196.242975] PGD 800000010f076067 P4D 800000010f076067 PUD 10f060067 PMD 0 [ 196.243329] Oops: 0000 [#1] SMP PTI [ 196.243468] Modules linked in: nfsd auth_rpcgss nfs_acl nfs lockd grace fscache sunrpc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd input_leds glue_helper led_class pcspkr serio_raw mac_hid video autofs4 hid_generic usbhid hid e1000 i2c_piix4 ahci pata_acpi libahci [ 196.244362] CPU: 2 PID: 1089 Comm: ping Not tainted 4.16.0+ #1 [ 196.244606] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 196.244968] RIP: 0010:seg6_do_srh_encap+0x1ac/0x300 [ 196.245236] RSP: 0018:ffffb2ce00b23a60 EFLAGS: 00010202 [ 196.245464] RAX: 0000000000000000 RBX: ffff8c7f53eea300 RCX: 0000000000000000 [ 196.245742] RDX: 0000f10000000000 RSI: ffff8c7f52085a6c RDI: ffff8c7f41166850 [ 196.246018] RBP: ffffb2ce00b23aa8 R08: 00000000000261e0 R09: ffff8c7f41166800 [ 196.246294] R10: ffffdce5040ac780 R11: ffff8c7f41166828 R12: ffff8c7f41166808 [ 196.246570] R13: ffff8c7f52085a44 R14: ffffffffb73211c0 R15: ffff8c7e69e44200 [ 196.246846] FS: 00007fc448789700(0000) GS:ffff8c7f59d00000(0000) knlGS:0000000000000000 [ 196.247286] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 196.247526] CR2: 0000000000000000 CR3: 000000010f05a000 CR4: 00000000000406e0 [ 196.247804] Call Trace: [ 196.247972] seg6_do_srh+0x15b/0x1c0 [ 196.248156] seg6_output+0x3c/0x220 [ 196.248341] ? prandom_u32+0x14/0x20 [ 196.248526] ? ip_idents_reserve+0x6c/0x80 [ 196.248723] ? __ip_select_ident+0x90/0x100 [ 196.248923] ? ip_append_data.part.50+0x6c/0xd0 [ 196.249133] lwtunnel_output+0x44/0x70 [ 196.249328] ip_send_skb+0x15/0x40 [ 196.249515] raw_sendmsg+0x8c3/0xac0 [ 196.249701] ? _copy_from_user+0x2e/0x60 [ 196.249897] ? rw_copy_check_uvector+0x53/0x110 [ 196.250106] ? _copy_from_user+0x2e/0x60 [ 196.250299] ? copy_msghdr_from_user+0xce/0x140 [ 196.250508] sock_sendmsg+0x36/0x40 [ 196.250690] ___sys_sendmsg+0x292/0x2a0 [ 196.250881] ? _cond_resched+0x15/0x30 [ 196.251074] ? copy_termios+0x1e/0x70 [ 196.251261] ? _copy_to_user+0x22/0x30 [ 196.251575] ? tty_mode_ioctl+0x1c3/0x4e0 [ 196.251782] ? _cond_resched+0x15/0x30 [ 196.251972] ? mutex_lock+0xe/0x30 [ 196.252152] ? vvar_fault+0xd2/0x110 [ 196.252337] ? __do_fault+0x1f/0xc0 [ 196.252521] ? __handle_mm_fault+0xc1f/0x12d0 [ 196.252727] ? __sys_sendmsg+0x63/0xa0 [ 196.252919] __sys_sendmsg+0x63/0xa0 [ 196.253107] do_syscall_64+0x72/0x200 [ 196.253305] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 196.253530] RIP: 0033:0x7fc4480b0690 [ 196.253715] RSP: 002b:00007ffde9f252f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 196.254053] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007fc4480b0690 [ 196.254331] RDX: 0000000000000000 RSI: 000000000060a360 RDI: 0000000000000003 [ 196.254608] RBP: 00007ffde9f253f0 R08: 00000000002d1e81 R09: 0000000000000002 [ 196.254884] R10: 00007ffde9f250c0 R11: 0000000000000246 R12: 0000000000b22070 [ 196.255205] R13: 20c49ba5e353f7cf R14: 431bde82d7b634db R15: 00007ffde9f278fe [ 196.255484] Code: a5 0f b6 45 c0 41 88 41 28 41 0f b6 41 2c 48 c1 e0 04 49 8b 54 01 38 49 8b 44 01 30 49 89 51 20 49 89 41 18 48 8b 83 b0 00 00 00 <48> 8b 30 49 8b 86 08 0b 00 00 48 8b 40 20 48 8b 50 08 48 0b 10 [ 196.256190] RIP: seg6_do_srh_encap+0x1ac/0x300 RSP: ffffb2ce00b23a60 [ 196.256445] CR2: 0000000000000000 [ 196.256676] ---[ end trace 71af7d093603885c ]--- Fixes: 8936ef7604c11 ("ipv6: sr: fix NULL pointer dereference when setting encap source address") Signed-off-by: Ahmed Abdelsalam Acked-by: David Lebrun Signed-off-by: David S. Miller --- net/ipv6/seg6_iptunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index f343e6f0fc95..5fe139484919 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -136,7 +136,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { From 7c5aba211dd61f41d737a2c51729eb9fdcd3edf4 Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Fri, 20 Apr 2018 12:11:11 -0700 Subject: [PATCH 430/660] strparser: Do not call mod_delayed_work with a timeout of LONG_MAX struct sock's sk_rcvtimeo is initialized to LONG_MAX/MAX_SCHEDULE_TIMEOUT in sock_init_data. Calling mod_delayed_work with a timeout of LONG_MAX causes spurious execution of the work function. timer->expires is set equal to jiffies + LONG_MAX. When timer_base->clk falls behind the current value of jiffies, the delta between timer_base->clk and jiffies + LONG_MAX causes the expiration to be in the past. Returning early from strp_start_timer if timeo == LONG_MAX solves this problem. Found while testing net/tls_sw recv path. Fixes: 43a0c6751a322847 ("strparser: Stream parser for messages") Reviewed-by: Tejun Heo Signed-off-by: Doron Roberts-Kedes Signed-off-by: David S. Miller --- net/strparser/strparser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 805b139756db..092bebc70048 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -67,7 +67,7 @@ static void strp_abort_strp(struct strparser *strp, int err) static void strp_start_timer(struct strparser *strp, long timeo) { - if (timeo) + if (timeo && timeo != LONG_MAX) mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo); } From f6cd14537ff9919081be19b9c53b9b19c0d3ea97 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 20 Apr 2018 15:15:03 -0400 Subject: [PATCH 431/660] net: sched: ife: signal not finding metaid We need to record stats for received metadata that we dont know how to process. Have find_decode_metaid() return -ENOENT to capture this. Signed-off-by: Alexander Aring Reviewed-by: Yotam Gigi Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_ife.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a5994cf0512b..49b8ab551fbe 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -652,7 +652,7 @@ static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, } } - return 0; + return -ENOENT; } static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, From cc74eddd0ff325d57373cea99f642b787d7f76f5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 20 Apr 2018 15:15:04 -0400 Subject: [PATCH 432/660] net: sched: ife: handle malformed tlv length There is currently no handling to check on a invalid tlv length. This patch adds such handling to avoid killing the kernel with a malformed ife packet. Signed-off-by: Alexander Aring Reviewed-by: Yotam Gigi Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/ife.h | 3 ++- net/ife/ife.c | 35 +++++++++++++++++++++++++++++++++-- net/sched/act_ife.c | 7 ++++++- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/include/net/ife.h b/include/net/ife.h index 44b9c00f7223..e117617e3c34 100644 --- a/include/net/ife.h +++ b/include/net/ife.h @@ -12,7 +12,8 @@ void *ife_encode(struct sk_buff *skb, u16 metalen); void *ife_decode(struct sk_buff *skb, u16 *metalen); -void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen); +void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype, + u16 *dlen, u16 *totlen); int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval); diff --git a/net/ife/ife.c b/net/ife/ife.c index 7d1ec76e7f43..7fbe70a0af4b 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -92,12 +92,43 @@ struct meta_tlvhdr { __be16 len; }; +static bool __ife_tlv_meta_valid(const unsigned char *skbdata, + const unsigned char *ifehdr_end) +{ + const struct meta_tlvhdr *tlv; + u16 tlvlen; + + if (unlikely(skbdata + sizeof(*tlv) > ifehdr_end)) + return false; + + tlv = (const struct meta_tlvhdr *)skbdata; + tlvlen = ntohs(tlv->len); + + /* tlv length field is inc header, check on minimum */ + if (tlvlen < NLA_HDRLEN) + return false; + + /* overflow by NLA_ALIGN check */ + if (NLA_ALIGN(tlvlen) < tlvlen) + return false; + + if (unlikely(skbdata + NLA_ALIGN(tlvlen) > ifehdr_end)) + return false; + + return true; +} + /* Caller takes care of presenting data in network order */ -void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen) +void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype, + u16 *dlen, u16 *totlen) { - struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata; + struct meta_tlvhdr *tlv; + if (!__ife_tlv_meta_valid(skbdata, ifehdr_end)) + return NULL; + + tlv = (struct meta_tlvhdr *)skbdata; *dlen = ntohs(tlv->len) - NLA_HDRLEN; *attrtype = ntohs(tlv->type); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 49b8ab551fbe..8527cfdc446d 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -682,7 +682,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u16 mtype; u16 dlen; - curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL); + curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype, + &dlen, NULL); + if (!curr_data) { + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); + return TC_ACT_SHOT; + } if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { /* abuse overlimits to count when we receive metadata From d57493d6d1be26c8ac8516a4463bfe24956978eb Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 20 Apr 2018 15:15:05 -0400 Subject: [PATCH 433/660] net: sched: ife: check on metadata length This patch checks if sk buffer is available to dererence ife header. If not then NULL will returned to signal an malformed ife packet. This avoids to crashing the kernel from outside. Signed-off-by: Alexander Aring Reviewed-by: Yotam Gigi Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/ife/ife.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ife/ife.c b/net/ife/ife.c index 7fbe70a0af4b..13bbf8cb6a39 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -69,6 +69,9 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) int total_pull; u16 ifehdrln; + if (!pskb_may_pull(skb, skb->dev->hard_header_len + IFE_METAHDRLEN)) + return NULL; + ifehdr = (struct ifeheadr *) (skb->data + skb->dev->hard_header_len); ifehdrln = ntohs(ifehdr->metalen); total_pull = skb->dev->hard_header_len + ifehdrln; From 660e309ddd6aa99bb4d2a859c4a0b56965e744ef Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 20 Apr 2018 14:25:32 -0500 Subject: [PATCH 434/660] ibmvnic: Clean actual number of RX or TX pools Avoid using value stored in the login response buffer when cleaning TX and RX buffer pools since these could be inconsistent depending on the device state. Instead use the field in the driver's private data that tracks the number of active pools. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2df01ad98df7..6e8d6a6f6aaf 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1128,7 +1128,7 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter) if (!adapter->rx_pool) return; - rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + rx_scrqs = adapter->num_active_rx_pools; rx_entries = adapter->req_rx_add_entries_per_subcrq; /* Free any remaining skbs in the rx buffer pools */ @@ -1177,7 +1177,7 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter) if (!adapter->tx_pool || !adapter->tso_pool) return; - tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + tx_scrqs = adapter->num_active_tx_pools; /* Free any remaining skbs in the tx buffer pools */ for (i = 0; i < tx_scrqs; i++) { From 6d08b06e67cd117f6992c46611dfb4ce267cd71e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Apr 2018 19:20:09 -0700 Subject: [PATCH 435/660] Linux 4.17-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e811e0c509c5..83b6c541565a 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Fearless Coyote # *DOCUMENTATION* From f18a36cf3f31dce633fe2a93d0059008bb21d9f2 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 6 Apr 2018 16:30:47 +0100 Subject: [PATCH 436/660] hwmon: (scmi) handle absence of few types of sensors Currently the loop checks for non-zero count of sensors for each type of sensors which is completely wrong. It also results in aborting the registration of sensors if one or more types of sensors are completely not supported by the platform SCMI firmware. This patch fixes the issue by continue to loop and skiping sensor types that are not present. Fixes: b23688aefb8b ("hwmon: add support for sensors exported via ARM SCMI") Reported-by: Jim Quinlan Cc: Guenter Roeck Cc: linux-hwmon@vger.kernel.org Signed-off-by: Sudeep Holla Signed-off-by: Guenter Roeck --- drivers/hwmon/scmi-hwmon.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c index 363bf56eb0f2..91976b6ca300 100644 --- a/drivers/hwmon/scmi-hwmon.c +++ b/drivers/hwmon/scmi-hwmon.c @@ -170,7 +170,10 @@ static int scmi_hwmon_probe(struct scmi_device *sdev) scmi_chip_info.info = ptr_scmi_ci; chip_info = &scmi_chip_info; - for (type = 0; type < hwmon_max && nr_count[type]; type++) { + for (type = 0; type < hwmon_max; type++) { + if (!nr_count[type]) + continue; + scmi_hwmon_add_chan_info(scmi_hwmon_chan, dev, nr_count[type], type, hwmon_attributes[type]); *ptr_scmi_ci++ = scmi_hwmon_chan++; From af2e460ade0b0180d0f3812ca4f4f59cc9597f3e Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 11 Apr 2018 11:21:17 +0200 Subject: [PATCH 437/660] s390/cio: update chpid descriptor after resource accessibility event Channel path descriptors have been seen as something stable (as long as the chpid is configured). Recent tests have shown that the descriptor can also be altered when the link state of a channel path changes. Thus it is necessary to update the descriptor during handling of resource accessibility events. Cc: Signed-off-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 6652a49a49b1..9029804dcd22 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -452,6 +452,7 @@ static void chsc_process_sei_link_incident(struct chsc_sei_nt0_area *sei_area) static void chsc_process_sei_res_acc(struct chsc_sei_nt0_area *sei_area) { + struct channel_path *chp; struct chp_link link; struct chp_id chpid; int status; @@ -464,10 +465,17 @@ static void chsc_process_sei_res_acc(struct chsc_sei_nt0_area *sei_area) chpid.id = sei_area->rsid; /* allocate a new channel path structure, if needed */ status = chp_get_status(chpid); - if (status < 0) - chp_new(chpid); - else if (!status) + if (!status) return; + + if (status < 0) { + chp_new(chpid); + } else { + chp = chpid_to_chp(chpid); + mutex_lock(&chp->lock); + chp_update_desc(chp); + mutex_unlock(&chp->lock); + } memset(&link, 0, sizeof(struct chp_link)); link.chpid = chpid; if ((sei_area->vf & 0xc0) != 0) { From 783c3b53b9506db3e05daacfe34e0287eebb09d8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 16 Apr 2018 12:22:24 +0200 Subject: [PATCH 438/660] s390/uprobes: implement arch_uretprobe_is_alive() Implement s390 specific arch_uretprobe_is_alive() to avoid SIGSEGVs observed with uretprobes in combination with setjmp/longjmp. See commit 2dea1d9c38e4 ("powerpc/uprobes: Implement arch_uretprobe_is_alive()") for more details. With this implemented all test cases referenced in the above commit pass. Reported-by: Ziqian SUN Cc: # v4.3+ Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/uprobes.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index d9d1f512f019..5007fac01bb5 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -150,6 +150,15 @@ unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, return orig; } +bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) +{ + if (ctx == RP_CHECK_CHAIN_CALL) + return user_stack_pointer(regs) <= ret->stack; + else + return user_stack_pointer(regs) < ret->stack; +} + /* Instruction Emulation */ static void adjust_psw_addr(psw_t *psw, unsigned long len) From 5d27a2bf6e14f5c7d1033ad1e993fcd0eba43e83 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 12 Apr 2018 13:38:22 +0200 Subject: [PATCH 439/660] s390/dasd: fix IO error for newly defined devices When a new CKD storage volume is defined at the storage server, Linux may be relying on outdated information about that volume, which leads to the following errors: 1. Command Reject Errors for minidisk on z/VM: dasd-eckd.b3193d: 0.0.XXXX: An error occurred in the DASD device driver, reason=09 dasd(eckd): I/O status report for device 0.0.XXXX: dasd(eckd): in req: 00000000XXXXXXXX CC:00 FC:04 AC:00 SC:17 DS:02 CS:00 RC:0 dasd(eckd): device 0.0.2046: Failing CCW: 00000000XXXXXXXX dasd(eckd): Sense(hex) 0- 7: 80 00 00 00 00 00 00 00 dasd(eckd): Sense(hex) 8-15: 00 00 00 00 00 00 00 00 dasd(eckd): Sense(hex) 16-23: 00 00 00 00 e1 00 0f 00 dasd(eckd): Sense(hex) 24-31: 00 00 40 e2 00 00 00 00 dasd(eckd): 24 Byte: 0 MSG 0, no MSGb to SYSOP 2. Equipment Check errors on LPAR or for dedicated devices on z/VM: dasd(eckd): I/O status report for device 0.0.XXXX: dasd(eckd): in req: 00000000XXXXXXXX CC:00 FC:04 AC:00 SC:17 DS:0E CS:40 fcxs:01 schxs:00 RC:0 dasd(eckd): device 0.0.9713: Failing TCW: 00000000XXXXXXXX dasd(eckd): Sense(hex) 0- 7: 10 00 00 00 13 58 4d 0f dasd(eckd): Sense(hex) 8-15: 67 00 00 00 00 00 00 04 dasd(eckd): Sense(hex) 16-23: e5 18 05 33 97 01 0f 0f dasd(eckd): Sense(hex) 24-31: 00 00 40 e2 00 04 58 0d dasd(eckd): 24 Byte: 0 MSG f, no MSGb to SYSOP Fix this problem by using the up-to-date information provided during online processing via the device specific SNEQ to detect the case of outdated LCU data. If there is a difference, perform a re-read of that data. Cc: stable@vger.kernel.org Reviewed-by: Jan Hoeppner Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_alias.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 62f5f04d8f61..5e963fe0e38d 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -592,13 +592,22 @@ static int _schedule_lcu_update(struct alias_lcu *lcu, int dasd_alias_add_device(struct dasd_device *device) { struct dasd_eckd_private *private = device->private; - struct alias_lcu *lcu; + __u8 uaddr = private->uid.real_unit_addr; + struct alias_lcu *lcu = private->lcu; unsigned long flags; int rc; - lcu = private->lcu; rc = 0; spin_lock_irqsave(&lcu->lock, flags); + /* + * Check if device and lcu type differ. If so, the uac data may be + * outdated and needs to be updated. + */ + if (private->uid.type != lcu->uac->unit[uaddr].ua_type) { + lcu->flags |= UPDATE_PENDING; + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "uid type mismatch - trigger rescan"); + } if (!(lcu->flags & UPDATE_PENDING)) { rc = _add_device_to_lcu(lcu, device, device); if (rc) From 5f3ba878e7a2ffef82fb0882c0dd2c3507d734bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Wild?= Date: Wed, 18 Apr 2018 17:59:58 +0200 Subject: [PATCH 440/660] s390/cpum_cf: rename IBM z13/z14 counter names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the IBM z13/z14 counter names to be in sync with all other models. Cc: stable@vger.kernel.org # v4.12+ Fixes: 3593eb944c ("s390/cpum_cf: add hardware counter support for IBM z14") Fixes: 3fc7acebae ("s390/cpum_cf: add IBM z13 counter event names") Signed-off-by: André Wild Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf_events.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 5ee27dc9a10c..feebb2944882 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -123,7 +123,7 @@ CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); -CPUMF_EVENT_ATTR(cf_z13, L1D_WRITES_RO_EXCL, 0x0080); +CPUMF_EVENT_ATTR(cf_z13, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082); CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083); @@ -179,7 +179,7 @@ CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db); CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc); CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); -CPUMF_EVENT_ATTR(cf_z14, L1D_WRITES_RO_EXCL, 0x0080); +CPUMF_EVENT_ATTR(cf_z14, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z14, DTLB2_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z14, DTLB2_MISSES, 0x0082); CPUMF_EVENT_ATTR(cf_z14, DTLB2_HPAGE_WRITES, 0x0083); @@ -371,7 +371,7 @@ static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { }; static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { - CPUMF_EVENT_PTR(cf_z13, L1D_WRITES_RO_EXCL), + CPUMF_EVENT_PTR(cf_z13, L1D_RO_EXCL_WRITES), CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES), CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES), CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES), @@ -431,7 +431,7 @@ static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { }; static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = { - CPUMF_EVENT_PTR(cf_z14, L1D_WRITES_RO_EXCL), + CPUMF_EVENT_PTR(cf_z14, L1D_RO_EXCL_WRITES), CPUMF_EVENT_PTR(cf_z14, DTLB2_WRITES), CPUMF_EVENT_PTR(cf_z14, DTLB2_MISSES), CPUMF_EVENT_PTR(cf_z14, DTLB2_HPAGE_WRITES), From 2317b07d05d2b136eb4dc9609807c9111bda3b2a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 20 Apr 2018 10:21:21 +0200 Subject: [PATCH 441/660] s390: update sampling tag after task pid change In a multi-threaded program any thread can call execve(). If this is not done by the thread group leader, the de_thread() function replaces the pid of the task that calls execve() with the pid of thread group leader. If the task reaches user space again without going over __switch_to() the sampling tag is still set to the old pid. Define the arch_setup_new_exec function to verify the task pid and udpate the tag with LPP if it has changed. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/thread_info.h | 3 +++ arch/s390/kernel/process.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 83ba57533ce6..3c883c368eb0 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -45,6 +45,9 @@ struct thread_info { void arch_release_task_struct(struct task_struct *tsk); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); +void arch_setup_new_exec(void); +#define arch_setup_new_exec arch_setup_new_exec + #endif /* diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 70576a2f69cf..6e758bb6cd29 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,15 @@ void flush_thread(void) { } +void arch_setup_new_exec(void) +{ + if (S390_lowcore.current_pid != current->pid) { + S390_lowcore.current_pid = current->pid; + if (test_facility(40)) + lpp(&S390_lowcore.lpp); + } +} + void arch_release_task_struct(struct task_struct *tsk) { runtime_instr_release(tsk); From 3368e547c52b96586f0edf9657ca12b94d8e61a7 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 20 Apr 2018 10:24:04 +0200 Subject: [PATCH 442/660] vfio: ccw: process ssch with interrupts disabled When we call ssch, an interrupt might already be pending once we return from the START SUBCHANNEL instruction. Therefore we need to make sure interrupts are disabled while holding the subchannel lock until after we're done with our processing. Cc: stable@vger.kernel.org #v4.12+ Reviewed-by: Dong Jia Shi Acked-by: Halil Pasic Acked-by: Pierre Morel Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/vfio_ccw_fsm.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index ff6963ad6e39..3c800642134e 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -20,12 +20,12 @@ static int fsm_io_helper(struct vfio_ccw_private *private) int ccode; __u8 lpm; unsigned long flags; + int ret; sch = private->sch; spin_lock_irqsave(sch->lock, flags); private->state = VFIO_CCW_STATE_BUSY; - spin_unlock_irqrestore(sch->lock, flags); orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm); @@ -38,10 +38,12 @@ static int fsm_io_helper(struct vfio_ccw_private *private) * Initialize device status information */ sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND; - return 0; + ret = 0; + break; case 1: /* Status pending */ case 2: /* Busy */ - return -EBUSY; + ret = -EBUSY; + break; case 3: /* Device/path not operational */ { lpm = orb->cmd.lpm; @@ -51,13 +53,16 @@ static int fsm_io_helper(struct vfio_ccw_private *private) sch->lpm = 0; if (cio_update_schib(sch)) - return -ENODEV; - - return sch->lpm ? -EACCES : -ENODEV; + ret = -ENODEV; + else + ret = sch->lpm ? -EACCES : -ENODEV; + break; } default: - return ccode; + ret = ccode; } + spin_unlock_irqrestore(sch->lock, flags); + return ret; } static void fsm_notoper(struct vfio_ccw_private *private, From 6cf09958f32b9667bb3ebadf74367c791112771b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 20 Apr 2018 12:48:52 +0200 Subject: [PATCH 443/660] s390: correct module section names for expoline code revert The main linker script vmlinux.lds.S for the kernel image merges the expoline code patch tables into two section ".nospec_call_table" and ".nospec_return_table". This is *not* done for the modules, there the sections retain their original names as generated by gcc: ".s390_indirect_call", ".s390_return_mem" and ".s390_return_reg". The module_finalize code has to check for the compiler generated section names, otherwise no code patching is done. This slows down the module code in case of "spectre_v2=off". Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 5a83be955c70..0dc8ac8548ee 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -465,11 +465,11 @@ int module_finalize(const Elf_Ehdr *hdr, apply_alternatives(aseg, aseg + s->sh_size); if (IS_ENABLED(CONFIG_EXPOLINE) && - (!strcmp(".nospec_call_table", secname))) + (!strncmp(".s390_indirect", secname, 14))) nospec_revert(aseg, aseg + s->sh_size); if (IS_ENABLED(CONFIG_EXPOLINE) && - (!strcmp(".nospec_return_table", secname))) + (!strncmp(".s390_return", secname, 12))) nospec_revert(aseg, aseg + s->sh_size); } From 912e4c332037e7ed063c164985c36fb2b549ea3a Mon Sep 17 00:00:00 2001 From: Jeffery Miller Date: Fri, 20 Apr 2018 23:20:46 -0500 Subject: [PATCH 444/660] ALSA: pcm: Return negative delays from SNDRV_PCM_IOCTL_DELAY. The commit c2c86a97175f ("ALSA: pcm: Remove set_fs() in PCM core code") changed SNDRV_PCM_IOCTL_DELAY to return an inconsistent error instead of a negative delay. Originally the call would succeed and return the negative delay. The Chromium OS Audio Server (CRAS) gets confused and hangs when the error is returned instead of the negative delay. Help CRAS avoid the issue by rolling back the behavior to return a negative delay instead of an error. Fixes: c2c86a97175f ("ALSA: pcm: Remove set_fs() in PCM core code") Signed-off-by: Jeffery Miller Cc: # v4.13+ Signed-off-by: Takashi Iwai --- sound/core/pcm_compat.c | 7 ++++--- sound/core/pcm_native.c | 23 +++++++++++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index b719d0bd833e..06d7c40af570 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -27,10 +27,11 @@ static int snd_pcm_ioctl_delay_compat(struct snd_pcm_substream *substream, s32 __user *src) { snd_pcm_sframes_t delay; + int err; - delay = snd_pcm_delay(substream); - if (delay < 0) - return delay; + err = snd_pcm_delay(substream, &delay); + if (err) + return err; if (put_user(delay, src)) return -EFAULT; return 0; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 35ffccea94c3..06aa499543b6 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2692,7 +2692,8 @@ static int snd_pcm_hwsync(struct snd_pcm_substream *substream) return err; } -static snd_pcm_sframes_t snd_pcm_delay(struct snd_pcm_substream *substream) +static int snd_pcm_delay(struct snd_pcm_substream *substream, + snd_pcm_sframes_t *delay) { struct snd_pcm_runtime *runtime = substream->runtime; int err; @@ -2708,7 +2709,9 @@ static snd_pcm_sframes_t snd_pcm_delay(struct snd_pcm_substream *substream) n += runtime->delay; } snd_pcm_stream_unlock_irq(substream); - return err < 0 ? err : n; + if (!err) + *delay = n; + return err; } static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream, @@ -2916,11 +2919,13 @@ static int snd_pcm_common_ioctl(struct file *file, return snd_pcm_hwsync(substream); case SNDRV_PCM_IOCTL_DELAY: { - snd_pcm_sframes_t delay = snd_pcm_delay(substream); + snd_pcm_sframes_t delay; snd_pcm_sframes_t __user *res = arg; + int err; - if (delay < 0) - return delay; + err = snd_pcm_delay(substream, &delay); + if (err) + return err; if (put_user(delay, res)) return -EFAULT; return 0; @@ -3008,13 +3013,7 @@ int snd_pcm_kernel_ioctl(struct snd_pcm_substream *substream, case SNDRV_PCM_IOCTL_DROP: return snd_pcm_drop(substream); case SNDRV_PCM_IOCTL_DELAY: - { - result = snd_pcm_delay(substream); - if (result < 0) - return result; - *frames = result; - return 0; - } + return snd_pcm_delay(substream, frames); default: return -EINVAL; } From f853dcaae2f5bbe021161e421bd1576845bae8f6 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Sat, 21 Apr 2018 14:57:40 +0200 Subject: [PATCH 445/660] ALSA: core: Report audio_tstamp in snd_pcm_sync_ptr It looks like a simple mistake that this struct member was forgotten. Audio_tstamp isn't used much, and on some archs (such as x86) this ioctl is not used by default, so that might be the reason why this has slipped for so long. Fixes: 4eeaaeaea1ce ("ALSA: core: add hooks for audio timestamps") Signed-off-by: David Henningsson Reviewed-by: Takashi Sakamoto Cc: # v3.8+ Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 06aa499543b6..159706cf8f05 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2754,6 +2754,7 @@ static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream, sync_ptr.s.status.hw_ptr = status->hw_ptr; sync_ptr.s.status.tstamp = status->tstamp; sync_ptr.s.status.suspended_state = status->suspended_state; + sync_ptr.s.status.audio_tstamp = status->audio_tstamp; snd_pcm_stream_unlock_irq(substream); if (copy_to_user(_sync_ptr, &sync_ptr, sizeof(sync_ptr))) return -EFAULT; From 855c1c2fce8bdbd796cba1d1456ca8f0e876c2f1 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 10 Apr 2018 23:07:51 +0800 Subject: [PATCH 446/660] ACPI / PM: Blacklist Low Power S0 Idle _DSM for ThinkPad X1 Tablet(2016) ThinkPad X1 Tablet(2016) is reported to have issues with the Low Power S0 Idle _DSM interface and since this machine model generally can do ACPI S3 just fine, and user would like to use S3 as default sleep model, add a blacklist entry to disable that interface for ThinkPad X1 Tablet(2016). Link: https://bugzilla.kernel.org/show_bug.cgi?id=199057 Reported-and-tested-by: Robin Lee Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 99a1a650326d..974e58457697 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -364,6 +364,19 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"), }, }, + /* + * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using + * the Low Power S0 Idle firmware interface (see + * https://bugzilla.kernel.org/show_bug.cgi?id=199057). + */ + { + .callback = init_no_lps0, + .ident = "ThinkPad X1 Tablet(2016)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), + }, + }, {}, }; From cc6a0e315a68e5db85bea347b0c5b0fe4a9a5904 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 19 Apr 2018 13:08:37 +0300 Subject: [PATCH 447/660] ACPI / scan: Initialize watchdog before PNP At least on one Dell system the PNP motherboard resources device includes resources used by WDAT table. Since PNP gets initialized before WDAT it results following error and no watchdog: platform wdat_wdt: failed to claim resource 3: [io 0x046a-0x046c] ACPI: watchdog: Device creation failed: -16 Now, the PNP system driver is already accustomed with the situation that it cannot reserve all those motherboard resources because drivers using those might have reserved them already. In addition putting WDAT table resources under motherboard resources device makes sense in general. Fix this by initializing WDAT right before PNP. This allows WDAT to reserve all its resources and still keeps PNP system driver happy. Reported-by: Shubhrata.Priyadarsh@dell.com Reported-by: Takashi Iwai Signed-off-by: Mika Westerberg Acked-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index cc234e6a6297..970dd87d347c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2166,10 +2166,10 @@ int __init acpi_scan_init(void) acpi_cmos_rtc_init(); acpi_container_init(); acpi_memory_hotplug_init(); + acpi_watchdog_init(); acpi_pnp_init(); acpi_int340x_thermal_init(); acpi_amba_init(); - acpi_watchdog_init(); acpi_init_lpit(); acpi_scan_add_handler(&generic_device_handler); From ae860a19f37c686e7c5816e96640168b7174a096 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 20 Apr 2018 15:22:02 +0300 Subject: [PATCH 448/660] PCI / PM: Do not clear state_saved in pci_pm_freeze() when smart suspend is set If a driver uses DPM_FLAG_SMART_SUSPEND and the device is already runtime suspended when hibernate is started PCI core skips runtime resuming the device but still clears pci_dev->state_saved. After the hibernation image is written pci_pm_thaw_noirq() makes sure subsequent thaw phases for the device are also skipped leaving it runtime suspended with pci_dev->state_saved == false. When the device is eventually runtime resumed pci_pm_runtime_resume() restores config space by calling pci_restore_standard_config(), however because pci_dev->state_saved == false pci_restore_state() never actually restores the config space leaving the device in a state that is not what the driver might expect. For example here is what happens for intel-lpss I2C devices once the hibernation snapshot is taken: intel-lpss 0000:00:15.0: power state changed by ACPI to D0 intel-lpss 0000:00:1e.0: power state changed by ACPI to D3cold video LNXVIDEO:00: Restoring backlight state PM: hibernation exit i2c_designware i2c_designware.1: Unknown Synopsys component type: 0xffffffff i2c_designware i2c_designware.0: Unknown Synopsys component type: 0xffffffff i2c_designware i2c_designware.1: timeout in disabling adapter i2c_designware i2c_designware.0: timeout in disabling adapter Since PCI config space is not restored the device is still in D3hot making MMIO register reads return 0xffffffff. Fix this by clearing pci_dev->state_saved only if we actually end up runtime resuming the device. Fixes: c4b65157aeef (PCI / PM: Take SMART_SUSPEND driver flag into account) Signed-off-by: Mika Westerberg Cc: 4.15+ # 4.15+ Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-driver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 6ace47099fc5..b9a131137e64 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -958,10 +958,11 @@ static int pci_pm_freeze(struct device *dev) * devices should not be touched during freeze/thaw transitions, * however. */ - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) + if (!dev_pm_smart_suspend_and_suspended(dev)) { pm_runtime_resume(dev); + pci_dev->state_saved = false; + } - pci_dev->state_saved = false; if (pm->freeze) { int error; From 350f76dc063934215a877ef582206763c8a80395 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 18 Apr 2018 21:39:51 +0200 Subject: [PATCH 449/660] usb: core: phy: add the SPDX-License-Identifier and include guard This clarifies the license of the code. While here also add an include guard to the header file. Fixes: 07dbff0ddbd86c ("usb: core: add a wrapper for the USB PHYs on the HCD") Suggested-by: Masahiro Yamada Signed-off-by: Martin Blumenstingl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/phy.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/core/phy.h b/drivers/usb/core/phy.h index bbc969383074..88a3c037e9df 100644 --- a/drivers/usb/core/phy.h +++ b/drivers/usb/core/phy.h @@ -1,3 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * USB roothub wrapper + * + * Copyright (C) 2018 Martin Blumenstingl + */ + +#ifndef __USB_CORE_PHY_H_ +#define __USB_CORE_PHY_H_ + struct device; struct usb_phy_roothub; @@ -13,3 +23,5 @@ int usb_phy_roothub_suspend(struct device *controller_dev, struct usb_phy_roothub *phy_roothub); int usb_phy_roothub_resume(struct device *controller_dev, struct usb_phy_roothub *phy_roothub); + +#endif /* __USB_CORE_PHY_H_ */ From 6d215f83e5fccb3dd023e97fef1bd0029bfedde9 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 19 Apr 2018 17:39:16 +0200 Subject: [PATCH 450/660] serial: imx: warn user when using unsupported configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using half-duplex mode (which disables receiver during txing) the RTS signal cannot be driven low during transmission when using i.MX UART RTS/CTS control. This seems to be a limitation of the i.MX UART IP: The RTS (CTS_B) signal is controlled by the receiver. When the receiver is disabled, the signal stays in UART logic idle state which is high... If SER_RS485_RTS_ON_SEND is used, RTS needs to be high active during transmission. Since this is the default state of the RTS (CTS_B) signal when the receiver is off, half-duplex mode in this configuration works fine. However, a low-active RTS signal (flag SER_RS485_RTS_ON_SEND not set) cannot be generated when the receiver is turned off. Print an error if the user selects this unsupported configuration (both SER_RS485_RTS_ON_SEND and SER_RS485_RX_DURING_TX unset) and configure the closest working configuration (set the SER_RS485_RX_DURING_TX flag). Signed-off-by: Stefan Agner Acked-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 91f3a1a5cb7f..65d7a2bfb6d2 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1833,6 +1833,11 @@ static int imx_uart_rs485_config(struct uart_port *port, rs485conf->flags &= ~SER_RS485_ENABLED; if (rs485conf->flags & SER_RS485_ENABLED) { + /* Enable receiver if low-active RTS signal is requested */ + if (sport->have_rtscts && !sport->have_rtsgpio && + !(rs485conf->flags & SER_RS485_RTS_ON_SEND)) + rs485conf->flags |= SER_RS485_RX_DURING_TX; + /* disable transmitter */ ucr2 = imx_uart_readl(sport, UCR2); if (rs485conf->flags & SER_RS485_RTS_AFTER_SEND) @@ -2265,6 +2270,18 @@ static int imx_uart_probe(struct platform_device *pdev) (!sport->have_rtscts && !sport->have_rtsgpio)) dev_err(&pdev->dev, "no RTS control, disabling rs485\n"); + /* + * If using the i.MX UART RTS/CTS control then the RTS (CTS_B) + * signal cannot be set low during transmission in case the + * receiver is off (limitation of the i.MX UART IP). + */ + if (sport->port.rs485.flags & SER_RS485_ENABLED && + sport->have_rtscts && !sport->have_rtsgpio && + (!(sport->port.rs485.flags & SER_RS485_RTS_ON_SEND) && + !(sport->port.rs485.flags & SER_RS485_RX_DURING_TX))) + dev_err(&pdev->dev, + "low-active RTS not possible when receiver is off, enabling receiver\n"); + imx_uart_rs485_config(&sport->port, &sport->port.rs485); /* Disable interrupts before requesting them */ From 0aa821d846c0590ad64a00af95a3dcc29263d70f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 20 Apr 2018 14:44:07 +0200 Subject: [PATCH 451/660] serial: imx: fix cached UCR2 read on software reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To reset the UART the SRST needs be cleared (low active). According to the documentation the bit will remain active for 4 module clocks until it is cleared (set to 1). Hence the real register need to be read in case the cached register indicates that the SRST bit is zero. This bug lead to wrong baudrate because the baud rate register got restored before reset completed in imx_flush_buffer. Fixes: 3a0ab62f43de ("serial: imx: implement shadow registers for UCRx and UFCR") Signed-off-by: Stefan Agner Reviewed-by: Fabio Estevam Reviewed-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 65d7a2bfb6d2..c2fc6bef7a6f 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -316,7 +316,7 @@ static u32 imx_uart_readl(struct imx_port *sport, u32 offset) * differ from the value that was last written. As it only * clears after being set, reread conditionally. */ - if (sport->ucr2 & UCR2_SRST) + if (!(sport->ucr2 & UCR2_SRST)) sport->ucr2 = readl(sport->port.membase + offset); return sport->ucr2; break; From 34df2466b48dfe258e14fe2a7bc4641416575ade Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 23 Apr 2018 09:32:40 +0200 Subject: [PATCH 452/660] dt-bindings: meson-uart: DT fix s/clocks-names/clock-names/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt b/Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt index 8ff65fa632fd..c06c045126fc 100644 --- a/Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt +++ b/Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt @@ -21,7 +21,7 @@ Required properties: - interrupts : identifier to the device interrupt - clocks : a list of phandle + clock-specifier pairs, one for each entry in clock names. -- clocks-names : +- clock-names : * "xtal" for external xtal clock identifier * "pclk" for the bus core clock, either the clk81 clock or the gate clock * "baud" for the source of the baudrate generator, can be either the xtal From ef9604b622ce3b77e0ec6b566a016ddd64c5deb0 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Mon, 9 Apr 2018 13:20:00 +0100 Subject: [PATCH 453/660] serial: sh-sci: Document r8a77470 bindings RZ/G1C (R8A77470) SoC also has the R-Car gen2 compatible SCIF and HSCIF ports, so document the SoC specific bindings. Signed-off-by: Biju Das Reviewed-by: Fabrizio Castro Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Reviewed-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/renesas,sci-serial.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt index ad962f4ec3aa..a006ea4d065f 100644 --- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt +++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt @@ -17,6 +17,8 @@ Required properties: - "renesas,scifa-r8a7745" for R8A7745 (RZ/G1E) SCIFA compatible UART. - "renesas,scifb-r8a7745" for R8A7745 (RZ/G1E) SCIFB compatible UART. - "renesas,hscif-r8a7745" for R8A7745 (RZ/G1E) HSCIF compatible UART. + - "renesas,scif-r8a77470" for R8A77470 (RZ/G1C) SCIF compatible UART. + - "renesas,hscif-r8a77470" for R8A77470 (RZ/G1C) HSCIF compatible UART. - "renesas,scif-r8a7778" for R8A7778 (R-Car M1) SCIF compatible UART. - "renesas,scif-r8a7779" for R8A7779 (R-Car H1) SCIF compatible UART. - "renesas,scif-r8a7790" for R8A7790 (R-Car H2) SCIF compatible UART. From dd709e72cb934eefd44de8d9969097173fbf45dc Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Fri, 6 Apr 2018 17:21:53 -0600 Subject: [PATCH 454/660] earlycon: Use a pointer table to fix __earlycon_table stride Commit 99492c39f39f ("earlycon: Fix __earlycon_table stride") tried to fix __earlycon_table stride by forcing the earlycon_id struct alignment to 32 and asking the linker to 32-byte align the __earlycon_table symbol. This fix was based on commit 07fca0e57fca92 ("tracing: Properly align linker defined symbols") which tried a similar fix for the tracing subsystem. However, this fix doesn't quite work because there is no guarantee that gcc will place structures packed into an array format. In fact, gcc 4.9 chooses to 64-byte align these structs by inserting additional padding between the entries because it has no clue that they are supposed to be in an array. If we are unlucky, the linker will assign symbol "__earlycon_table" to a 32-byte aligned address which does not correspond to the 64-byte aligned contents of section "__earlycon_table". To address this same problem, the fix to the tracing system was subsequently re-implemented using a more robust table of pointers approach by commits: 3d56e331b653 ("tracing: Replace syscall_meta_data struct array with pointer array") 654986462939 ("tracepoints: Fix section alignment using pointer array") e4a9ea5ee7c8 ("tracing: Replace trace_event struct array with pointer array") Let's use this same "array of pointers to structs" approach for EARLYCON_TABLE. Fixes: 99492c39f39f ("earlycon: Fix __earlycon_table stride") Signed-off-by: Daniel Kurtz Suggested-by: Aaron Durbin Reviewed-by: Rob Herring Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 7 +++++-- drivers/tty/serial/earlycon.c | 6 ++++-- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/serial_core.h | 21 ++++++++++++++------- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 84aa9d676375..6da20b9688f7 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -942,7 +942,7 @@ int __init early_init_dt_scan_chosen_stdout(void) int offset; const char *p, *q, *options = NULL; int l; - const struct earlycon_id *match; + const struct earlycon_id **p_match; const void *fdt = initial_boot_params; offset = fdt_path_offset(fdt, "/chosen"); @@ -969,7 +969,10 @@ int __init early_init_dt_scan_chosen_stdout(void) return 0; } - for (match = __earlycon_table; match < __earlycon_table_end; match++) { + for (p_match = __earlycon_table; p_match < __earlycon_table_end; + p_match++) { + const struct earlycon_id *match = *p_match; + if (!match->compatible[0]) continue; diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index a24278380fec..22683393a0f2 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -169,7 +169,7 @@ static int __init register_earlycon(char *buf, const struct earlycon_id *match) */ int __init setup_earlycon(char *buf) { - const struct earlycon_id *match; + const struct earlycon_id **p_match; if (!buf || !buf[0]) return -EINVAL; @@ -177,7 +177,9 @@ int __init setup_earlycon(char *buf) if (early_con.flags & CON_ENABLED) return -EALREADY; - for (match = __earlycon_table; match < __earlycon_table_end; match++) { + for (p_match = __earlycon_table; p_match < __earlycon_table_end; + p_match++) { + const struct earlycon_id *match = *p_match; size_t len = strlen(match->name); if (strncmp(buf, match->name, len)) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 278841c75b97..af240573e482 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -188,7 +188,7 @@ #endif #ifdef CONFIG_SERIAL_EARLYCON -#define EARLYCON_TABLE() STRUCT_ALIGN(); \ +#define EARLYCON_TABLE() . = ALIGN(8); \ VMLINUX_SYMBOL(__earlycon_table) = .; \ KEEP(*(__earlycon_table)) \ VMLINUX_SYMBOL(__earlycon_table_end) = .; diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 1d356105f25a..b4c9fda9d833 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -351,10 +351,10 @@ struct earlycon_id { char name[16]; char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); -} __aligned(32); +}; -extern const struct earlycon_id __earlycon_table[]; -extern const struct earlycon_id __earlycon_table_end[]; +extern const struct earlycon_id *__earlycon_table[]; +extern const struct earlycon_id *__earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used @@ -362,12 +362,19 @@ extern const struct earlycon_id __earlycon_table_end[]; #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif -#define OF_EARLYCON_DECLARE(_name, compat, fn) \ - static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \ - EARLYCON_USED_OR_UNUSED __section(__earlycon_table) \ +#define _OF_EARLYCON_DECLARE(_name, compat, fn, unique_id) \ + static const struct earlycon_id unique_id \ + EARLYCON_USED_OR_UNUSED __initconst \ = { .name = __stringify(_name), \ .compatible = compat, \ - .setup = fn } + .setup = fn }; \ + static const struct earlycon_id EARLYCON_USED_OR_UNUSED \ + __section(__earlycon_table) \ + * const __PASTE(__p, unique_id) = &unique_id + +#define OF_EARLYCON_DECLARE(_name, compat, fn) \ + _OF_EARLYCON_DECLARE(_name, compat, fn, \ + __UNIQUE_ID(__earlycon_##_name)) #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) From 7010adcdd23527f7efef1e7bc0d8c458f6ec0dd2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 23 Apr 2018 00:02:09 +0200 Subject: [PATCH 455/660] x86/jailhouse: Fix incorrect SPDX identifier GPL2.0 is not a valid SPDX identiier. Replace it with GPL-2.0. Fixes: 4a362601baa6 ("x86/jailhouse: Add infrastructure for running in non-root cell") Signed-off-by: Thomas Gleixner Acked-by: Jan Kiszka Cc: Kate Stewart Cc: Jonathan Corbet Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Link: https://lkml.kernel.org/r/20180422220832.815346488@linutronix.de --- arch/x86/include/asm/jailhouse_para.h | 2 +- arch/x86/kernel/jailhouse.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h index b885a961a150..a34897aef2c2 100644 --- a/arch/x86/include/asm/jailhouse_para.h +++ b/arch/x86/include/asm/jailhouse_para.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL2.0 */ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Jailhouse paravirt detection diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index fa183a131edc..a15fe0e92cf9 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL2.0 +// SPDX-License-Identifier: GPL-2.0 /* * Jailhouse paravirt_ops implementation * From 066cd1c4e4b9b43116f2bf0f4a84ac7235e7abec Mon Sep 17 00:00:00 2001 From: Karthikeyan Ramasubramanian Date: Fri, 6 Apr 2018 18:49:00 -0600 Subject: [PATCH 456/660] tty: serial: qcom_geni_serial: Use signed variable to get IRQ The platform_get_irq can return error. Assigning the return value to an unsigned variable and checking it for negative value will always return false. Use an intermediate signed variable to get IRQ information, check for any error and then assign it to 'irq' variable inside uart_port structure. Signed-off-by: Karthikeyan Ramasubramanian Reported-by: Dan Carpenter Reviewed-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 65ff669373d4..a1b3eb04cb32 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -1022,6 +1022,7 @@ static int qcom_geni_serial_probe(struct platform_device *pdev) struct qcom_geni_serial_port *port; struct uart_port *uport; struct resource *res; + int irq; if (pdev->dev.of_node) line = of_alias_get_id(pdev->dev.of_node, "serial"); @@ -1061,11 +1062,12 @@ static int qcom_geni_serial_probe(struct platform_device *pdev) port->rx_fifo_depth = DEF_FIFO_DEPTH_WORDS; port->tx_fifo_width = DEF_FIFO_WIDTH_BITS; - uport->irq = platform_get_irq(pdev, 0); - if (uport->irq < 0) { - dev_err(&pdev->dev, "Failed to get IRQ %d\n", uport->irq); - return uport->irq; + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ %d\n", irq); + return irq; } + uport->irq = irq; uport->private_data = &qcom_geni_console_driver; platform_set_drvdata(pdev, port); From 17a16542b88e753cc3bd54cf30b74df3d547421e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 23 Apr 2018 09:35:16 +0200 Subject: [PATCH 457/660] dt-bindings: mvebu-uart: DT fix s/interrupts-names/interrupt-names/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/mvebu-uart.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/mvebu-uart.txt b/Documentation/devicetree/bindings/serial/mvebu-uart.txt index 2ae2fee7e023..b7e0e32b9ac6 100644 --- a/Documentation/devicetree/bindings/serial/mvebu-uart.txt +++ b/Documentation/devicetree/bindings/serial/mvebu-uart.txt @@ -24,7 +24,7 @@ Required properties: - Must contain two elements for the extended variant of the IP (marvell,armada-3700-uart-ext): "uart-tx" and "uart-rx", respectively the UART TX interrupt and the UART RX interrupt. A - corresponding interrupts-names property must be defined. + corresponding interrupt-names property must be defined. - For backward compatibility reasons, a single element interrupts property is also supported for the standard variant of the IP, containing only the UART sum interrupt. This form is deprecated From 598c2d41ff44889dd8eced4f117403e472158d85 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 16 Apr 2018 20:06:34 +0900 Subject: [PATCH 458/660] tty: Avoid possible error pointer dereference at tty_ldisc_restore(). syzbot is reporting crashes [1] triggered by memory allocation failure at tty_ldisc_get() from tty_ldisc_restore(). While syzbot stops at WARN_ON() due to panic_on_warn == true, panic_on_warn == false will after all trigger an OOPS by dereferencing old->ops->num if IS_ERR(old) == true. We can simplify tty_ldisc_restore() as three calls (old->ops->num, N_TTY, N_NULL) to tty_ldisc_failto() in addition to avoiding possible error pointer dereference. If someone reports kernel panic triggered by forcing all memory allocations for tty_ldisc_restore() to fail, we can consider adding __GFP_NOFAIL for tty_ldisc_restore() case. [1] https://syzkaller.appspot.com/bug?id=6ac359c61e71d22e06db7f8f88243feb11d927e7 Reported-by: syzbot+40b7287c2dc987c48c81@syzkaller.appspotmail.com Signed-off-by: Tetsuo Handa Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Dmitry Vyukov Cc: Johannes Weiner Cc: Alan Cox Cc: Christoph Hellwig Cc: Michal Hocko Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 050f4d650891..c1cf6cb83185 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -527,19 +527,16 @@ static int tty_ldisc_failto(struct tty_struct *tty, int ld) static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) { /* There is an outstanding reference here so this is safe */ - old = tty_ldisc_get(tty, old->ops->num); - WARN_ON(IS_ERR(old)); - tty->ldisc = old; - tty_set_termios_ldisc(tty, old->ops->num); - if (tty_ldisc_open(tty, old) < 0) { - tty_ldisc_put(old); + if (tty_ldisc_failto(tty, old->ops->num) < 0) { + const char *name = tty_name(tty); + + pr_warn("Falling back ldisc for %s.\n", name); /* The traditional behaviour is to fall back to N_TTY, we want to avoid falling back to N_NULL unless we have no choice to avoid the risk of breaking anything */ if (tty_ldisc_failto(tty, N_TTY) < 0 && tty_ldisc_failto(tty, N_NULL) < 0) - panic("Couldn't open N_NULL ldisc for %s.", - tty_name(tty)); + panic("Couldn't open N_NULL ldisc for %s.", name); } } From 903f9db10f18f735e62ba447147b6c434b6af003 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 5 Apr 2018 19:40:16 +0900 Subject: [PATCH 459/660] tty: Don't call panic() at tty_ldisc_init() syzbot is reporting kernel panic [1] triggered by memory allocation failure at tty_ldisc_get() from tty_ldisc_init(). But since both tty_ldisc_get() and caller of tty_ldisc_init() can cleanly handle errors, tty_ldisc_init() does not need to call panic() when tty_ldisc_get() failed. [1] https://syzkaller.appspot.com/bug?id=883431818e036ae6a9981156a64b821110f39187 Signed-off-by: Tetsuo Handa Reported-by: syzbot Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 5 ++++- drivers/tty/tty_ldisc.c | 5 +++-- include/linux/tty.h | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 63114ea35ec1..7c838b90a31d 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2816,7 +2816,10 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) kref_init(&tty->kref); tty->magic = TTY_MAGIC; - tty_ldisc_init(tty); + if (tty_ldisc_init(tty)) { + kfree(tty); + return NULL; + } tty->session = NULL; tty->pgrp = NULL; mutex_init(&tty->legacy_mutex); diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index c1cf6cb83185..8a88a7787cfe 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -821,12 +821,13 @@ EXPORT_SYMBOL_GPL(tty_ldisc_release); * the tty structure is not completely set up when this call is made. */ -void tty_ldisc_init(struct tty_struct *tty) +int tty_ldisc_init(struct tty_struct *tty) { struct tty_ldisc *ld = tty_ldisc_get(tty, N_TTY); if (IS_ERR(ld)) - panic("n_tty: init_tty"); + return PTR_ERR(ld); tty->ldisc = ld; + return 0; } /** diff --git a/include/linux/tty.h b/include/linux/tty.h index 47f8af22f216..1dd587ba6d88 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -701,7 +701,7 @@ extern int tty_unregister_ldisc(int disc); extern int tty_set_ldisc(struct tty_struct *tty, int disc); extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty); -extern void tty_ldisc_init(struct tty_struct *tty); +extern int __must_check tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); From 66dd99c2033684169f04068f66c7f83f7da229b8 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 23 Apr 2018 10:55:21 +0200 Subject: [PATCH 460/660] tty: serial: xuartps: Setup early console when uartclk is also passed Baudrate calculation depends on requested baudrate and uart clock. This patch is checking that uartclk is also passed. The same logic is used 8250_early.c/init_port function. Signed-off-by: Michal Simek Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index abcb4d09a2d8..bd72dd843338 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1181,7 +1181,7 @@ static int __init cdns_early_console_setup(struct earlycon_device *device, /* only set baud if specified on command line - otherwise * assume it has been initialized by a boot loader. */ - if (device->baud) { + if (port->uartclk && device->baud) { u32 cd = 0, bdiv = 0; u32 mr; int div8; From 904e1b1ff4c70044334f395aa751c8e73fb42714 Mon Sep 17 00:00:00 2001 From: Abhay Kumar Date: Wed, 18 Apr 2018 13:37:07 +0300 Subject: [PATCH 461/660] drm/i915/audio: set minimum CD clock to twice the BCLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In GLK when the device boots with only 1366x768 panel without audio, HDA codec doesn't come up. In this case, the CDCLK is less than twice the BCLK. Even though audio isn't being enabled, having a too low CDCLK leads to audio probe failing altogether. Require CDCLK to be at least twice the BLCK regardless of audio. This is a minimal fix to improve things. Unfortunately, this a) leads to too high CDCLK being used when audio is not used, and b) is still not enough to fix audio probe when no outputs are connected at probe time. The proper fix would be to increase CDCLK dynamically from the audio component hooks. v2: - Address comment (Jani) - New design approach v3: - Typo fix on top of v1 v4 by Jani: rewrite commit message, add comment in code Cc: stable@vger.kernel.org Cc: Ville Syrjälä Cc: Dhinakaran Pandiyan Cc: Wenkai Du Reviewed-by: Wenkai Du Tested-by: Wenkai Du Acked-by: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102937 Signed-off-by: Abhay Kumar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180418103707.14645-1-jani.nikula@intel.com (cherry picked from commit 2a5b95b448485e143ec3e004eabe53b31db78eb3) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_cdclk.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index fc8b2c6e3508..32d24c69da3c 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -2140,10 +2140,22 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) } } - /* According to BSpec, "The CD clock frequency must be at least twice + /* + * According to BSpec, "The CD clock frequency must be at least twice * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. + * + * FIXME: Check the actual, not default, BCLK being used. + * + * FIXME: This does not depend on ->has_audio because the higher CDCLK + * is required for audio probe, also when there are no audio capable + * displays connected at probe time. This leads to unnecessarily high + * CDCLK when audio is not required. + * + * FIXME: This limit is only applied when there are displays connected + * at probe time. If we probe without displays, we'll still end up using + * the platform minimum CDCLK, failing audio probe. */ - if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) + if (INTEL_GEN(dev_priv) >= 9) min_cdclk = max(2 * 96000, min_cdclk); /* From ac315c621f01d4b8a53dec317c7ae322fd26ff38 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 19 Apr 2018 18:51:09 +0300 Subject: [PATCH 462/660] drm/i915: Enable display WA#1183 from its correct spot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DMC FW specific part of display WA#1183 is supposed to be enabled whenever enabling DC5 or DC6, so move it to the DC6 enable function from the DC6 disable function. I noticed this after Daniel's patch to remove the unused skl_disable_dc6() function. Fixes: 53421c2fe99c ("drm/i915: Apply Display WA #1183 on skl, kbl, and cfl") Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180419155109.29451-1-imre.deak@intel.com (cherry picked from commit b49be6622f08187129561cff0409f7b06b33de57) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_runtime_pm.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 53ea564f971e..66de4b2dc8b7 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -641,19 +641,18 @@ void skl_enable_dc6(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Enabling DC6\n"); - gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); } void skl_disable_dc6(struct drm_i915_private *dev_priv) { DRM_DEBUG_KMS("Disabling DC6\n"); - /* Wa Display #1183: skl,kbl,cfl */ - if (IS_GEN9_BC(dev_priv)) - I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | - SKL_SELECT_ALTERNATE_DC_EXIT); - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); } From 7aa135fcf26377f92dc0680a57566b4c7f3e281b Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 28 Mar 2018 11:14:50 +0200 Subject: [PATCH 463/660] ANDROID: binder: prevent transactions into own process. This can't happen with normal nodes (because you can't get a ref to a node you own), but it could happen with the context manager; to make the behavior consistent with regular nodes, reject transactions into the context manager by the process owning it. Reported-by: syzbot+09e05aba06723a94d43d@syzkaller.appspotmail.com Signed-off-by: Martijn Coenen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 764b63a5aade..e578eee31589 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2839,6 +2839,14 @@ static void binder_transaction(struct binder_proc *proc, else return_error = BR_DEAD_REPLY; mutex_unlock(&context->context_mgr_node_lock); + if (target_node && target_proc == proc) { + binder_user_error("%d:%d got transaction to context manager from process owning it\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_invalid_target_handle; + } } if (!target_node) { /* From 6e3d66b80f670fdc64b9a120362a9f94b0494621 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 16 Apr 2018 11:19:24 -0700 Subject: [PATCH 464/660] uio_hv_generic: set size of ring buffer attribute The original code had ring size as a module parameter, but then it was made a fixed value. The code to set the size of the ring buffer binary file was lost in the transistion. The size is needed by user mode driver to know the size of the ring buffer. Fixes: 37b96a4931db ("uio_hv_generic: support sub-channels") Signed-off-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_hv_generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index f695a7e8c314..7ff659dff11d 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -171,12 +171,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, return 0; } -static struct bin_attribute ring_buffer_bin_attr __ro_after_init = { +static const struct bin_attribute ring_buffer_bin_attr = { .attr = { .name = "ring", .mode = 0600, - /* size is set at init time */ }, + .size = 2 * HV_RING_SIZE * PAGE_SIZE, .mmap = hv_uio_ring_mmap, }; From 9ab877a6ccf820483d79602bede0c1aa1da4d26a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 16 Apr 2018 11:19:25 -0700 Subject: [PATCH 465/660] uio_hv_generic: make ring buffer attribute for primary channel The primary channel also needs a ring buffer attribute. This allows application to check if kernel supports uio sub channels, and also makes all channels use consistent API. Fixes: 37b96a4931db ("uio_hv_generic: support sub-channels") Signed-off-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_hv_generic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 7ff659dff11d..972a42dd2a46 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -326,6 +326,11 @@ hv_uio_probe(struct hv_device *dev, vmbus_set_chn_rescind_callback(dev->channel, hv_uio_rescind); vmbus_set_sc_create_callback(dev->channel, hv_uio_new_channel); + ret = sysfs_create_bin_file(&dev->channel->kobj, &ring_buffer_bin_attr); + if (ret) + dev_notice(&dev->device, + "sysfs create ring bin file failed; %d\n", ret); + hv_set_drvdata(dev, pdata); return 0; From 135db384a2efde3718fd551e3968e97fcb400c84 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 16 Apr 2018 11:19:26 -0700 Subject: [PATCH 466/660] uio_hv_generic: use correct channel in isr Need to mask the correct sub-channel in the callback from VMBUS isr. Otherwise, can get in to infinite interrupt storm. Fixes: 37b96a4931db ("uio_hv_generic: support sub-channels") Signed-off-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_hv_generic.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 972a42dd2a46..a9d7be4b964f 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -94,10 +94,11 @@ hv_uio_irqcontrol(struct uio_info *info, s32 irq_state) */ static void hv_uio_channel_cb(void *context) { - struct hv_uio_private_data *pdata = context; - struct hv_device *dev = pdata->device; + struct vmbus_channel *chan = context; + struct hv_device *hv_dev = chan->device_obj; + struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev); - dev->channel->inbound.ring_buffer->interrupt_mask = 1; + chan->inbound.ring_buffer->interrupt_mask = 1; virt_mb(); uio_event_notify(&pdata->info); @@ -180,19 +181,18 @@ static const struct bin_attribute ring_buffer_bin_attr = { .mmap = hv_uio_ring_mmap, }; -/* Callback from VMBUS subystem when new channel created. */ +/* Callback from VMBUS subsystem when new channel created. */ static void hv_uio_new_channel(struct vmbus_channel *new_sc) { struct hv_device *hv_dev = new_sc->primary_channel->device_obj; struct device *device = &hv_dev->device; - struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev); const size_t ring_bytes = HV_RING_SIZE * PAGE_SIZE; int ret; /* Create host communication ring */ ret = vmbus_open(new_sc, ring_bytes, ring_bytes, NULL, 0, - hv_uio_channel_cb, pdata); + hv_uio_channel_cb, new_sc); if (ret) { dev_err(device, "vmbus_open subchannel failed: %d\n", ret); return; @@ -234,7 +234,7 @@ hv_uio_probe(struct hv_device *dev, ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE, HV_RING_SIZE * PAGE_SIZE, NULL, 0, - hv_uio_channel_cb, pdata); + hv_uio_channel_cb, dev->channel); if (ret) goto fail; From ce3d1536acabbdcdc3c945c3c078dd4ed1b8edfa Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 16 Apr 2018 11:19:27 -0700 Subject: [PATCH 467/660] uio_hv_generic: fix subchannel ring mmap The fault method of handling subchannel ring, did not work correctly (it only worked for the first page). Since ring buffer is physically contiguous, using the vm helper function is simpler and handles more cases. Fixes: 37b96a4931db ("uio_hv_generic: support sub-channels") Signed-off-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_hv_generic.c | 49 +++++++----------------------------- 1 file changed, 9 insertions(+), 40 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index a9d7be4b964f..c690d100adcd 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -19,7 +19,7 @@ * # echo -n "ed963694-e847-4b2a-85af-bc9cfc11d6f3" \ * > /sys/bus/vmbus/drivers/uio_hv_generic/bind */ - +#define DEBUG 1 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include @@ -122,54 +122,23 @@ static void hv_uio_rescind(struct vmbus_channel *channel) uio_event_notify(&pdata->info); } -/* - * Handle fault when looking for sub channel ring buffer - * Subchannel ring buffer is same as resource 0 which is main ring buffer - * This is derived from uio_vma_fault +/* Sysfs API to allow mmap of the ring buffers + * The ring buffer is allocated as contiguous memory by vmbus_open */ -static int hv_uio_vma_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - void *ring_buffer = vma->vm_private_data; - struct page *page; - void *addr; - - addr = ring_buffer + (vmf->pgoff << PAGE_SHIFT); - page = virt_to_page(addr); - get_page(page); - vmf->page = page; - return 0; -} - -static const struct vm_operations_struct hv_uio_vm_ops = { - .fault = hv_uio_vma_fault, -}; - -/* Sysfs API to allow mmap of the ring buffers */ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, struct vm_area_struct *vma) { struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj); - unsigned long requested_pages, actual_pages; + struct hv_device *dev = channel->primary_channel->device_obj; + u16 q_idx = channel->offermsg.offer.sub_channel_index; - if (vma->vm_end < vma->vm_start) - return -EINVAL; + dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n", + q_idx, vma_pages(vma), vma->vm_pgoff); - /* only allow 0 for now */ - if (vma->vm_pgoff > 0) - return -EINVAL; - - requested_pages = vma_pages(vma); - actual_pages = 2 * HV_RING_SIZE; - if (requested_pages > actual_pages) - return -EINVAL; - - vma->vm_private_data = channel->ringbuffer_pages; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_ops = &hv_uio_vm_ops; - return 0; + return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages), + channel->ringbuffer_pagecount << PAGE_SHIFT); } static const struct bin_attribute ring_buffer_bin_attr = { From 1ecb160199c3c289c660bdb852a9269e243009a8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 4 Apr 2018 22:37:13 +0200 Subject: [PATCH 468/660] test_firmware: Install all scripts List all the scripts invoked by fw_run_tests.sh, so that "make TARGETS=firmware install" keeps working. Fixes: 29a1c00ce1df8 ("test_firmware: add simple firmware firmware test ...") Fixes: b3cf21fae1fe0 ("test_firmware: test three firmware kernel configs ...") Signed-off-by: Ben Hutchings Acked-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index 826f38d5dd19..261c81f08606 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -4,6 +4,7 @@ all: TEST_PROGS := fw_run_tests.sh +TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh include ../lib.mk From e538409257d0217a9bc715686100a5328db75a15 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 4 Apr 2018 22:38:49 +0200 Subject: [PATCH 469/660] test_firmware: fix setting old custom fw path back on exit, second try Commit 65c79230576 tried to clear the custom firmware path on exit by writing a single space to the firmware_class.path parameter. This doesn't work because nothing strips this space from the value stored and fw_get_filesystem_firmware() only ignores zero-length paths. Instead, write a null byte. Fixes: 0a8adf58475 ("test: add firmware_class loader test") Fixes: 65c79230576 ("test_firmware: fix setting old custom fw path back on exit") Signed-off-by: Ben Hutchings Acked-by: Luis R. Rodriguez Cc: stable Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/fw_lib.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index 9ea31b57d71a..962d7f4ac627 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -154,11 +154,13 @@ test_finish() if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout fi - if [ "$OLD_FWPATH" = "" ]; then - OLD_FWPATH=" " - fi if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then - echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + if [ "$OLD_FWPATH" = "" ]; then + # A zero-length write won't work; write a null byte + printf '\000' >/sys/module/firmware_class/parameters/path + else + echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + fi fi if [ -f $FW ]; then rm -f "$FW" From df9267f1fb6b56fa573cc93c611009b0bd126878 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 8 Apr 2018 18:06:21 +0200 Subject: [PATCH 470/660] firmware: Fix firmware documentation for recent file renames firmware_class.c was split into several files under drivers/base/firmware_loader. The new main.c has the functions which /request_firmware.rst references. Acked-by: Luis R. Rodriguez Signed-off-by: Hans de Goede Tested-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/firmware/request_firmware.rst | 10 +++++----- Documentation/driver-api/infrastructure.rst | 2 +- Documentation/power/suspend-and-cpuhotplug.txt | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/driver-api/firmware/request_firmware.rst b/Documentation/driver-api/firmware/request_firmware.rst index cf4516dfbf96..20f21ed427a5 100644 --- a/Documentation/driver-api/firmware/request_firmware.rst +++ b/Documentation/driver-api/firmware/request_firmware.rst @@ -17,17 +17,17 @@ an error is returned. request_firmware ---------------- -.. kernel-doc:: drivers/base/firmware_class.c +.. kernel-doc:: drivers/base/firmware_loader/main.c :functions: request_firmware request_firmware_direct ----------------------- -.. kernel-doc:: drivers/base/firmware_class.c +.. kernel-doc:: drivers/base/firmware_loader/main.c :functions: request_firmware_direct request_firmware_into_buf ------------------------- -.. kernel-doc:: drivers/base/firmware_class.c +.. kernel-doc:: drivers/base/firmware_loader/main.c :functions: request_firmware_into_buf Asynchronous firmware requests @@ -41,7 +41,7 @@ in atomic contexts. request_firmware_nowait ----------------------- -.. kernel-doc:: drivers/base/firmware_class.c +.. kernel-doc:: drivers/base/firmware_loader/main.c :functions: request_firmware_nowait Special optimizations on reboot @@ -55,7 +55,7 @@ firmare to be loaded. firmware_request_cache() ----------------------- -.. kernel-doc:: drivers/base/firmware_class.c +.. kernel-doc:: drivers/base/firmware_loader/main.c :functions: firmware_request_cache request firmware API expected driver use diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst index 6d9ff316b608..bee1b9a1702f 100644 --- a/Documentation/driver-api/infrastructure.rst +++ b/Documentation/driver-api/infrastructure.rst @@ -28,7 +28,7 @@ Device Drivers Base .. kernel-doc:: drivers/base/node.c :internal: -.. kernel-doc:: drivers/base/firmware_class.c +.. kernel-doc:: drivers/base/firmware_loader/main.c :export: .. kernel-doc:: drivers/base/transport_class.c diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt index 31abd04b9572..6f55eb960a6d 100644 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -168,7 +168,7 @@ update on the CPUs, as discussed below: [Please bear in mind that the kernel requests the microcode images from userspace, using the request_firmware() function defined in -drivers/base/firmware_class.c] +drivers/base/firmware_loader/main.c] a. When all the CPUs are identical: From 3e14c6abbfb5c94506edda9d8e2c145d79375798 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 11 Apr 2018 17:22:43 +0200 Subject: [PATCH 471/660] kobject: don't use WARN for registration failures This WARNING proved to be noisy. The function still returns an error and callers should handle it. That's how most of kernel code works. Downgrade the WARNING to pr_err() and leave WARNINGs for kernel bugs. Signed-off-by: Dmitry Vyukov Reported-by: syzbot+209c0f67f99fec8eb14b@syzkaller.appspotmail.com Reported-by: syzbot+7fb6d9525a4528104e05@syzkaller.appspotmail.com Reported-by: syzbot+2e63711063e2d8f9ea27@syzkaller.appspotmail.com Reported-by: syzbot+de73361ee4971b6e6f75@syzkaller.appspotmail.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/kobject.c b/lib/kobject.c index e1d1f290bf35..18989b5b3b56 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -233,13 +233,12 @@ static int kobject_add_internal(struct kobject *kobj) /* be noisy on error issues */ if (error == -EEXIST) - WARN(1, - "%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", - __func__, kobject_name(kobj)); + pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", + __func__, kobject_name(kobj)); else - WARN(1, "%s failed for %s (error: %d parent: %s)\n", - __func__, kobject_name(kobj), error, - parent ? kobject_name(parent) : "'none'"); + pr_err("%s failed for %s (error: %d parent: %s)\n", + __func__, kobject_name(kobj), error, + parent ? kobject_name(parent) : "'none'"); } else kobj->state_in_sysfs = 1; From e65de1b61302c8ddb7e126d07ef3b893d038839b Mon Sep 17 00:00:00 2001 From: "Bernat, Yehezkel" Date: Thu, 19 Apr 2018 14:02:30 +0300 Subject: [PATCH 472/660] MAINTAINERS: update my email address Soon I'll not be available by my Intel email address, so switching to my personal email address instead. Signed-off-by: Yehezkel Bernat Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 92be777d060a..0d9aa5768af0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13965,7 +13965,7 @@ THUNDERBOLT DRIVER M: Andreas Noever M: Michael Jamet M: Mika Westerberg -M: Yehezkel Bernat +M: Yehezkel Bernat T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git S: Maintained F: Documentation/admin-guide/thunderbolt.rst @@ -13975,7 +13975,7 @@ F: include/linux/thunderbolt.h THUNDERBOLT NETWORK DRIVER M: Michael Jamet M: Mika Westerberg -M: Yehezkel Bernat +M: Yehezkel Bernat L: netdev@vger.kernel.org S: Maintained F: drivers/net/thunderbolt.c From 881c93c0fb73328845898344208fa0bf0d62cac6 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Sun, 15 Apr 2018 11:33:08 -0700 Subject: [PATCH 473/660] fpga-manager: altera-ps-spi: preserve nCONFIG state If the driver module is loaded when FPGA is configured, the FPGA is reset because nconfig is pulled low (low-active gpio inited with GPIOD_OUT_HIGH activates the signal which means setting its value to low). Init nconfig with GPIOD_OUT_LOW to prevent this. Signed-off-by: Anatolij Gustschin Acked-by: Alan Tull Signed-off-by: Moritz Fischer Cc: stable # 4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/altera-ps-spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/fpga/altera-ps-spi.c b/drivers/fpga/altera-ps-spi.c index 14f14efdf0d5..06d212a3d49d 100644 --- a/drivers/fpga/altera-ps-spi.c +++ b/drivers/fpga/altera-ps-spi.c @@ -249,7 +249,7 @@ static int altera_ps_probe(struct spi_device *spi) conf->data = of_id->data; conf->spi = spi; - conf->config = devm_gpiod_get(&spi->dev, "nconfig", GPIOD_OUT_HIGH); + conf->config = devm_gpiod_get(&spi->dev, "nconfig", GPIOD_OUT_LOW); if (IS_ERR(conf->config)) { dev_err(&spi->dev, "Failed to get config gpio: %ld\n", PTR_ERR(conf->config)); From 4aa403b775fc027ee86d9a090a4c5c3ee9063089 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 4 Apr 2018 12:30:37 +0200 Subject: [PATCH 474/660] MAINTAINERS: add dri-devel&linaro-mm for Android ION Most of the other cross-driver gfx infrastructure (dma_buf, dma_fence) also gets cross posted to all the relevant gfx/memory lists. Doing the same for ION means people won't miss relevant patches. Cc: Sumit Semwal Signed-off-by: Daniel Vetter Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0d9aa5768af0..0bd7243b96cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -905,6 +905,8 @@ ANDROID ION DRIVER M: Laura Abbott M: Sumit Semwal L: devel@driverdev.osuosl.org +L: dri-devel@lists.freedesktop.org +L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers) S: Supported F: drivers/staging/android/ion F: drivers/staging/android/uapi/ion.h From e33bbe69149b802c0c77bfb822685772f85388ca Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 8 Apr 2018 11:02:34 +0200 Subject: [PATCH 475/660] slimbus: Fix out-of-bounds access in slim_slicesize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc-4.1.2: slimbus/messaging.c: In function ‘slim_slicesize’: slimbus/messaging.c:186: warning: statement with no effect Indeed, clamp() is a macro not operating in-place, but returning the clamped value. Hence the value is not clamped at all, which may lead to an out-of-bounds access. Fix this by assigning the clamped value. Fixes: afbdcc7c384b0d44 ("slimbus: Add messaging APIs to slimbus framework") Signed-off-by: Geert Uytterhoeven Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/messaging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index 884419c37e84..457ea1f8db30 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -183,7 +183,7 @@ static u16 slim_slicesize(int code) 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7 }; - clamp(code, 1, (int)ARRAY_SIZE(sizetocode)); + code = clamp(code, 1, (int)ARRAY_SIZE(sizetocode)); return sizetocode[code - 1]; } From 02cfde67df1f440c7c3c7038cc97992afb81804f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 18 Apr 2018 15:24:47 +0200 Subject: [PATCH 476/660] virt: vbox: Move declarations of vboxguest private functions to private header Move the declarations of functions from vboxguest_utils.c which are only meant for vboxguest internal use from include/linux/vbox_utils.h to drivers/virt/vboxguest/vboxguest_core.h. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_core.h | 8 ++++++++ include/linux/vbox_utils.h | 23 ----------------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h index 6c784bf4fa6d..39ed85cf9244 100644 --- a/drivers/virt/vboxguest/vboxguest_core.h +++ b/drivers/virt/vboxguest/vboxguest_core.h @@ -171,4 +171,12 @@ irqreturn_t vbg_core_isr(int irq, void *dev_id); void vbg_linux_mouse_event(struct vbg_dev *gdev); +/* Private (non exported) functions form vboxguest_utils.c */ +void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); +int vbg_req_perform(struct vbg_dev *gdev, void *req); +int vbg_hgcm_call32( + struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, + struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count, + int *vbox_status); + #endif diff --git a/include/linux/vbox_utils.h b/include/linux/vbox_utils.h index c71def6b310f..a240ed2a0372 100644 --- a/include/linux/vbox_utils.h +++ b/include/linux/vbox_utils.h @@ -24,24 +24,6 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...); #define vbg_debug pr_debug #endif -/** - * Allocate memory for generic request and initialize the request header. - * - * Return: the allocated memory - * @len: Size of memory block required for the request. - * @req_type: The generic request type. - */ -void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); - -/** - * Perform a generic request. - * - * Return: VBox status code - * @gdev: The Guest extension device. - * @req: Pointer to the request structure. - */ -int vbg_req_perform(struct vbg_dev *gdev, void *req); - int vbg_hgcm_connect(struct vbg_dev *gdev, struct vmmdev_hgcm_service_location *loc, u32 *client_id, int *vbox_status); @@ -52,11 +34,6 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms, u32 parm_count, int *vbox_status); -int vbg_hgcm_call32( - struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, - struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count, - int *vbox_status); - /** * Convert a VirtualBox status code to a standard Linux kernel return value. * Return: 0 or negative errno value. From f6f9885b0531163f72c7bf898a0ab1ba4c7d5de6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 18 Apr 2018 15:24:48 +0200 Subject: [PATCH 477/660] virt: vbox: Add vbg_req_free() helper function This is a preparation patch for fixing issues on x86_64 virtual-machines with more then 4G of RAM, atm we pass __GFP_DMA32 to kmalloc, but kmalloc does not honor that, so we need to switch to get_pages, which means we will not be able to use kfree to free memory allocated with vbg_alloc_req. While at it also remove a comment on a vbg_alloc_req call which talks about Windows (inherited from the vbox upstream cross-platform code). Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_core.c | 66 +++++++++++++----------- drivers/virt/vboxguest/vboxguest_core.h | 1 + drivers/virt/vboxguest/vboxguest_utils.c | 14 +++-- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index 190dbf8cfcb5..7411a535fda2 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -114,7 +114,7 @@ static void vbg_guest_mappings_init(struct vbg_dev *gdev) } out: - kfree(req); + vbg_req_free(req, sizeof(*req)); kfree(pages); } @@ -144,7 +144,7 @@ static void vbg_guest_mappings_exit(struct vbg_dev *gdev) rc = vbg_req_perform(gdev, req); - kfree(req); + vbg_req_free(req, sizeof(*req)); if (rc < 0) { vbg_err("%s error: %d\n", __func__, rc); @@ -214,8 +214,8 @@ static int vbg_report_guest_info(struct vbg_dev *gdev) ret = vbg_status_code_to_errno(rc); out_free: - kfree(req2); - kfree(req1); + vbg_req_free(req2, sizeof(*req2)); + vbg_req_free(req1, sizeof(*req1)); return ret; } @@ -245,7 +245,7 @@ static int vbg_report_driver_status(struct vbg_dev *gdev, bool active) if (rc == VERR_NOT_IMPLEMENTED) /* Compatibility with older hosts. */ rc = VINF_SUCCESS; - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } @@ -431,7 +431,7 @@ static int vbg_heartbeat_host_config(struct vbg_dev *gdev, bool enabled) rc = vbg_req_perform(gdev, req); do_div(req->interval_ns, 1000000); /* ns -> ms */ gdev->heartbeat_interval_ms = req->interval_ns; - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } @@ -454,12 +454,6 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev) if (ret < 0) return ret; - /* - * Preallocate the request to use it from the timer callback because: - * 1) on Windows vbg_req_alloc must be called at IRQL <= APC_LEVEL - * and the timer callback runs at DISPATCH_LEVEL; - * 2) avoid repeated allocations. - */ gdev->guest_heartbeat_req = vbg_req_alloc( sizeof(*gdev->guest_heartbeat_req), VMMDEVREQ_GUEST_HEARTBEAT); @@ -481,8 +475,8 @@ static void vbg_heartbeat_exit(struct vbg_dev *gdev) { del_timer_sync(&gdev->heartbeat_timer); vbg_heartbeat_host_config(gdev, false); - kfree(gdev->guest_heartbeat_req); - + vbg_req_free(gdev->guest_heartbeat_req, + sizeof(*gdev->guest_heartbeat_req)); } /** @@ -543,7 +537,7 @@ static int vbg_reset_host_event_filter(struct vbg_dev *gdev, if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc); - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } @@ -617,7 +611,7 @@ static int vbg_set_session_event_filter(struct vbg_dev *gdev, out: mutex_unlock(&gdev->session_mutex); - kfree(req); + vbg_req_free(req, sizeof(*req)); return ret; } @@ -642,7 +636,7 @@ static int vbg_reset_host_capabilities(struct vbg_dev *gdev) if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc); - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } @@ -712,7 +706,7 @@ static int vbg_set_session_capabilities(struct vbg_dev *gdev, out: mutex_unlock(&gdev->session_mutex); - kfree(req); + vbg_req_free(req, sizeof(*req)); return ret; } @@ -749,7 +743,7 @@ static int vbg_query_host_version(struct vbg_dev *gdev) } out: - kfree(req); + vbg_req_free(req, sizeof(*req)); return ret; } @@ -847,11 +841,16 @@ int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events) return 0; err_free_reqs: - kfree(gdev->mouse_status_req); - kfree(gdev->ack_events_req); - kfree(gdev->cancel_req); - kfree(gdev->mem_balloon.change_req); - kfree(gdev->mem_balloon.get_req); + vbg_req_free(gdev->mouse_status_req, + sizeof(*gdev->mouse_status_req)); + vbg_req_free(gdev->ack_events_req, + sizeof(*gdev->ack_events_req)); + vbg_req_free(gdev->cancel_req, + sizeof(*gdev->cancel_req)); + vbg_req_free(gdev->mem_balloon.change_req, + sizeof(*gdev->mem_balloon.change_req)); + vbg_req_free(gdev->mem_balloon.get_req, + sizeof(*gdev->mem_balloon.get_req)); return ret; } @@ -872,11 +871,16 @@ void vbg_core_exit(struct vbg_dev *gdev) vbg_reset_host_capabilities(gdev); vbg_core_set_mouse_status(gdev, 0); - kfree(gdev->mouse_status_req); - kfree(gdev->ack_events_req); - kfree(gdev->cancel_req); - kfree(gdev->mem_balloon.change_req); - kfree(gdev->mem_balloon.get_req); + vbg_req_free(gdev->mouse_status_req, + sizeof(*gdev->mouse_status_req)); + vbg_req_free(gdev->ack_events_req, + sizeof(*gdev->ack_events_req)); + vbg_req_free(gdev->cancel_req, + sizeof(*gdev->cancel_req)); + vbg_req_free(gdev->mem_balloon.change_req, + sizeof(*gdev->mem_balloon.change_req)); + vbg_req_free(gdev->mem_balloon.get_req, + sizeof(*gdev->mem_balloon.get_req)); } /** @@ -1415,7 +1419,7 @@ static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev, req->flags = dump->u.in.flags; dump->hdr.rc = vbg_req_perform(gdev, req); - kfree(req); + vbg_req_free(req, sizeof(*req)); return 0; } @@ -1513,7 +1517,7 @@ int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features) if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc); - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h index 39ed85cf9244..7ad9ec45bfa9 100644 --- a/drivers/virt/vboxguest/vboxguest_core.h +++ b/drivers/virt/vboxguest/vboxguest_core.h @@ -173,6 +173,7 @@ void vbg_linux_mouse_event(struct vbg_dev *gdev); /* Private (non exported) functions form vboxguest_utils.c */ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); +void vbg_req_free(void *req, size_t len); int vbg_req_perform(struct vbg_dev *gdev, void *req); int vbg_hgcm_call32( struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index 0f0dab8023cf..bad915463359 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -82,6 +82,14 @@ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) return req; } +void vbg_req_free(void *req, size_t len) +{ + if (!req) + return; + + kfree(req); +} + /* Note this function returns a VBox status code, not a negative errno!! */ int vbg_req_perform(struct vbg_dev *gdev, void *req) { @@ -137,7 +145,7 @@ int vbg_hgcm_connect(struct vbg_dev *gdev, rc = hgcm_connect->header.result; } - kfree(hgcm_connect); + vbg_req_free(hgcm_connect, sizeof(*hgcm_connect)); *vbox_status = rc; return 0; @@ -166,7 +174,7 @@ int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status) if (rc >= 0) rc = hgcm_disconnect->header.result; - kfree(hgcm_disconnect); + vbg_req_free(hgcm_disconnect, sizeof(*hgcm_disconnect)); *vbox_status = rc; return 0; @@ -623,7 +631,7 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, } if (!leak_it) - kfree(call); + vbg_req_free(call, size); free_bounce_bufs: if (bounce_bufs) { From faf6a2a44164c0fb2c2a82692ab9051917514bce Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 18 Apr 2018 15:24:49 +0200 Subject: [PATCH 478/660] virt: vbox: Use __get_free_pages instead of kmalloc for DMA32 memory It is not possible to get DMA32 zone memory through kmalloc, causing the vboxguest driver to malfunction due to getting memory above 4G which the PCI device cannot handle. This commit changes the kmalloc calls where the 4G limit matters to using __get_free_pages() fixing vboxguest not working on x86_64 guests with more then 4G RAM. Cc: stable@vger.kernel.org Reported-by: Eloy Coto Pereiro Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_linux.c | 19 ++++++++++++++++--- drivers/virt/vboxguest/vboxguest_utils.c | 5 +++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c index 82e280d38cc2..398d22693234 100644 --- a/drivers/virt/vboxguest/vboxguest_linux.c +++ b/drivers/virt/vboxguest/vboxguest_linux.c @@ -87,6 +87,7 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, struct vbg_session *session = filp->private_data; size_t returned_size, size; struct vbg_ioctl_hdr hdr; + bool is_vmmdev_req; int ret = 0; void *buf; @@ -106,8 +107,17 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, if (size > SZ_16M) return -E2BIG; - /* __GFP_DMA32 because IOCTL_VMMDEV_REQUEST passes this to the host */ - buf = kmalloc(size, GFP_KERNEL | __GFP_DMA32); + /* + * IOCTL_VMMDEV_REQUEST needs the buffer to be below 4G to avoid + * the need for a bounce-buffer and another copy later on. + */ + is_vmmdev_req = (req & ~IOCSIZE_MASK) == VBG_IOCTL_VMMDEV_REQUEST(0) || + req == VBG_IOCTL_VMMDEV_REQUEST_BIG; + + if (is_vmmdev_req) + buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT); + else + buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -132,7 +142,10 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, ret = -EFAULT; out: - kfree(buf); + if (is_vmmdev_req) + vbg_req_free(buf, size); + else + kfree(buf); return ret; } diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index bad915463359..bf4474214b4d 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -65,8 +65,9 @@ VBG_LOG(vbg_debug, pr_debug); void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) { struct vmmdev_request_header *req; + int order = get_order(PAGE_ALIGN(len)); - req = kmalloc(len, GFP_KERNEL | __GFP_DMA32); + req = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA32, order); if (!req) return NULL; @@ -87,7 +88,7 @@ void vbg_req_free(void *req, size_t len) if (!req) return; - kfree(req); + free_pages((unsigned long)req, get_order(PAGE_ALIGN(len))); } /* Note this function returns a VBox status code, not a negative errno!! */ From 19972dd568f8d1e701f8051fcff56562e0551f63 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 18 Apr 2018 15:24:50 +0200 Subject: [PATCH 479/660] virt: vbox: Log an error when we fail to get the host version This was the only error path during probe without a message being logged about what went wrong, this fixes this. Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index 7411a535fda2..2f3856a95856 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -727,8 +727,10 @@ static int vbg_query_host_version(struct vbg_dev *gdev) rc = vbg_req_perform(gdev, req); ret = vbg_status_code_to_errno(rc); - if (ret) + if (ret) { + vbg_err("%s error: %d\n", __func__, rc); goto out; + } snprintf(gdev->host_version, sizeof(gdev->host_version), "%u.%u.%ur%u", req->major, req->minor, req->build, req->revision); From 504a918e6714b551b7b39940dbab32610fafa1fe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 15 Apr 2018 11:08:07 +0200 Subject: [PATCH 480/660] dma-direct: don't retry allocation for no-op GFP_DMA When an allocation with lower dma_coherent mask fails, dma_direct_alloc() retries the allocation with GFP_DMA. But, this is useless for architectures that hav no ZONE_DMA. Fix it by adding the check of CONFIG_ZONE_DMA before retrying the allocation. Fixes: 95f183916d4b ("dma-direct: retry allocations using GFP_DMA for small masks") Signed-off-by: Takashi Iwai Signed-off-by: Christoph Hellwig --- lib/dma-direct.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/dma-direct.c b/lib/dma-direct.c index c0bba30fef0a..bbfb229aa067 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -84,7 +84,8 @@ again: __free_pages(page, page_order); page = NULL; - if (dev->coherent_dma_mask < DMA_BIT_MASK(32) && + if (IS_ENABLED(CONFIG_ZONE_DMA) && + dev->coherent_dma_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) { gfp = (gfp & ~GFP_DMA32) | GFP_DMA; goto again; From 41d0bbc749471522714efd0649b7e033fefcbfb4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 9 Apr 2018 18:59:14 +0100 Subject: [PATCH 481/660] dma-coherent: clarify dma_mmap_from_dev_coherent documentation The use of "correctly mapped" here is misleading, since it can give the wrong expectation in the case that the memory *should* have been mapped from the per-device pool, but doing so failed for other reasons. Signed-off-by: Robin Murphy Signed-off-by: Christoph Hellwig --- drivers/base/dma-coherent.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index 1e6396bb807b..597d40893862 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c @@ -312,8 +312,9 @@ static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem, * This checks whether the memory was allocated from the per-device * coherent memory pool and if so, maps that memory to the provided vma. * - * Returns 1 if we correctly mapped the memory, or 0 if the caller should - * proceed with mapping memory from generic pools. + * Returns 1 if @vaddr belongs to the device coherent pool and the caller + * should return @ret, or 0 if they should proceed with mapping memory from + * generic areas. */ int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) From 60695be2bb6b0623f8e53bd9949d582a83c6d44a Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 13 Apr 2018 19:25:37 +0200 Subject: [PATCH 482/660] dma-mapping: postpone cpu addr translation on mmap Postpone calling virt_to_page() translation on memory locations not guaranteed to be backed by a struct page. Try first to map memory from the device coherent memory pool, then perform translation if that fails. On some architectures, specifically SH when configured with the SPARSEMEM memory model, assuming a struct page is always assigned to a memory address lead to unexpected hangs during the virtual to page address translation. This patch fixes that specific issue but applies in the general case too. Suggested-by: Laurent Pinchart Signed-off-by: Jacopo Mondi Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig --- drivers/base/dma-mapping.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 3b118353ea17..d82566d6e237 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -226,7 +226,6 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, #ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr)); unsigned long off = vma->vm_pgoff; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); @@ -234,12 +233,11 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; - if (off < count && user_count <= (count - off)) { + if (off < count && user_count <= (count - off)) ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, + page_to_pfn(virt_to_page(cpu_addr)) + off, user_count << PAGE_SHIFT, vma->vm_page_prot); - } #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ return ret; From dbac00f0cf634120d77edee10d25e3f6899d7636 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 22 Apr 2018 18:16:54 -0700 Subject: [PATCH 483/660] hwmon: (nct6683) Enable EC access if disabled at boot On Asrock Z370M Pro4, it was observed that EC access was disabled after initially booting the system. As a result, the driver failed to load with nct6683: EC is disabled After a suspend/resume cycle, the driver loaded correctly. nct6683: Found NCT6683D or compatible chip at 0x2e:0xa20 nct6683 nct6683.2592: NCT6683D EC firmware version 1.0 build 07/18/16 Enable EC access after identifying the chip if disabled to fix the problem. Warn the user that the data it reports may be unusable, similar to other drivers for chips from Nuvoton. Fixes: 41082d66bfd6f ("hwmon: Driver for NCT6683D") Reported-by: Jonathan Sims Tested-by: Jonathan Sims Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6683.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/nct6683.c b/drivers/hwmon/nct6683.c index 8b0bc4fc06e8..b0bc77bf2cd9 100644 --- a/drivers/hwmon/nct6683.c +++ b/drivers/hwmon/nct6683.c @@ -1380,8 +1380,8 @@ static int __init nct6683_find(int sioaddr, struct nct6683_sio_data *sio_data) /* Activate logical device if needed */ val = superio_inb(sioaddr, SIO_REG_ENABLE); if (!(val & 0x01)) { - pr_err("EC is disabled\n"); - goto fail; + pr_warn("Forcibly enabling EC access. Data may be unusable.\n"); + superio_outb(sioaddr, SIO_REG_ENABLE, val | 0x01); } superio_exit(sioaddr); From 7e5a206ab686f098367b61aca989f5cdfa8114a3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 20 Apr 2018 15:57:30 +0200 Subject: [PATCH 484/660] tcp: don't read out-of-bounds opsize The old code reads the "opsize" variable from out-of-bounds memory (first byte behind the segment) if a broken TCP segment ends directly after an opcode that is neither EOL nor NOP. The result of the read isn't used for anything, so the worst thing that could theoretically happen is a pagefault; and since the physmap is usually mostly contiguous, even that seems pretty unlikely. The following C reproducer triggers the uninitialized read - however, you can't actually see anything happen unless you put something like a pr_warn() in tcp_parse_md5sig_option() to print the opsize. ==================================== #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void systemf(const char *command, ...) { char *full_command; va_list ap; va_start(ap, command); if (vasprintf(&full_command, command, ap) == -1) err(1, "vasprintf"); va_end(ap); printf("systemf: <<<%s>>>\n", full_command); system(full_command); } char *devname; int tun_alloc(char *name) { int fd = open("/dev/net/tun", O_RDWR); if (fd == -1) err(1, "open tun dev"); static struct ifreq req = { .ifr_flags = IFF_TUN|IFF_NO_PI }; strcpy(req.ifr_name, name); if (ioctl(fd, TUNSETIFF, &req)) err(1, "TUNSETIFF"); devname = req.ifr_name; printf("device name: %s\n", devname); return fd; } #define IPADDR(a,b,c,d) (((a)<<0)+((b)<<8)+((c)<<16)+((d)<<24)) void sum_accumulate(unsigned int *sum, void *data, int len) { assert((len&2)==0); for (int i=0; i> 16) + (sum & 0xffff); sum = (sum >> 16) + (sum & 0xffff); return htons(~sum); } void fix_ip_sum(struct iphdr *ip) { unsigned int sum = 0; sum_accumulate(&sum, ip, sizeof(*ip)); ip->check = sum_final(sum); } void fix_tcp_sum(struct iphdr *ip, struct tcphdr *tcp) { unsigned int sum = 0; struct { unsigned int saddr; unsigned int daddr; unsigned char pad; unsigned char proto_num; unsigned short tcp_len; } fakehdr = { .saddr = ip->saddr, .daddr = ip->daddr, .proto_num = ip->protocol, .tcp_len = htons(ntohs(ip->tot_len) - ip->ihl*4) }; sum_accumulate(&sum, &fakehdr, sizeof(fakehdr)); sum_accumulate(&sum, tcp, tcp->doff*4); tcp->check = sum_final(sum); } int main(void) { int tun_fd = tun_alloc("inject_dev%d"); systemf("ip link set %s up", devname); systemf("ip addr add 192.168.42.1/24 dev %s", devname); struct { struct iphdr ip; struct tcphdr tcp; unsigned char tcp_opts[20]; } __attribute__((packed)) syn_packet = { .ip = { .ihl = sizeof(struct iphdr)/4, .version = 4, .tot_len = htons(sizeof(syn_packet)), .ttl = 30, .protocol = IPPROTO_TCP, /* FIXUP check */ .saddr = IPADDR(192,168,42,2), .daddr = IPADDR(192,168,42,1) }, .tcp = { .source = htons(1), .dest = htons(1337), .seq = 0x12345678, .doff = (sizeof(syn_packet.tcp)+sizeof(syn_packet.tcp_opts))/4, .syn = 1, .window = htons(64), .check = 0 /*FIXUP*/ }, .tcp_opts = { /* INVALID: trailing MD5SIG opcode after NOPs */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 19 } }; fix_ip_sum(&syn_packet.ip); fix_tcp_sum(&syn_packet.ip, &syn_packet.tcp); while (1) { int write_res = write(tun_fd, &syn_packet, sizeof(syn_packet)); if (write_res != sizeof(syn_packet)) err(1, "packet write failed"); } } ==================================== Fixes: cfb6eeb4c860 ("[TCP]: MD5 Signature Option (RFC2385) support.") Signed-off-by: Jann Horn Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 367def6ddeda..e51c644484dc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3868,11 +3868,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) int length = (th->doff << 2) - sizeof(*th); const u8 *ptr = (const u8 *)(th + 1); - /* If the TCP option is too short, we can short cut */ - if (length < TCPOLEN_MD5SIG) - return NULL; - - while (length > 0) { + /* If not enough data remaining, we can short cut */ + while (length >= TCPOLEN_MD5SIG) { int opcode = *ptr++; int opsize; From ee05d21791db6db954bbb7b79bb18d88b5f6b7ff Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 19 Feb 2018 19:05:45 +0900 Subject: [PATCH 485/660] perf machine: Set main kernel end address properly map_groups__fixup_end() was called to set the end addresses of kernel and module maps. But now since machine__create_modules() sets the end address of modules properly, the only remaining piece is the kernel map. We can set it with adjacent module's address directly instead of calling map_groups__fixup_end(). If there's no module after the kernel map, the end address will be ~0ULL. Since it also changes the start address of the kernel map, it needs to re-insert the map to the kmaps in order to keep a correct ordering. Kim reported that it caused problems on ARM64. Reported-by: Kim Phillips Tested-by: Kim Phillips Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20180419235915.GA19067@sejong Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2eca8478e24f..32d50492505d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1019,13 +1019,6 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type) return ret; } -static void map_groups__fixup_end(struct map_groups *mg) -{ - int i; - for (i = 0; i < MAP__NR_TYPES; ++i) - __map_groups__fixup_end(mg, i); -} - static char *get_kernel_version(const char *root_dir) { char version[PATH_MAX]; @@ -1233,6 +1226,7 @@ int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); const char *name = NULL; + struct map *map; u64 addr = 0; int ret; @@ -1259,13 +1253,25 @@ int machine__create_kernel_maps(struct machine *machine) machine__destroy_kernel_maps(machine); return -1; } - machine__set_kernel_mmap(machine, addr, 0); + + /* we have a real start address now, so re-order the kmaps */ + map = machine__kernel_map(machine); + + map__get(map); + map_groups__remove(&machine->kmaps, map); + + /* assume it's the last in the kmaps */ + machine__set_kernel_mmap(machine, addr, ~0ULL); + + map_groups__insert(&machine->kmaps, map); + map__put(map); } - /* - * Now that we have all the maps created, just set the ->end of them: - */ - map_groups__fixup_end(&machine->kmaps); + /* update end address of the kernel map using adjacent module address */ + map = map__next(machine__kernel_map(machine)); + if (map) + machine__set_kernel_mmap(machine, addr, map->start); + return 0; } From b00e2fd10429eddf4a181ad30af010e56c2303f9 Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Thu, 12 Apr 2018 13:51:14 +0530 Subject: [PATCH 486/660] staging: wilc1000: fix NULL pointer exception in host_int_parse_assoc_resp_info() Commit fe014d4e6b55 (staging: wilc1000: free memory allocated for general info message from firmware) introduced a bug by using wrong source address in kmemdup(). 'conn_info.req_ies' is used for source address in kempdup() instead of 'hif_drv->usr_conn_req.ies'. This commit fixes the NULL pointer dereference issue in host_int_parse_assoc_resp_info() by using the correct source address in kmemdup(). Fixes: fe014d4e6b55 (staging: wilc1000: free memory allocated for general info message from firmware) Signed-off-by: Ajay Singh Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/host_interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/wilc1000/host_interface.c b/drivers/staging/wilc1000/host_interface.c index 6b5300ca44a6..885f5fcead77 100644 --- a/drivers/staging/wilc1000/host_interface.c +++ b/drivers/staging/wilc1000/host_interface.c @@ -1390,7 +1390,7 @@ static inline void host_int_parse_assoc_resp_info(struct wilc_vif *vif, } if (hif_drv->usr_conn_req.ies) { - conn_info.req_ies = kmemdup(conn_info.req_ies, + conn_info.req_ies = kmemdup(hif_drv->usr_conn_req.ies, hif_drv->usr_conn_req.ies_len, GFP_KERNEL); if (conn_info.req_ies) From 1f81f118406774ca65f5a4b1a63ee2478ba826ac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Apr 2018 08:50:01 -0500 Subject: [PATCH 487/660] USB: musb: host: prevent core phy initialisation Set the new HCD flag which prevents USB core from trying to manage our phys. This is needed to be able to associate the controller platform device with the glue device device-tree node on the BBB which uses legacy USB phys. Otherwise, the generic phy lookup in usb_phy_roothub_init() and thus HCD registration fails repeatedly with -EPROBE_DEFER (see commit 178a0bce05cb ("usb: core: hcd: integrate the PHY wrapper into the HCD core")). Note that a related phy-lookup issue was recently worked around in the phy core by commit b7563e2796f8 ("phy: work around 'phys' references to usb-nop-xceiv devices"). Something similar may now be needed for other USB phys, and in particular if we eventually want to let USB core manage musb generic phys. Cc: Arnd Bergmann Cc: Martin Blumenstingl Signed-off-by: Johan Hovold Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 3a8451a15f7f..4fa372c845e1 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2754,6 +2754,7 @@ int musb_host_setup(struct musb *musb, int power_budget) hcd->self.otg_port = 1; musb->xceiv->otg->host = &hcd->self; hcd->power_budget = 2 * (power_budget ? : 250); + hcd->skip_phy_initialization = 1; ret = usb_add_hcd(hcd, 0, 0); if (ret < 0) From be75d8f1da08db6c3ccf3bae5597628109a9e7d0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Apr 2018 08:50:00 -0500 Subject: [PATCH 488/660] USB: musb: dsps: drop duplicate phy initialisation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 39cee200c23e ("usb: musb: core: call init and shutdown for the usb phy") the musb USB phy is initialised by musb_core, but the original initialisation in the dsps-glue init callback was left in place resulting in two calls to phy init during probe (and similarly, two shutdowns on remove). Drop the duplicate phy init and shutdown calls from the dsps glue in favour of the ones in musb core, which other glue drivers rely on. Note however that any generic phy is still initialised in the glue init callback (just as for the other drivers). Cc: Uwe Kleine-König Signed-off-by: Johan Hovold Acked-by: Uwe Kleine-König Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 05a679d5e3a2..6a60bc0490c5 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -451,7 +451,6 @@ static int dsps_musb_init(struct musb *musb) if (!rev) return -ENODEV; - usb_phy_init(musb->xceiv); if (IS_ERR(musb->phy)) { musb->phy = NULL; } else { @@ -501,7 +500,6 @@ static int dsps_musb_exit(struct musb *musb) struct dsps_glue *glue = dev_get_drvdata(dev->parent); del_timer_sync(&musb->dev_timer); - usb_phy_shutdown(musb->xceiv); phy_power_off(musb->phy); phy_exit(musb->phy); debugfs_remove_recursive(glue->dbgfs_root); From ce04abfbd3ea545a8eb38a8b6a48fb6e7d139dcb Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 23 Apr 2018 10:17:45 +0200 Subject: [PATCH 489/660] perf list: Remove s390 specific strcmp_cpuid_cmp function Make the type field in pmu-events/arch/s390/mapfile.cvs more generic to match the created cpuid string for s390. The pattern also checks for the counter first version number and counter second version number ([13]\.[1-5]) and the authorization field which follows. These numbers do not exist in the cpuid identification string when perf commands are executed on a z/VM environment (which does not support CPU counter measurement facility). CPUID string for LPAR: cpuid : IBM,3906,704,M03,3.5,002f CPUID string for z/VM: cpuid : IBM,2964,702,N96 This allows the removal of s390 specific cpuid compare code and uses the common compare function with its regular expression matching algorithm. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180423081745.3672-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/header.c | 18 ------------------ tools/perf/pmu-events/arch/s390/mapfile.csv | 10 +++++----- tools/perf/util/pmu.c | 2 +- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index a4c30f1c70be..163b92f33998 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -146,21 +146,3 @@ char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) zfree(&buf); return buf; } - -/* - * Compare the cpuid string returned by get_cpuid() function - * with the name generated by the jevents file read from - * pmu-events/arch/s390/mapfile.csv. - * - * Parameter mapcpuid is the cpuid as stored in the - * pmu-events/arch/s390/mapfile.csv. This is just the type number. - * Parameter cpuid is the cpuid returned by function get_cpuid(). - */ -int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) -{ - char *cp = strchr(cpuid, ','); - - if (cp == NULL) - return -1; - return strncmp(cp + 1, mapcpuid, strlen(mapcpuid)); -} diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index ca7682748a4b..78bcf7f8e206 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -1,6 +1,6 @@ Family-model,Version,Filename,EventType -209[78],1,cf_z10,core -281[78],1,cf_z196,core -282[78],1,cf_zec12,core -296[45],1,cf_z13,core -3906,3,cf_z14,core +^IBM.209[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z10,core +^IBM.281[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z196,core +^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core +^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core +^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 61a5e5027338..af4bedf4cf98 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -586,7 +586,7 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) * cpuid string generated on this platform. * Otherwise return non-zero. */ -int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) { regex_t re; regmatch_t pmatch[1]; From b31a8cc1a53dda3a33b6c9c62779869d4d5fc142 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 23 Apr 2018 10:24:28 +0200 Subject: [PATCH 490/660] perf test: Adapt test case record+probe_libc_inet_pton.sh for s390 perf test case 58 (record+probe_libc_inet_pton.sh) executed on s390x using kernel 4.16.0rc3 displays this result: # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 probe_libc:inet_pton: (3ffa0240448) __GI___inet_pton (/usr/lib64/libc-2.26.so) gaih_inet (inlined) __GI_getaddrinfo (inlined) main (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) _start (/usr/bin/ping) After I installed kernel 4.16.0 the same tests uses commands: # perf record -e probe_libc:inet_pton/call-graph=dwarf/ -o /tmp/perf.data.abc ping -6 -c 1 ::1 # perf script -i /tmp/perf.data.abc and displays: ping 39048 [006] 84230.381198: probe_libc:inet_pton: (3ffa0240448) 140448 __GI___inet_pton (/usr/lib64/libc-2.26.so) fbde1 gaih_inet (inlined) fe2b9 __GI_getaddrinfo (inlined) 398d main (/usr/bin/ping) Nothing else changed including glibc elfutils and other libraries picked up by the build. The entries for __libc_start_main and _start are missing. I bisected missing __libc_start_main and _start to commit Fixes: 3d20c6246690 ("perf unwind: Unwind with libdw doesn't take symfs into account") When I undo this commit I get this call stack on s390: [root@s35lp76 perf]# ./perf script -i /tmp/perf.data.abc ping 39048 [006] 84230.381198: probe_libc:inet_pton: (3ffa0240448) 140448 __GI___inet_pton (/usr/lib64/libc-2.26.so) fbde1 gaih_inet (inlined) fe2b9 __GI_getaddrinfo (inlined) 398d main (/usr/bin/ping) 22fbd __libc_start_main (/usr/lib64/libc-2.26.so) 457b _start (/usr/bin/ping) Looks like dwarf functions dwfl_xxx create different call back stack trace when using file /usr/lib/debug/usr/bin/ping-20161105-7.fc27.s390x.debug instead of file /usr/bin/ping. Fix this test case on s390 and do not expect any call back stack entry after the main() function. Also be more robust and accept a leading __GI_ prefix in front of getaddrinfo. On x86 this test case shows the same call stack using both kernel versions 4.16.0rc3 and 4.16.0 and also stops at main: [root@f27 perf]# ./perf script -i /tmp/perf.data.tmr ping 4446 [000] 172.027088: probe_libc:inet_pton: (7fdfa08c93c0) 1393c0 __GI___inet_pton (/usr/lib64/libc-2.26.so) fe60d getaddrinfo (/usr/lib64/libc-2.26.so) 2f40 main (/usr/bin/ping) [root@f27 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Martin Vuille Link: http://lkml.kernel.org/r/20180423082428.7930-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 1ecc1f0ff84a..016882dbbc16 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -19,12 +19,10 @@ trace_libc_inet_pton_backtrace() { expected[1]=".*inet_pton[[:space:]]\($libc\)$" case "$(uname -m)" in s390x) - eventattr='call-graph=dwarf' + eventattr='call-graph=dwarf,max-stack=4' expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$" - expected[3]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$" + expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$" expected[4]="main[[:space:]]\(.*/bin/ping.*\)$" - expected[5]="__libc_start_main[[:space:]]\($libc\)$" - expected[6]="_start[[:space:]]\(.*/bin/ping.*\)$" ;; *) eventattr='max-stack=3' From 129193bb0c43d42f1c397c175346e3e0dba5a578 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 23 Apr 2018 11:08:17 +0200 Subject: [PATCH 491/660] perf stat: Keep the / modifier separator in fallback The 'perf stat' fallback for EACCES error sets the exclude_kernel perf_event_attr and tries perf_event_open() again with it. In addition, it also changes the name of the event to reflect that change by adding the 'u' modifier. But it does not take into account the '/' separator, so the event name can end up mangled, like: (note the '/:' characters) $ perf stat -e cpu/cpu-cycles/ kill ... 386,832 cpu/cpu-cycles/:u Adding the code to check on the '/' separator and set the following correct event name: $ perf stat -e cpu/cpu-cycles/ kill ... 388,548 cpu/cpu-cycles/u Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180423090823.32309-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3e87486c28fe..7eb1e9850abf 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2754,8 +2754,14 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err, (paranoid = perf_event_paranoid()) > 1) { const char *name = perf_evsel__name(evsel); char *new_name; + const char *sep = ":"; - if (asprintf(&new_name, "%s%su", name, strchr(name, ':') ? "" : ":") < 0) + /* Is there already the separator in the name. */ + if (strchr(name, '/') || + strchr(name, ':')) + sep = ""; + + if (asprintf(&new_name, "%s%su", name, sep) < 0) return false; if (evsel->name) From 9a4a931ce847f4aaa12edf11b2e050e18bf45910 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 23 Apr 2018 11:08:18 +0200 Subject: [PATCH 492/660] perf pmu: Fix pmu events parsing rule Currently all the event parsing fails end up in the event_pmu rule, and display misleading help like: $ perf stat -e inst kill event syntax error: 'inst' \___ Cannot find PMU `inst'. Missing kernel support? ... The reason is that the event_pmu is too strong and match also single string. Changing it to force the '/' separators to be part of the rule, and getting the proper error now: $ perf stat -e inst kill event syntax error: 'inst' \___ parser error Run 'perf list' for a list of valid events ... Signed-off-by: Jiri Olsa Reported-by: Ingo Molnar Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180423090823.32309-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 7afeb80cc39e..d14464c42714 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -224,15 +224,15 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME opt_event_config +PE_NAME '/' event_config '/' { struct list_head *list, *orig_terms, *terms; - if (parse_events_copy_term_list($2, &orig_terms)) + if (parse_events_copy_term_list($3, &orig_terms)) YYABORT; ALLOC_LIST(list); - if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) { + if (parse_events_add_pmu(_parse_state, list, $1, $3, false)) { struct perf_pmu *pmu = NULL; int ok = 0; char *pattern; @@ -262,7 +262,7 @@ PE_NAME opt_event_config if (!ok) YYABORT; } - parse_events_terms__delete($2); + parse_events_terms__delete($3); parse_events_terms__delete(orig_terms); $$ = list; } From 1ba7862f1f5a7a3b268cf79ac236611546888a90 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 23 Apr 2018 15:01:44 +0200 Subject: [PATCH 493/660] ALSA: control: Fix missing __user annotation There is one place missing __user annotation to the pointer used by the recent code refactoring. Reported by sparse. Fixes: 450296f305f1 ("ALSA: control: code refactoring TLV ioctl handler") Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index 69734b0eafd0..9aa15bfc7936 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1492,7 +1492,7 @@ static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file, int op_flag) { struct snd_ctl_tlv header; - unsigned int *container; + unsigned int __user *container; unsigned int container_size; struct snd_kcontrol *kctl; struct snd_ctl_elem_id id; From 2de841efaeafa9f597e495ffdf5a024079c4bfe7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 23 Apr 2018 08:59:36 +0200 Subject: [PATCH 494/660] ALSA: usb-audio: Fix forgotten conversion of control query functions The recent code refactoring made the argument for some helper functions to be the explicit UAC_CS_* and UAC2_CS_* value instead of 0-based offset. However, there was one place left forgotten, and it caused a regression on some devices appearing as the inconsistent mixer setup. This patch corrects the forgotten conversion. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199449 Fixes: 21e9b3e931f7 ("ALSA: usb-audio: fix uac control query argument") Tested-by: Nazar Mokrynskyi Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 301ad61ed426..3387483310b1 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1776,7 +1776,8 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, build_feature_ctl(state, _ftr, ch_bits, control, &iterm, unitid, ch_read_only); if (uac_v2v3_control_is_readable(master_bits, control)) - build_feature_ctl(state, _ftr, 0, i, &iterm, unitid, + build_feature_ctl(state, _ftr, 0, control, + &iterm, unitid, !uac_v2v3_control_is_writeable(master_bits, control)); } From e9add8bac6c69edb4bf391e537faa659b2ed70d2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 23 Apr 2018 11:08:19 +0200 Subject: [PATCH 495/660] perf evsel: Disable write_backward for leader sampling group events .. and other related fields that do not need to be enabled for events that have sampling leader. It fixes the perf top usage Ingo reported broken: # perf top -e '{cycles,msr/aperf/}:S' The 'msr/aperf/' event is configured for write_back sampling, which is not allowed by the MSR PMU, so it fails to create the event. Adjusting related attr test. Reported-by: Ingo Molnar Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180423090823.32309-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/test-record-group-sampling | 3 +++ tools/perf/util/evsel.c | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index f906b793196f..8a33ca4f9e1f 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -35,3 +35,6 @@ inherit=0 # sampling disabled sample_freq=0 sample_period=0 +freq=0 +write_backward=0 +sample_id_all=0 diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7eb1e9850abf..26bdeecc0452 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -930,8 +930,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * than leader in case leader 'leads' the sampling. */ if ((leader != evsel) && leader->sample_read) { - attr->sample_freq = 0; - attr->sample_period = 0; + attr->freq = 0; + attr->sample_freq = 0; + attr->sample_period = 0; + attr->write_backward = 0; + attr->sample_id_all = 0; } if (opts->no_samples) From 3138a2ef62667b6ac8eb5fb33a9e0b84ec3ab165 Mon Sep 17 00:00:00 2001 From: Sangwon Hong Date: Sun, 22 Apr 2018 16:29:06 +0900 Subject: [PATCH 496/660] perf mem: Document incorrect and missing options Several options were incorrectly described, some lacked describing required arguments while others were simply not documented, fix it. Signed-off-by: Sangwon Hong Acked-by: Jiri Olsa Cc: Namhyung Kim Cc: Taeung Song Link: http://lkml.kernel.org/r/1524382146-19609-1-git-send-email-qpakzk@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-mem.txt | 41 +++++++++++++++++++-------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 8806ed5f3802..f8d2167cf3e7 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -28,29 +28,46 @@ OPTIONS ...:: Any command you can specify in a shell. +-i:: +--input=:: + Input file name. + -f:: --force:: Don't do ownership validation -t:: ---type=:: +--type=:: Select the memory operation type: load or store (default: load,store) -D:: ---dump-raw-samples=:: +--dump-raw-samples:: Dump the raw decoded samples on the screen in a format that is easy to parse with one sample per line. -x:: ---field-separator:: +--field-separator=:: Specify the field separator used when dump raw samples (-D option). By default, The separator is the space character. -C:: ---cpu-list:: - Restrict dump of raw samples to those provided via this option. Note that the same - option can be passed in record mode. It will be interpreted the same way as perf - record. +--cpu=:: + Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a + comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default + is to monitor all CPUS. +-U:: +--hide-unresolved:: + Only display entries resolved to a symbol. + +-p:: +--phys-data:: + Record/Report sample physical addresses + +RECORD OPTIONS +-------------- +-e:: +--event :: + Event selector. Use 'perf mem record -e list' to list available events. -K:: --all-kernel:: @@ -60,12 +77,12 @@ OPTIONS --all-user:: Configure all used events to run in user space. ---ldload:: - Specify desired latency for loads event. +-v:: +--verbose:: + Be more verbose (show counter open errors, etc) --p:: ---phys-data:: - Record/Report sample physical addresses +--ldlat :: + Specify desired latency for loads event. In addition, for report all perf report options are valid, and for record all perf record options. From 5d9946c3e5e38e07ab7019db9413a96807a325f2 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 23 Apr 2018 16:29:40 +0200 Subject: [PATCH 497/660] perf record: Fix s390 undefined record__auxtrace_init() return value Command 'perf record' calls: cmd_report() record__auxtrace_init() auxtrace_record__init() On s390 function auxtrace_record__init() returns random return value due to missing initialization. This sometime causes 'perf record' to exit immediately without error message and creating a perf.data file. Fix this by setting error the return code to zero before returning from platform specific functions which may not set the error code in call cases. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180423142940.21143-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/auxtrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c index 6cb48e4cffd9..3afe8256eff2 100644 --- a/tools/perf/arch/s390/util/auxtrace.c +++ b/tools/perf/arch/s390/util/auxtrace.c @@ -87,6 +87,7 @@ struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, struct perf_evsel *pos; int diagnose = 0; + *err = 0; if (evlist->nr_entries == 0) return NULL; From f1919826896c82b6af9c46f69e02f2bc04df4be7 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 8 Apr 2018 09:54:39 +0800 Subject: [PATCH 498/660] ceph: check if mds create snaprealm when setting quota If mds does not, return -EOPNOTSUPP. Link: http://tracker.ceph.com/issues/23491 Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/xattr.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 7e72348639e4..315f7e63e7cc 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -228,7 +228,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci) { - return (ci->i_max_files || ci->i_max_bytes); + bool ret = false; + spin_lock(&ci->i_ceph_lock); + if ((ci->i_max_files || ci->i_max_bytes) && + ci->i_vino.snap == CEPH_NOSNAP && + ci->i_snap_realm && + ci->i_snap_realm->ino == ci->i_vino.ino) + ret = true; + spin_unlock(&ci->i_ceph_lock); + return ret; } static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val, @@ -1008,14 +1016,19 @@ int __ceph_setxattr(struct inode *inode, const char *name, char *newval = NULL; struct ceph_inode_xattr *xattr = NULL; int required_blob_size; + bool check_realm = false; bool lock_snap_rwsem = false; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; vxattr = ceph_match_vxattr(inode, name); - if (vxattr && vxattr->readonly) - return -EOPNOTSUPP; + if (vxattr) { + if (vxattr->readonly) + return -EOPNOTSUPP; + if (value && !strncmp(vxattr->name, "ceph.quota", 10)) + check_realm = true; + } /* pass any unhandled ceph.* xattrs through to the MDS */ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) @@ -1109,6 +1122,15 @@ do_sync_unlocked: err = -EBUSY; } else { err = ceph_sync_setxattr(inode, name, value, size, flags); + if (err >= 0 && check_realm) { + /* check if snaprealm was created for quota inode */ + spin_lock(&ci->i_ceph_lock); + if ((ci->i_max_files || ci->i_max_bytes) && + !(ci->i_snap_realm && + ci->i_snap_realm->ino == ci->i_vino.ino)) + err = -EOPNOTSUPP; + spin_unlock(&ci->i_ceph_lock); + } } out: ceph_free_cap_flush(prealloc_cf); From ddea788c63094f7c483783265563dd5b50052e28 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 22 Apr 2018 19:11:50 +0800 Subject: [PATCH 499/660] bonding: do not set slave_dev npinfo before slave_enable_netpoll in bond_enslave After Commit 8a8efa22f51b ("bonding: sync netpoll code with bridge"), it would set slave_dev npinfo in slave_enable_netpoll when enslaving a dev if bond->dev->npinfo was set. However now slave_dev npinfo is set with bond->dev->npinfo before calling slave_enable_netpoll. With slave_dev npinfo set, __netpoll_setup called in slave_enable_netpoll will not call slave dev's .ndo_netpoll_setup(). It causes that the lower dev of this slave dev can't set its npinfo. One way to reproduce it: # modprobe bonding # brctl addbr br0 # brctl addif br0 eth1 # ifconfig bond0 192.168.122.1/24 up # ifenslave bond0 eth2 # systemctl restart netconsole # ifenslave bond0 br0 # ifconfig eth2 down # systemctl restart netconsole The netpoll won't really work. This patch is to remove that slave_dev npinfo setting in bond_enslave(). Fixes: 8a8efa22f51b ("bonding: sync netpoll code with bridge") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b7b113018853..718e4914e3a0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1660,8 +1660,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, } /* switch(bond_mode) */ #ifdef CONFIG_NET_POLL_CONTROLLER - slave_dev->npinfo = bond->dev->npinfo; - if (slave_dev->npinfo) { + if (bond->dev->npinfo) { if (slave_enable_netpoll(new_slave)) { netdev_info(bond_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n"); res = -EBUSY; From aa8f8778493c85fff480cdf8b349b1e1dcb5f243 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Apr 2018 18:29:23 -0700 Subject: [PATCH 500/660] ipv6: add RTA_TABLE and RTA_PREFSRC to rtm_ipv6_policy KMSAN reported use of uninit-value that I tracked to lack of proper size check on RTA_TABLE attribute. I also believe RTA_PREFSRC lacks a similar check. Fixes: 86872cb57925 ("[IPv6] route: FIB6 configuration using struct fib6_config") Fixes: c3968a857a6b ("ipv6: RTA_PREFSRC support for ipv6 route source address selection") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 49b954d6d0fa..cde7d8251377 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3975,6 +3975,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, + [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, @@ -3986,6 +3987,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_EXPIRES] = { .type = NLA_U32 }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, From 71c751f2a43fa03fae3cf5f0067ed3001a397013 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Apr 2018 11:41:33 +0100 Subject: [PATCH 501/660] arm64: add sentinel to kpti_safe_list We're missing a sentinel entry in kpti_safe_list. Thus is_midr_in_range_list() can walk past the end of kpti_safe_list. Depending on the contents of memory, this could erroneously match a CPU's MIDR, cause a data abort, or other bad outcomes. Add the sentinel entry to avoid this. Fixes: be5b299830c63ed7 ("arm64: capabilities: Add support for checks based on a list of MIDRs") Signed-off-by: Mark Rutland Reported-by: Jan Kiszka Tested-by: Jan Kiszka Reviewed-by: Suzuki K Poulose Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 536d572e5596..9d1b06d67c53 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -868,6 +868,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, static const struct midr_range kpti_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + { /* sentinel */ } }; char const *str = "command line option"; From 47016b341fc3b3fd4909e058c6fa38f165b53646 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 23 Apr 2018 12:45:11 -0500 Subject: [PATCH 502/660] mtd: spi-nor: cadence-quadspi: Fix page fault kernel panic The current Cadence QSPI driver caused a kernel panic when loading a Root Filesystem from QSPI. The problem was caused by reading more bytes than needed because the QSPI operated on 4 bytes at a time. [ 7.947754] spi_nor_read[1048]:from 0x037cad74, len 1 [bfe07fff] [ 7.956247] cqspi_read[910]:offset 0x58502516, buffer=bfe07fff [ 7.956247] [ 7.966046] Unable to handle kernel paging request at virtual address bfe08002 [ 7.973239] pgd = eebfc000 [ 7.975931] [bfe08002] *pgd=2fffb811, *pte=00000000, *ppte=00000000 Notice above how only 1 byte needed to be read but by reading 4 bytes into the end of a mapped page, an unrecoverable page fault occurred. This patch uses a temporary buffer to hold the 4 bytes read and then copies only the bytes required into the buffer. A min() function is used to limit the length to prevent buffer overflows. Request testing of this patch on other platforms. This was tested on the Intel Arria10 SoCFPGA DevKit. Fixes: 0cf1725676a97fc8 ("mtd: spi-nor: cqspi: Fix build on arches missing readsl/writesl") Signed-off-by: Thor Thayer Cc: Reviewed-by: Marek Vasut Signed-off-by: Boris Brezillon --- drivers/mtd/spi-nor/cadence-quadspi.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index 4b8e9183489a..5872f31eaa60 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -501,7 +501,9 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf, void __iomem *reg_base = cqspi->iobase; void __iomem *ahb_base = cqspi->ahb_base; unsigned int remaining = n_rx; + unsigned int mod_bytes = n_rx % 4; unsigned int bytes_to_read = 0; + u8 *rxbuf_end = rxbuf + n_rx; int ret = 0; writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR); @@ -530,11 +532,24 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf, } while (bytes_to_read != 0) { + unsigned int word_remain = round_down(remaining, 4); + bytes_to_read *= cqspi->fifo_width; bytes_to_read = bytes_to_read > remaining ? remaining : bytes_to_read; - ioread32_rep(ahb_base, rxbuf, - DIV_ROUND_UP(bytes_to_read, 4)); + bytes_to_read = round_down(bytes_to_read, 4); + /* Read 4 byte word chunks then single bytes */ + if (bytes_to_read) { + ioread32_rep(ahb_base, rxbuf, + (bytes_to_read / 4)); + } else if (!word_remain && mod_bytes) { + unsigned int temp = ioread32(ahb_base); + + bytes_to_read = mod_bytes; + memcpy(rxbuf, &temp, min((unsigned int) + (rxbuf_end - rxbuf), + bytes_to_read)); + } rxbuf += bytes_to_read; remaining -= bytes_to_read; bytes_to_read = cqspi_get_rd_sram_level(cqspi); From fb5924fddf9ee31db04da7ad4e8c3434a387101b Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 6 Apr 2018 15:24:23 +1000 Subject: [PATCH 503/660] powerpc/mm: Flush cache on memory hot(un)plug This patch adds support for flushing potentially dirty cache lines when memory is hot-plugged/hot-un-plugged. The support is currently limited to 64 bit systems. The bug was exposed when mappings for a device were actually hot-unplugged and plugged in back later. A similar issue was observed during the development of memtrace, but memtrace does it's own flushing of region via a custom routine. These patches do a flush both on hotplug/unplug to clear any stale data in the cache w.r.t mappings, there is a small race window where a clean cache line may be created again just prior to tearing down the mapping. The patches were tested by disabling the flush routines in memtrace and doing I/O on the trace file. The system immediately checkstops (quite reliablly if prior to the hot-unplug of the memtrace region, we memset the regions we are about to hot unplug). After these patches no custom flushing is needed in the memtrace code. Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Balbir Singh Acked-by: Reza Arbab Reviewed-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 737f8a4632cc..c3c39b02b2ba 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -133,6 +133,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * start, start + size, rc); return -EFAULT; } + flush_inval_dcache_range(start, start + size); return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } @@ -159,6 +160,7 @@ int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); + flush_inval_dcache_range(start, start + size); ret = remove_section_mapping(start, start + size); /* Ensure all vmalloc mappings are flushed in case they also From 7fd6641de28fe9b5bce0c38d2adee0a72a72619e Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 6 Apr 2018 15:24:24 +1000 Subject: [PATCH 504/660] powerpc/powernv/memtrace: Let the arch hotunplug code flush cache Don't do this via custom code, instead now that we have support in the arch hotplug/hotunplug code, rely on those routines to do the right thing. The existing flush doesn't work because it uses ppc64_caches.l1d.size instead of ppc64_caches.l1d.line_size. Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing") Signed-off-by: Balbir Singh Reviewed-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/memtrace.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index de470caf0784..fc222a0c2ac4 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -82,19 +82,6 @@ static const struct file_operations memtrace_fops = { .open = simple_open, }; -static void flush_memory_region(u64 base, u64 size) -{ - unsigned long line_size = ppc64_caches.l1d.size; - u64 end = base + size; - u64 addr; - - base = round_down(base, line_size); - end = round_up(end, line_size); - - for (addr = base; addr < end; addr += line_size) - asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory"); -} - static int check_memblock_online(struct memory_block *mem, void *arg) { if (mem->state != MEM_ONLINE) @@ -132,10 +119,6 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, change_memblock_state); - /* RCU grace period? */ - flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT), - nr_pages << PAGE_SHIFT); - lock_device_hotplug(); remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); unlock_device_hotplug(); From 28a5933e8d362766462ea9e5f135e19f41e658ba Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 11 Apr 2018 16:38:54 +1000 Subject: [PATCH 505/660] powerpc/powernv/npu: Add lock to prevent race in concurrent context init/destroy The pnv_npu2_init_context() and pnv_npu2_destroy_context() functions are used to allocate/free contexts to allow address translation and shootdown by the NPU on a particular GPU. Context initialisation is implicitly safe as it is protected by the requirement mmap_sem be held in write mode, however pnv_npu2_destroy_context() does not require mmap_sem to be held and it is not safe to call with a concurrent initialisation for a different GPU. It was assumed the driver would ensure destruction was not called concurrently with initialisation. However the driver may be simplified by allowing concurrent initialisation and destruction for different GPUs. As npu context creation/destruction is not a performance critical path and the critical section is not large a single spinlock is used for simplicity. Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alistair Popple Reviewed-by: Mark Hairgrove Tested-by: Mark Hairgrove Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 51 +++++++++++++++++++----- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 69a4f9e8bd55..5ff7c6e0e6da 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -33,6 +33,12 @@ #define npu_to_phb(x) container_of(x, struct pnv_phb, npu) +/* + * spinlock to protect initialisation of an npu_context for a particular + * mm_struct. + */ +static DEFINE_SPINLOCK(npu_context_lock); + /* * Other types of TCE cache invalidation are not functional in the * hardware. @@ -696,7 +702,8 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { * Returns an error if there no contexts are currently available or a * npu_context which should be passed to pnv_npu2_handle_fault(). * - * mmap_sem must be held in write mode. + * mmap_sem must be held in write mode and must not be called from interrupt + * context. */ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, unsigned long flags, @@ -743,7 +750,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, /* * Setup the NPU context table for a particular GPU. These need to be * per-GPU as we need the tables to filter ATSDs when there are no - * active contexts on a particular GPU. + * active contexts on a particular GPU. It is safe for these to be + * called concurrently with destroy as the OPAL call takes appropriate + * locks and refcounts on init/destroy. */ rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); @@ -754,8 +763,19 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, * We store the npu pci device so we can more easily get at the * associated npus. */ + spin_lock(&npu_context_lock); npu_context = mm->context.npu_context; + if (npu_context) + WARN_ON(!kref_get_unless_zero(&npu_context->kref)); + spin_unlock(&npu_context_lock); + if (!npu_context) { + /* + * We can set up these fields without holding the + * npu_context_lock as the npu_context hasn't been returned to + * the caller meaning it can't be destroyed. Parallel allocation + * is protected against by mmap_sem. + */ rc = -ENOMEM; npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); if (npu_context) { @@ -774,8 +794,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, } mm->context.npu_context = npu_context; - } else { - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); } npu_context->release_cb = cb; @@ -814,15 +832,16 @@ static void pnv_npu2_release_context(struct kref *kref) mm_context_remove_copro(npu_context->mm); npu_context->mm->context.npu_context = NULL; - mmu_notifier_unregister(&npu_context->mn, - npu_context->mm); - - kfree(npu_context); } +/* + * Destroy a context on the given GPU. May free the npu_context if it is no + * longer active on any GPUs. Must not be called from interrupt context. + */ void pnv_npu2_destroy_context(struct npu_context *npu_context, struct pci_dev *gpdev) { + int removed; struct pnv_phb *nphb; struct npu *npu; struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); @@ -844,7 +863,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); - kref_put(&npu_context->kref, pnv_npu2_release_context); + spin_lock(&npu_context_lock); + removed = kref_put(&npu_context->kref, pnv_npu2_release_context); + spin_unlock(&npu_context_lock); + + /* + * We need to do this outside of pnv_npu2_release_context so that it is + * outside the spinlock as mmu_notifier_destroy uses SRCU. + */ + if (removed) { + mmu_notifier_unregister(&npu_context->mn, + npu_context->mm); + + kfree(npu_context); + } + } EXPORT_SYMBOL(pnv_npu2_destroy_context); From a1409adac748f0db655e096521bbe6904aadeb98 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 11 Apr 2018 16:38:55 +1000 Subject: [PATCH 506/660] powerpc/powernv/npu: Prevent overwriting of pnv_npu2_init_contex() callback parameters There is a single npu context per set of callback parameters. Callers should be prevented from overwriting existing callback values so instead return an error if different parameters are passed. Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alistair Popple Reviewed-by: Mark Hairgrove Tested-by: Mark Hairgrove Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/powernv.h | 2 +- arch/powerpc/platforms/powernv/npu-dma.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h index d1c2d2e658cf..2f3ff7a27881 100644 --- a/arch/powerpc/include/asm/powernv.h +++ b/arch/powerpc/include/asm/powernv.h @@ -15,7 +15,7 @@ extern void powernv_set_nmmu_ptcr(unsigned long ptcr); extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, unsigned long flags, - struct npu_context *(*cb)(struct npu_context *, void *), + void (*cb)(struct npu_context *, void *), void *priv); extern void pnv_npu2_destroy_context(struct npu_context *context, struct pci_dev *gpdev); diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 5ff7c6e0e6da..ccd57d1b5bf8 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -407,7 +407,7 @@ struct npu_context { bool nmmu_flush; /* Callback to stop translation requests on a given GPU */ - struct npu_context *(*release_cb)(struct npu_context *, void *); + void (*release_cb)(struct npu_context *context, void *priv); /* * Private pointer passed to the above callback for usage by @@ -707,7 +707,7 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { */ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, unsigned long flags, - struct npu_context *(*cb)(struct npu_context *, void *), + void (*cb)(struct npu_context *, void *), void *priv) { int rc; @@ -765,8 +765,18 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, */ spin_lock(&npu_context_lock); npu_context = mm->context.npu_context; - if (npu_context) + if (npu_context) { + if (npu_context->release_cb != cb || + npu_context->priv != priv) { + spin_unlock(&npu_context_lock); + opal_npu_destroy_context(nphb->opal_id, mm->context.id, + PCI_DEVID(gpdev->bus->number, + gpdev->devfn)); + return ERR_PTR(-EINVAL); + } + WARN_ON(!kref_get_unless_zero(&npu_context->kref)); + } spin_unlock(&npu_context_lock); if (!npu_context) { From d0cf9b561ca97d5245bb9e0c4774b7fadd897d67 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 17 Apr 2018 19:11:28 +1000 Subject: [PATCH 507/660] powerpc/powernv/npu: Do a PID GPU TLB flush when invalidating a large address range The NPU has a limited number of address translation shootdown (ATSD) registers and the GPU has limited bandwidth to process ATSDs. This can result in contention of ATSD registers leading to soft lockups on some threads, particularly when invalidating a large address range in pnv_npu2_mn_invalidate_range(). At some threshold it becomes more efficient to flush the entire GPU TLB for the given MM context (PID) than individually flushing each address in the range. This patch will result in ranges greater than 2MB being converted from 32+ ATSDs into a single ATSD which will flush the TLB for the given PID on each GPU. Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alistair Popple Acked-by: Balbir Singh Tested-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index ccd57d1b5bf8..525e966dce34 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -39,6 +39,13 @@ */ static DEFINE_SPINLOCK(npu_context_lock); +/* + * When an address shootdown range exceeds this threshold we invalidate the + * entire TLB on the GPU for the given PID rather than each specific address in + * the range. + */ +#define ATSD_THRESHOLD (2*1024*1024) + /* * Other types of TCE cache invalidation are not functional in the * hardware. @@ -677,11 +684,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct npu_context *npu_context = mn_to_npu_context(mn); unsigned long address; - for (address = start; address < end; address += PAGE_SIZE) - mmio_invalidate(npu_context, 1, address, false); + if (end - start > ATSD_THRESHOLD) { + /* + * Just invalidate the entire PID if the address range is too + * large. + */ + mmio_invalidate(npu_context, 0, 0, true); + } else { + for (address = start; address < end; address += PAGE_SIZE) + mmio_invalidate(npu_context, 1, address, false); - /* Do the flush only on the final addess == end */ - mmio_invalidate(npu_context, 1, address, true); + /* Do the flush only on the final addess == end */ + mmio_invalidate(npu_context, 1, address, true); + } } static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { From b6a930fa88083b41d26ddf1cab95cbd740936c22 Mon Sep 17 00:00:00 2001 From: Jingju Hou Date: Mon, 23 Apr 2018 15:22:49 +0800 Subject: [PATCH 508/660] net: phy: marvell: clear wol event before setting it If WOL event happened once, the LED[2] interrupt pin will not be cleared unless we read the CSISR register. If interrupts are in use, the normal interrupt handling will clear the WOL event. Let's clear the WOL event before enabling it if !phy_interrupt_is_valid(). Signed-off-by: Jingju Hou Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index c22e8e383247..25e2a099b71c 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1393,6 +1393,15 @@ static int m88e1318_set_wol(struct phy_device *phydev, if (err < 0) goto error; + /* If WOL event happened once, the LED[2] interrupt pin + * will not be cleared unless we reading the interrupt status + * register. If interrupts are in use, the normal interrupt + * handling will clear the WOL event. Clear the WOL event + * before enabling it if !phy_interrupt_is_valid() + */ + if (!phy_interrupt_is_valid(phydev)) + phy_read(phydev, MII_M1011_IEVENT); + /* Enable the WOL interrupt */ err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0, MII_88E1318S_PHY_CSIER_WOL_EIE); From eb1c28c05894a4b1f6b56c5bf072205e64cfa280 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 23 Apr 2018 16:15:14 +0200 Subject: [PATCH 509/660] l2tp: check sockaddr length in pppol2tp_connect() Check sockaddr_len before dereferencing sp->sa_protocol, to ensure that it actually points to valid data. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Reported-by: syzbot+a70ac890b23b1bf29f5c@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1404bc1c1bb7..1fd9e145076a 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -619,6 +619,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); error = -EINVAL; + + if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6)) + goto end; + if (sp->sa_protocol != PX_PROTO_OL2TP) goto end; From a49e2f5d5fb141884452ddb428f551b123d436b5 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 23 Apr 2018 16:38:27 +0200 Subject: [PATCH 510/660] pppoe: check sockaddr length in pppoe_connect() We must validate sockaddr_len, otherwise userspace can pass fewer data than we expect and we end up accessing invalid data. Fixes: 224cf5ad14c0 ("ppp: Move the PPP drivers") Reported-by: syzbot+4f03bdf92fdf9ef5ddab@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 1483bc7b01e1..7df07337d69c 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -620,6 +620,10 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); error = -EINVAL; + + if (sockaddr_len != sizeof(struct sockaddr_pppox)) + goto end; + if (sp->sa_protocol != PX_PROTO_OE) goto end; From 4d945663a6a0acf3cbe45940503f2eb9584bfee7 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 23 Apr 2018 11:43:08 -0500 Subject: [PATCH 511/660] amd-xgbe: Add pre/post auto-negotiation phy hooks Add hooks to the driver auto-negotiation (AN) flow to allow the different phy implementations to perform any steps necessary to improve AN. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 16 ++++++++++++++-- drivers/net/ethernet/amd/xgbe/xgbe.h | 5 +++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 072b9f664597..e3d361e242aa 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -437,6 +437,9 @@ static void xgbe_an73_disable(struct xgbe_prv_data *pdata) static void xgbe_an_restart(struct xgbe_prv_data *pdata) { + if (pdata->phy_if.phy_impl.an_pre) + pdata->phy_if.phy_impl.an_pre(pdata); + switch (pdata->an_mode) { case XGBE_AN_MODE_CL73: case XGBE_AN_MODE_CL73_REDRV: @@ -453,6 +456,9 @@ static void xgbe_an_restart(struct xgbe_prv_data *pdata) static void xgbe_an_disable(struct xgbe_prv_data *pdata) { + if (pdata->phy_if.phy_impl.an_post) + pdata->phy_if.phy_impl.an_post(pdata); + switch (pdata->an_mode) { case XGBE_AN_MODE_CL73: case XGBE_AN_MODE_CL73_REDRV: @@ -637,11 +643,11 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata) return XGBE_AN_NO_LINK; } - xgbe_an73_disable(pdata); + xgbe_an_disable(pdata); xgbe_switch_mode(pdata); - xgbe_an73_restart(pdata); + xgbe_an_restart(pdata); return XGBE_AN_INCOMPAT_LINK; } @@ -820,6 +826,9 @@ static void xgbe_an37_state_machine(struct xgbe_prv_data *pdata) pdata->an_result = pdata->an_state; pdata->an_state = XGBE_AN_READY; + if (pdata->phy_if.phy_impl.an_post) + pdata->phy_if.phy_impl.an_post(pdata); + netif_dbg(pdata, link, pdata->netdev, "CL37 AN result: %s\n", xgbe_state_as_string(pdata->an_result)); } @@ -903,6 +912,9 @@ again: pdata->kx_state = XGBE_RX_BPA; pdata->an_start = 0; + if (pdata->phy_if.phy_impl.an_post) + pdata->phy_if.phy_impl.an_post(pdata); + netif_dbg(pdata, link, pdata->netdev, "CL73 AN result: %s\n", xgbe_state_as_string(pdata->an_result)); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index ad102c8bac7b..fa0b51ea1b95 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -833,6 +833,7 @@ struct xgbe_hw_if { /* This structure represents implementation specific routines for an * implementation of a PHY. All routines are required unless noted below. * Optional routines: + * an_pre, an_post * kr_training_pre, kr_training_post */ struct xgbe_phy_impl_if { @@ -875,6 +876,10 @@ struct xgbe_phy_impl_if { /* Process results of auto-negotiation */ enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *); + /* Pre/Post auto-negotiation support */ + void (*an_pre)(struct xgbe_prv_data *); + void (*an_post)(struct xgbe_prv_data *); + /* Pre/Post KR training enablement support */ void (*kr_training_pre)(struct xgbe_prv_data *); void (*kr_training_post)(struct xgbe_prv_data *); From 96f4d430c507ed4856048c2dc9c1a2ea5b5e74e4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 23 Apr 2018 11:43:17 -0500 Subject: [PATCH 512/660] amd-xgbe: Improve KR auto-negotiation and training Update xgbe-phy-v2.c to make use of the auto-negotiation (AN) phy hooks to improve the ability to successfully complete Clause 73 AN when running at 10gbps. Hardware can sometimes have issues with CDR lock when the AN DME page exchange is being performed. The AN and KR training hooks are used as follows: - The pre AN hook is used to disable CDR tracking in the PHY so that the DME page exchange can be successfully and consistently completed. - The post KR training hook is used to re-enable the CDR tracking so that KR training can successfully complete. - The post AN hook is used to check for an unsuccessful AN which will increase a CDR tracking enablement delay (up to a maximum value). Add two debugfs entries to allow control over use of the CDR tracking workaround. The debugfs entries allow the CDR tracking workaround to be disabled and determine whether to re-enable CDR tracking before or after link training has been initiated. Also, with these changes the receiver reset cycle that is performed during the link status check can be performed less often. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 8 ++ drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c | 16 +++ drivers/net/ethernet/amd/xgbe/xgbe-main.c | 1 + drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 8 +- drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 2 + drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 125 ++++++++++++++++++- drivers/net/ethernet/amd/xgbe/xgbe.h | 4 + 7 files changed, 160 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 7ea72ef11a55..d272dc6984ac 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1321,6 +1321,10 @@ #define MDIO_VEND2_AN_STAT 0x8002 #endif +#ifndef MDIO_VEND2_PMA_CDR_CONTROL +#define MDIO_VEND2_PMA_CDR_CONTROL 0x8056 +#endif + #ifndef MDIO_CTRL1_SPEED1G #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100) #endif @@ -1369,6 +1373,10 @@ #define XGBE_AN_CL37_TX_CONFIG_MASK 0x08 #define XGBE_AN_CL37_MII_CTRL_8BIT 0x0100 +#define XGBE_PMA_CDR_TRACK_EN_MASK 0x01 +#define XGBE_PMA_CDR_TRACK_EN_OFF 0x00 +#define XGBE_PMA_CDR_TRACK_EN_ON 0x01 + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index 7d128be61310..b91143947ed2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -519,6 +519,22 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata) "debugfs_create_file failed\n"); } + if (pdata->vdata->an_cdr_workaround) { + pfile = debugfs_create_bool("an_cdr_workaround", 0600, + pdata->xgbe_debugfs, + &pdata->debugfs_an_cdr_workaround); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_bool failed\n"); + + pfile = debugfs_create_bool("an_cdr_track_early", 0600, + pdata->xgbe_debugfs, + &pdata->debugfs_an_cdr_track_early); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_bool failed\n"); + } + kfree(buf); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 795e556d4a3f..441d0973957b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -349,6 +349,7 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata) XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1); /* Call MDIO/PHY initialization routine */ + pdata->debugfs_an_cdr_workaround = pdata->vdata->an_cdr_workaround; ret = pdata->phy_if.phy_init(pdata); if (ret) return ret; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index e3d361e242aa..1b45cd73a258 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -432,6 +432,8 @@ static void xgbe_an73_disable(struct xgbe_prv_data *pdata) xgbe_an73_set(pdata, false, false); xgbe_an73_disable_interrupts(pdata); + pdata->an_start = 0; + netif_dbg(pdata, link, pdata->netdev, "CL73 AN disabled\n"); } @@ -511,11 +513,11 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata, XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); - if (pdata->phy_if.phy_impl.kr_training_post) - pdata->phy_if.phy_impl.kr_training_post(pdata); - netif_dbg(pdata, link, pdata->netdev, "KR training initiated\n"); + + if (pdata->phy_if.phy_impl.kr_training_post) + pdata->phy_if.phy_impl.kr_training_post(pdata); } return XGBE_AN_PAGE_RECEIVED; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index eb23f9ba1a9a..82d1f416ee2a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -456,6 +456,7 @@ static const struct xgbe_version_data xgbe_v2a = { .irq_reissue_support = 1, .tx_desc_prefetch = 5, .rx_desc_prefetch = 5, + .an_cdr_workaround = 1, }; static const struct xgbe_version_data xgbe_v2b = { @@ -470,6 +471,7 @@ static const struct xgbe_version_data xgbe_v2b = { .irq_reissue_support = 1, .tx_desc_prefetch = 5, .rx_desc_prefetch = 5, + .an_cdr_workaround = 1, }; static const struct pci_device_id xgbe_pci_table[] = { diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 3304a291aa96..b48efc04c4da 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -147,6 +147,14 @@ /* Rate-change complete wait/retry count */ #define XGBE_RATECHANGE_COUNT 500 +/* CDR delay values for KR support (in usec) */ +#define XGBE_CDR_DELAY_INIT 10000 +#define XGBE_CDR_DELAY_INC 10000 +#define XGBE_CDR_DELAY_MAX 100000 + +/* RRC frequency during link status check */ +#define XGBE_RRC_FREQUENCY 10 + enum xgbe_port_mode { XGBE_PORT_MODE_RSVD = 0, XGBE_PORT_MODE_BACKPLANE, @@ -355,6 +363,10 @@ struct xgbe_phy_data { unsigned int redrv_addr; unsigned int redrv_lane; unsigned int redrv_model; + + /* KR AN support */ + unsigned int phy_cdr_notrack; + unsigned int phy_cdr_delay; }; /* I2C, MDIO and GPIO lines are muxed, so only one device at a time */ @@ -2361,7 +2373,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) return 1; /* No link, attempt a receiver reset cycle */ - if (phy_data->rrc_count++) { + if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) { phy_data->rrc_count = 0; xgbe_phy_rrc(pdata); } @@ -2669,6 +2681,103 @@ static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata) return true; } +static void xgbe_phy_cdr_track(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (!pdata->debugfs_an_cdr_workaround) + return; + + if (!phy_data->phy_cdr_notrack) + return; + + usleep_range(phy_data->phy_cdr_delay, + phy_data->phy_cdr_delay + 500); + + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_CDR_CONTROL, + XGBE_PMA_CDR_TRACK_EN_MASK, + XGBE_PMA_CDR_TRACK_EN_ON); + + phy_data->phy_cdr_notrack = 0; +} + +static void xgbe_phy_cdr_notrack(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (!pdata->debugfs_an_cdr_workaround) + return; + + if (phy_data->phy_cdr_notrack) + return; + + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_CDR_CONTROL, + XGBE_PMA_CDR_TRACK_EN_MASK, + XGBE_PMA_CDR_TRACK_EN_OFF); + + xgbe_phy_rrc(pdata); + + phy_data->phy_cdr_notrack = 1; +} + +static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata) +{ + if (!pdata->debugfs_an_cdr_track_early) + xgbe_phy_cdr_track(pdata); +} + +static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata) +{ + if (pdata->debugfs_an_cdr_track_early) + xgbe_phy_cdr_track(pdata); +} + +static void xgbe_phy_an_post(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + if (phy_data->cur_mode != XGBE_MODE_KR) + break; + + xgbe_phy_cdr_track(pdata); + + switch (pdata->an_result) { + case XGBE_AN_READY: + case XGBE_AN_COMPLETE: + break; + default: + if (phy_data->phy_cdr_delay < XGBE_CDR_DELAY_MAX) + phy_data->phy_cdr_delay += XGBE_CDR_DELAY_INC; + else + phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT; + break; + } + break; + default: + break; + } +} + +static void xgbe_phy_an_pre(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + if (phy_data->cur_mode != XGBE_MODE_KR) + break; + + xgbe_phy_cdr_notrack(pdata); + break; + default: + break; + } +} + static void xgbe_phy_stop(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; @@ -2680,6 +2789,9 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) xgbe_phy_sfp_reset(phy_data); xgbe_phy_sfp_mod_absent(pdata); + /* Reset CDR support */ + xgbe_phy_cdr_track(pdata); + /* Power off the PHY */ xgbe_phy_power_off(pdata); @@ -2712,6 +2824,9 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) /* Start in highest supported mode */ xgbe_phy_set_mode(pdata, phy_data->start_mode); + /* Reset CDR support */ + xgbe_phy_cdr_track(pdata); + /* After starting the I2C controller, we can check for an SFP */ switch (phy_data->port_mode) { case XGBE_PORT_MODE_SFP: @@ -3019,6 +3134,8 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) } } + phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT; + /* Register for driving external PHYs */ mii = devm_mdiobus_alloc(pdata->dev); if (!mii) { @@ -3071,4 +3188,10 @@ void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if) phy_impl->an_advertising = xgbe_phy_an_advertising; phy_impl->an_outcome = xgbe_phy_an_outcome; + + phy_impl->an_pre = xgbe_phy_an_pre; + phy_impl->an_post = xgbe_phy_an_post; + + phy_impl->kr_training_pre = xgbe_phy_kr_training_pre; + phy_impl->kr_training_post = xgbe_phy_kr_training_post; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index fa0b51ea1b95..95d4b56448c6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -994,6 +994,7 @@ struct xgbe_version_data { unsigned int irq_reissue_support; unsigned int tx_desc_prefetch; unsigned int rx_desc_prefetch; + unsigned int an_cdr_workaround; }; struct xgbe_vxlan_data { @@ -1262,6 +1263,9 @@ struct xgbe_prv_data { unsigned int debugfs_xprop_reg; unsigned int debugfs_xi2c_reg; + + bool debugfs_an_cdr_workaround; + bool debugfs_an_cdr_track_early; }; /* Function prototypes*/ From 117df655f8ed51adb6e6b163812a06ebeae9f453 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 23 Apr 2018 11:43:34 -0500 Subject: [PATCH 513/660] amd-xgbe: Only use the SFP supported transceiver signals The SFP eeprom indicates the transceiver signals (Rx LOS, Tx Fault, etc.) that it supports. Update the driver to include checking the eeprom data when deciding whether to use a transceiver signal. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 71 ++++++++++++++++----- 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index b48efc04c4da..aac884314000 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -253,6 +253,10 @@ enum xgbe_sfp_speed { #define XGBE_SFP_BASE_VENDOR_SN 4 #define XGBE_SFP_BASE_VENDOR_SN_LEN 16 +#define XGBE_SFP_EXTD_OPT1 1 +#define XGBE_SFP_EXTD_OPT1_RX_LOS BIT(1) +#define XGBE_SFP_EXTD_OPT1_TX_FAULT BIT(3) + #define XGBE_SFP_EXTD_DIAG 28 #define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE BIT(2) @@ -332,6 +336,7 @@ struct xgbe_phy_data { unsigned int sfp_gpio_address; unsigned int sfp_gpio_mask; + unsigned int sfp_gpio_inputs; unsigned int sfp_gpio_rx_los; unsigned int sfp_gpio_tx_fault; unsigned int sfp_gpio_mod_absent; @@ -986,6 +991,49 @@ static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata) phy_data->sfp_phy_avail = 1; } +static bool xgbe_phy_check_sfp_rx_los(struct xgbe_phy_data *phy_data) +{ + u8 *sfp_extd = phy_data->sfp_eeprom.extd; + + if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_RX_LOS)) + return false; + + if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) + return false; + + if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_rx_los)) + return true; + + return false; +} + +static bool xgbe_phy_check_sfp_tx_fault(struct xgbe_phy_data *phy_data) +{ + u8 *sfp_extd = phy_data->sfp_eeprom.extd; + + if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_TX_FAULT)) + return false; + + if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) + return false; + + if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_tx_fault)) + return true; + + return false; +} + +static bool xgbe_phy_check_sfp_mod_absent(struct xgbe_phy_data *phy_data) +{ + if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) + return false; + + if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_mod_absent)) + return true; + + return false; +} + static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; @@ -1031,6 +1079,10 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) if (sfp_base[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP) return; + /* Update transceiver signals (eeprom extd/options) */ + phy_data->sfp_tx_fault = xgbe_phy_check_sfp_tx_fault(phy_data); + phy_data->sfp_rx_los = xgbe_phy_check_sfp_rx_los(phy_data); + if (xgbe_phy_sfp_parse_quirks(pdata)) return; @@ -1196,7 +1248,6 @@ put: static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int gpio_input; u8 gpio_reg, gpio_ports[2]; int ret; @@ -1211,23 +1262,9 @@ static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) return; } - gpio_input = (gpio_ports[1] << 8) | gpio_ports[0]; + phy_data->sfp_gpio_inputs = (gpio_ports[1] << 8) | gpio_ports[0]; - if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) { - /* No GPIO, just assume the module is present for now */ - phy_data->sfp_mod_absent = 0; - } else { - if (!(gpio_input & (1 << phy_data->sfp_gpio_mod_absent))) - phy_data->sfp_mod_absent = 0; - } - - if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) && - (gpio_input & (1 << phy_data->sfp_gpio_rx_los))) - phy_data->sfp_rx_los = 1; - - if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) && - (gpio_input & (1 << phy_data->sfp_gpio_tx_fault))) - phy_data->sfp_tx_fault = 1; + phy_data->sfp_mod_absent = xgbe_phy_check_sfp_mod_absent(phy_data); } static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata) From 75ecfb49516c53da00c57b9efe48fa3f5504a791 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 23 Apr 2018 10:29:27 +0530 Subject: [PATCH 514/660] powerpc/mce: Fix a bug where mce loops on memory UE. The current code extracts the physical address for UE errors and then hooks it up into memory failure infrastructure. On successful extraction of physical address it wrongly sets "handled = 1" which means this UE error has been recovered. Since MCE handler gets return value as handled = 1, it assumes that error has been recovered and goes back to same NIP. This causes MCE interrupt again and again in a loop leading to hard lockup. Also, initialize phys_addr to ULONG_MAX so that we don't end up queuing undesired page to hwpoison. Without this patch we see: Severe Machine check interrupt [Recovered] NIP: [000000001002588c] PID: 7109 Comm: find Initiator: CPU Error type: UE [Load/Store] Effective address: 00007fffd2755940 Physical address: 000020181a080000 ... Severe Machine check interrupt [Recovered] NIP: [000000001002588c] PID: 7109 Comm: find Initiator: CPU Error type: UE [Load/Store] Effective address: 00007fffd2755940 Physical address: 000020181a080000 Severe Machine check interrupt [Recovered] NIP: [000000001002588c] PID: 7109 Comm: find Initiator: CPU Error type: UE [Load/Store] Effective address: 00007fffd2755940 Physical address: 000020181a080000 Memory failure: 0x20181a08: recovery action for dirty LRU page: Recovered Memory failure: 0x20181a08: already hardware poisoned Memory failure: 0x20181a08: already hardware poisoned Memory failure: 0x20181a08: already hardware poisoned Memory failure: 0x20181a08: already hardware poisoned Memory failure: 0x20181a08: already hardware poisoned Memory failure: 0x20181a08: already hardware poisoned ... Watchdog CPU:38 Hard LOCKUP After this patch we see: Severe Machine check interrupt [Not recovered] NIP: [00007fffaae585f4] PID: 7168 Comm: find Initiator: CPU Error type: UE [Load/Store] Effective address: 00007fffaafe28ac Physical address: 00002017c0bd0000 find[7168]: unhandled signal 7 at 00007fffaae585f4 nip 00007fffaae585f4 lr 00007fffaae585e0 code 4 Memory failure: 0x2017c0bd: recovery action for dirty LRU page: Recovered Fixes: 01eaac2b0591 ("powerpc/mce: Hookup ierror (instruction) UE errors") Fixes: ba41e1e1ccb9 ("powerpc/mce: Hookup derror (load/store) UE errors") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Mahesh Salgaonkar Signed-off-by: Balbir Singh Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce_power.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index fe6fc63251fe..38c5b4764bfe 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -441,7 +441,6 @@ static int mce_handle_ierror(struct pt_regs *regs, if (pfn != ULONG_MAX) { *phys_addr = (pfn << PAGE_SHIFT); - handled = 1; } } } @@ -532,9 +531,7 @@ static int mce_handle_derror(struct pt_regs *regs, * kernel/exception-64s.h */ if (get_paca()->in_mce < MAX_MCE_DEPTH) - if (!mce_find_instr_ea_and_pfn(regs, addr, - phys_addr)) - handled = 1; + mce_find_instr_ea_and_pfn(regs, addr, phys_addr); } found = 1; } @@ -572,7 +569,7 @@ static long mce_handle_error(struct pt_regs *regs, const struct mce_ierror_table itable[]) { struct mce_error_info mce_err = { 0 }; - uint64_t addr, phys_addr; + uint64_t addr, phys_addr = ULONG_MAX; uint64_t srr1 = regs->msr; long handled; From 84749d83758af6576552046b215b9b7f37f9556b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 21 Apr 2018 10:19:29 +0200 Subject: [PATCH 515/660] x86/microcode/intel: Save microcode patch unconditionally save_mc_for_early() was a no-op on !CONFIG_HOTPLUG_CPU but the generic_load_microcode() path saves the microcode patches it has found into the cache of patches which is used for late loading too. Regardless of whether CPU hotplug is used or not. Make the saving unconditional so that late loading can find the proper patch. Reported-by: Vitezslav Samel Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Vitezslav Samel Tested-by: Ashok Raj Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180418081140.GA2439@pc11.op.pod.cz Link: https://lkml.kernel.org/r/20180421081930.15741-1-bp@alien8.de --- arch/x86/kernel/cpu/microcode/intel.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 32b8e5724f96..1c2cfa0644aa 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -485,7 +485,6 @@ static void show_saved_mc(void) */ static void save_mc_for_early(u8 *mc, unsigned int size) { -#ifdef CONFIG_HOTPLUG_CPU /* Synchronization during CPU hotplug. */ static DEFINE_MUTEX(x86_cpu_microcode_mutex); @@ -495,7 +494,6 @@ static void save_mc_for_early(u8 *mc, unsigned int size) show_saved_mc(); mutex_unlock(&x86_cpu_microcode_mutex); -#endif } static bool load_builtin_intel_microcode(struct cpio_data *cp) From 09e182d17e8891dd73baba961a0f5a82e9274c97 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 21 Apr 2018 10:19:30 +0200 Subject: [PATCH 516/660] x86/microcode: Do not exit early from __reload_late() Vitezslav reported a case where the "Timeout during microcode update!" panic would hit. After a deeper look, it turned out that his .config had CONFIG_HOTPLUG_CPU disabled which practically made save_mc_for_early() a no-op. When that happened, the discovered microcode patch wasn't saved into the cache and the late loading path wouldn't find any. This, then, lead to early exit from __reload_late() and thus CPUs waiting until the timeout is reached, leading to the panic. In hindsight, that function should have been written so it does not return before the post-synchronization. Oh well, I know better now... Fixes: bb8c13d61a62 ("x86/microcode: Fix CPU synchronization routine") Reported-by: Vitezslav Samel Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Vitezslav Samel Tested-by: Ashok Raj Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180418081140.GA2439@pc11.op.pod.cz Link: https://lkml.kernel.org/r/20180421081930.15741-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 10c4fc2c91f8..77e201301528 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -564,14 +564,12 @@ static int __reload_late(void *info) apply_microcode_local(&err); spin_unlock(&update_lock); + /* siblings return UCODE_OK because their engine got updated already */ if (err > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - return -1; - /* siblings return UCODE_OK because their engine got updated already */ + ret = -1; } else if (err == UCODE_UPDATED || err == UCODE_OK) { ret = 1; - } else { - return ret; } /* From 907e21c15c883c2c15d1e5ee3cdbb7824ab1da59 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 17 Apr 2018 20:03:09 +0800 Subject: [PATCH 517/660] arm64: mm: drop addr parameter from sync icache and dcache The addr parameter isn't used for anything. Let's simplify and get rid of it, like arm. Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/mm/flush.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7e2c27e63cd8..7c4c8f318ba9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -230,7 +230,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } } -extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); +extern void __sync_icache_dcache(pte_t pteval); /* * PTE bits configuration in the presence of hardware Dirty Bit Management @@ -253,7 +253,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t old_pte; if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte, addr); + __sync_icache_dcache(pte); /* * If the existing pte is valid, check for potential race with diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index e36ed5087b5c..1059884f9a6f 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -58,7 +58,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, flush_ptrace_access(vma, page, uaddr, dst, len); } -void __sync_icache_dcache(pte_t pte, unsigned long addr) +void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); From facb9f6eba3df4e8027301cc0e514dc582a1b366 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 23 Apr 2018 15:25:10 +0200 Subject: [PATCH 518/660] libceph: un-backoff on tick when we have a authenticated session This means that if we do some backoff, then authenticate, and are healthy for an extended period of time, a subsequent failure won't leave us starting our hunting sequence with a large backoff. Mirrors ceph.git commit d466bc6e66abba9b464b0b69687cf45c9dccf383. Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- net/ceph/mon_client.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index b3dac24412d3..02c441c12c38 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -209,6 +209,14 @@ static void reopen_session(struct ceph_mon_client *monc) __open_session(monc); } +static void un_backoff(struct ceph_mon_client *monc) +{ + monc->hunt_mult /= 2; /* reduce by 50% */ + if (monc->hunt_mult < 1) + monc->hunt_mult = 1; + dout("%s hunt_mult now %d\n", __func__, monc->hunt_mult); +} + /* * Reschedule delayed work timer. */ @@ -963,6 +971,7 @@ static void delayed_work(struct work_struct *work) if (!monc->hunting) { ceph_con_keepalive(&monc->con); __validate_auth(monc); + un_backoff(monc); } if (is_auth && @@ -1123,9 +1132,7 @@ static void finish_hunting(struct ceph_mon_client *monc) dout("%s found mon%d\n", __func__, monc->cur_mon); monc->hunting = false; monc->had_a_connection = true; - monc->hunt_mult /= 2; /* reduce by 50% */ - if (monc->hunt_mult < 1) - monc->hunt_mult = 1; + un_backoff(monc); } } From 7b4c443d139f1d2b5570da475f7a9cbcef86740c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 23 Apr 2018 15:25:10 +0200 Subject: [PATCH 519/660] libceph: reschedule a tick in finish_hunting() If we go without an established session for a while, backoff delay will climb to 30 seconds. The keepalive timeout is also 30 seconds, so it's pretty easily hit after a prolonged hunting for a monitor: we don't get a chance to send out a keepalive in time, which means we never get back a keepalive ack in time, cutting an established session and attempting to connect to a different monitor every 30 seconds: [Sun Apr 1 23:37:05 2018] libceph: mon0 10.80.20.99:6789 session established [Sun Apr 1 23:37:36 2018] libceph: mon0 10.80.20.99:6789 session lost, hunting for new mon [Sun Apr 1 23:37:36 2018] libceph: mon2 10.80.20.103:6789 session established [Sun Apr 1 23:38:07 2018] libceph: mon2 10.80.20.103:6789 session lost, hunting for new mon [Sun Apr 1 23:38:07 2018] libceph: mon1 10.80.20.100:6789 session established [Sun Apr 1 23:38:37 2018] libceph: mon1 10.80.20.100:6789 session lost, hunting for new mon [Sun Apr 1 23:38:37 2018] libceph: mon2 10.80.20.103:6789 session established [Sun Apr 1 23:39:08 2018] libceph: mon2 10.80.20.103:6789 session lost, hunting for new mon The regular keepalive interval is 10 seconds. After ->hunting is cleared in finish_hunting(), call __schedule_delayed() to ensure we send out a keepalive after 10 seconds. Cc: stable@vger.kernel.org # 4.7+ Link: http://tracker.ceph.com/issues/23537 Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- net/ceph/mon_client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 02c441c12c38..21ac6e3b96bb 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1133,6 +1133,7 @@ static void finish_hunting(struct ceph_mon_client *monc) monc->hunting = false; monc->had_a_connection = true; un_backoff(monc); + __schedule_delayed(monc); } } From a4efd3a4e685df239805ebd57f546904d5821114 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Wed, 28 Mar 2018 00:55:26 +0800 Subject: [PATCH 520/660] drm/amdkfd: kfd_dev_is_large_bar() can be static Fixes: 5ec7e02854b3 ("drm/amdkfd: Add ioctls for GPUVM memory management") Signed-off-by: Fengguang Wu Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index cd679cf1fd30..fb5d997e4148 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1147,7 +1147,7 @@ err_unlock: return ret; } -bool kfd_dev_is_large_bar(struct kfd_dev *dev) +static bool kfd_dev_is_large_bar(struct kfd_dev *dev) { struct kfd_local_mem_info mem_info; From a0a37862a4e1844793d39aca9ccb8fecbdcb8659 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 23 Apr 2018 14:16:03 +0300 Subject: [PATCH 521/660] ACPI / watchdog: Prefer iTCO_wdt on Lenovo Z50-70 WDAT table on Lenovo Z50-70 is using RTC SRAM (ports 0x70 and 0x71) to store state of the timer. This conflicts with Linux RTC driver (rtc-cmos.c) who fails to reserve those ports for itself preventing RTC from functioning. In addition the WDAT table seems not to be fully functional because it does not reset the system when the watchdog times out. On this system iTCO_wdt works just fine so we simply prefer to use it instead of WDAT. This makes RTC working again and also results working watchdog via iTCO_wdt. Reported-by: Peter Milley Link: https://bugzilla.kernel.org/show_bug.cgi?id=199033 Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_watchdog.c | 59 ++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index ebb626ffb5fa..4bde16fb97d8 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -12,23 +12,64 @@ #define pr_fmt(fmt) "ACPI: watchdog: " fmt #include +#include #include #include #include "internal.h" +static const struct dmi_system_id acpi_watchdog_skip[] = { + { + /* + * On Lenovo Z50-70 there are two issues with the WDAT + * table. First some of the instructions use RTC SRAM + * to store persistent information. This does not work well + * with Linux RTC driver. Second, more important thing is + * that the instructions do not actually reset the system. + * + * On this particular system iTCO_wdt seems to work just + * fine so we prefer that over WDAT for now. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=199033. + */ + .ident = "Lenovo Z50-70", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20354"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Z50-70"), + }, + }, + {} +}; + +static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) +{ + const struct acpi_table_wdat *wdat = NULL; + acpi_status status; + + if (acpi_disabled) + return NULL; + + if (dmi_check_system(acpi_watchdog_skip)) + return NULL; + + status = acpi_get_table(ACPI_SIG_WDAT, 0, + (struct acpi_table_header **)&wdat); + if (ACPI_FAILURE(status)) { + /* It is fine if there is no WDAT */ + return NULL; + } + + return wdat; +} + /** * Returns true if this system should prefer ACPI based watchdog instead of * the native one (which are typically the same hardware). */ bool acpi_has_watchdog(void) { - struct acpi_table_header hdr; - - if (acpi_disabled) - return false; - - return ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_WDAT, 0, &hdr)); + return !!acpi_watchdog_get_wdat(); } EXPORT_SYMBOL_GPL(acpi_has_watchdog); @@ -41,12 +82,10 @@ void __init acpi_watchdog_init(void) struct platform_device *pdev; struct resource *resources; size_t nresources = 0; - acpi_status status; int i; - status = acpi_get_table(ACPI_SIG_WDAT, 0, - (struct acpi_table_header **)&wdat); - if (ACPI_FAILURE(status)) { + wdat = acpi_watchdog_get_wdat(); + if (!wdat) { /* It is fine if there is no WDAT */ return; } From ded5e5622c38f6d0188c776bfd6793e1cdbb7606 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Mar 2018 02:25:17 +0000 Subject: [PATCH 522/660] drm/amdkfd: Fix the error return code in kfd_ioctl_unmap_memory_from_gpu() Passing NULL pointer to PTR_ERR will result in return value of 0 indicating success which is clearly not what it is intended here. This patch returns -EINVAL instead. v2: change ret code to -ENODEV Fixes: 5ec7e02854b3 ("drm/amdkfd: Add ioctls for GPUVM memory management") Signed-off-by: Wei Yongjun Reviewed-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index fb5d997e4148..f65e0142d42e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1421,7 +1421,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, pdd = kfd_get_process_device_data(dev, p); if (!pdd) { - err = PTR_ERR(pdd); + err = -EINVAL; goto bind_process_to_device_failed; } From 1cf6cc74bbeb85bb87c3ca3f3df97a283c3aa737 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Tue, 10 Apr 2018 17:32:33 -0400 Subject: [PATCH 523/660] drm/amdkfd: fix clock counter retrieval for node without GPU Currently if a user requests clock counters for a node without a GPU resource we will always return EINVAL. Instead if no GPU resource is attached, fill the gpu_clock_counter argument with zeroes so that we may proceed and return valid CPU counters. Signed-off-by: Andres Rodriguez Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f65e0142d42e..59808a39ecf4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -749,12 +749,13 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, struct timespec64 time; dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) - return -EINVAL; - - /* Reading GPU clock counter from KGD */ - args->gpu_clock_counter = - dev->kfd2kgd->get_gpu_clock_counter(dev->kgd); + if (dev) + /* Reading GPU clock counter from KGD */ + args->gpu_clock_counter = + dev->kfd2kgd->get_gpu_clock_counter(dev->kgd); + else + /* Node without GPU resource */ + args->gpu_clock_counter = 0; /* No access to rdtsc. Using raw monotonic time */ getrawmonotonic64(&time); From ee53a65dc766384aaf1a26e3c43dd13456170b69 Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Wed, 18 Apr 2018 08:56:42 -0700 Subject: [PATCH 524/660] cpufreq: brcmstb-avs-cpufreq: remove development debug support This debug code was helpful while developing the driver, but it isn't being used for anything anymore. Signed-off-by: Markus Mayer Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 10 - drivers/cpufreq/brcmstb-avs-cpufreq.c | 323 +------------------------- 2 files changed, 1 insertion(+), 332 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 7f56fe5183f2..de55c7d57438 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -71,16 +71,6 @@ config ARM_BRCMSTB_AVS_CPUFREQ Say Y, if you have a Broadcom SoC with AVS support for DFS or DVFS. -config ARM_BRCMSTB_AVS_CPUFREQ_DEBUG - bool "Broadcom STB AVS CPUfreq driver sysfs debug capability" - depends on ARM_BRCMSTB_AVS_CPUFREQ - help - Enabling this option turns on debug support via sysfs under - /sys/kernel/debug/brcmstb-avs-cpufreq. It is possible to read all and - write some AVS mailbox registers through sysfs entries. - - If in doubt, say N. - config ARM_EXYNOS5440_CPUFREQ tristate "SAMSUNG EXYNOS5440" depends on SOC_EXYNOS5440 diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 6cdac1aaf23c..b07559b9ed99 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -49,13 +49,6 @@ #include #include -#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG -#include -#include -#include -#include -#endif - /* Max number of arguments AVS calls take */ #define AVS_MAX_CMD_ARGS 4 /* @@ -182,88 +175,11 @@ struct private_data { void __iomem *base; void __iomem *avs_intr_base; struct device *dev; -#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG - struct dentry *debugfs; -#endif struct completion done; struct semaphore sem; struct pmap pmap; }; -#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG - -enum debugfs_format { - DEBUGFS_NORMAL, - DEBUGFS_FLOAT, - DEBUGFS_REV, -}; - -struct debugfs_data { - struct debugfs_entry *entry; - struct private_data *priv; -}; - -struct debugfs_entry { - char *name; - u32 offset; - fmode_t mode; - enum debugfs_format format; -}; - -#define DEBUGFS_ENTRY(name, mode, format) { \ - #name, AVS_MBOX_##name, mode, format \ -} - -/* - * These are used for debugfs only. Otherwise we use AVS_MBOX_PARAM() directly. - */ -#define AVS_MBOX_PARAM1 AVS_MBOX_PARAM(0) -#define AVS_MBOX_PARAM2 AVS_MBOX_PARAM(1) -#define AVS_MBOX_PARAM3 AVS_MBOX_PARAM(2) -#define AVS_MBOX_PARAM4 AVS_MBOX_PARAM(3) - -/* - * This table stores the name, access permissions and offset for each hardware - * register and is used to generate debugfs entries. - */ -static struct debugfs_entry debugfs_entries[] = { - DEBUGFS_ENTRY(COMMAND, S_IWUSR, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(STATUS, S_IWUSR, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(VOLTAGE0, 0, DEBUGFS_FLOAT), - DEBUGFS_ENTRY(TEMP0, 0, DEBUGFS_FLOAT), - DEBUGFS_ENTRY(PV0, 0, DEBUGFS_FLOAT), - DEBUGFS_ENTRY(MV0, 0, DEBUGFS_FLOAT), - DEBUGFS_ENTRY(PARAM1, S_IWUSR, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(PARAM2, S_IWUSR, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(PARAM3, S_IWUSR, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(PARAM4, S_IWUSR, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(REVISION, 0, DEBUGFS_REV), - DEBUGFS_ENTRY(PSTATE, 0, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(HEARTBEAT, 0, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(MAGIC, S_IWUSR, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(SIGMA_HVT, 0, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(SIGMA_SVT, 0, DEBUGFS_NORMAL), - DEBUGFS_ENTRY(VOLTAGE1, 0, DEBUGFS_FLOAT), - DEBUGFS_ENTRY(TEMP1, 0, DEBUGFS_FLOAT), - DEBUGFS_ENTRY(PV1, 0, DEBUGFS_FLOAT), - DEBUGFS_ENTRY(MV1, 0, DEBUGFS_FLOAT), - DEBUGFS_ENTRY(FREQUENCY, 0, DEBUGFS_NORMAL), -}; - -static int brcm_avs_target_index(struct cpufreq_policy *, unsigned int); - -static char *__strtolower(char *s) -{ - char *p; - - for (p = s; *p; p++) - *p = tolower(*p); - - return s; -} - -#endif /* CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG */ - static void __iomem *__map_region(const char *name) { struct device_node *np; @@ -516,238 +432,6 @@ brcm_avs_get_freq_table(struct device *dev, struct private_data *priv) return table; } -#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG - -#define MANT(x) (unsigned int)(abs((x)) / 1000) -#define FRAC(x) (unsigned int)(abs((x)) - abs((x)) / 1000 * 1000) - -static int brcm_avs_debug_show(struct seq_file *s, void *data) -{ - struct debugfs_data *dbgfs = s->private; - void __iomem *base; - u32 val, offset; - - if (!dbgfs) { - seq_puts(s, "No device pointer\n"); - return 0; - } - - base = dbgfs->priv->base; - offset = dbgfs->entry->offset; - val = readl(base + offset); - switch (dbgfs->entry->format) { - case DEBUGFS_NORMAL: - seq_printf(s, "%u\n", val); - break; - case DEBUGFS_FLOAT: - seq_printf(s, "%d.%03d\n", MANT(val), FRAC(val)); - break; - case DEBUGFS_REV: - seq_printf(s, "%c.%c.%c.%c\n", (val >> 24 & 0xff), - (val >> 16 & 0xff), (val >> 8 & 0xff), - val & 0xff); - break; - } - seq_printf(s, "0x%08x\n", val); - - return 0; -} - -#undef MANT -#undef FRAC - -static ssize_t brcm_avs_seq_write(struct file *file, const char __user *buf, - size_t size, loff_t *ppos) -{ - struct seq_file *s = file->private_data; - struct debugfs_data *dbgfs = s->private; - struct private_data *priv = dbgfs->priv; - void __iomem *base, *avs_intr_base; - bool use_issue_command = false; - unsigned long val, offset; - char str[128]; - int ret; - char *str_ptr = str; - - if (size >= sizeof(str)) - return -E2BIG; - - memset(str, 0, sizeof(str)); - ret = copy_from_user(str, buf, size); - if (ret) - return ret; - - base = priv->base; - avs_intr_base = priv->avs_intr_base; - offset = dbgfs->entry->offset; - /* - * Special case writing to "command" entry only: if the string starts - * with a 'c', we use the driver's __issue_avs_command() function. - * Otherwise, we perform a raw write. This should allow testing of raw - * access as well as using the higher level function. (Raw access - * doesn't clear the firmware return status after issuing the command.) - */ - if (str_ptr[0] == 'c' && offset == AVS_MBOX_COMMAND) { - use_issue_command = true; - str_ptr++; - } - if (kstrtoul(str_ptr, 0, &val) != 0) - return -EINVAL; - - /* - * Setting the P-state is a special case. We need to update the CPU - * frequency we report. - */ - if (val == AVS_CMD_SET_PSTATE) { - struct cpufreq_policy *policy; - unsigned int pstate; - - policy = cpufreq_cpu_get(smp_processor_id()); - /* Read back the P-state we are about to set */ - pstate = readl(base + AVS_MBOX_PARAM(0)); - if (use_issue_command) { - ret = brcm_avs_target_index(policy, pstate); - return ret ? ret : size; - } - policy->cur = policy->freq_table[pstate].frequency; - } - - if (use_issue_command) { - ret = __issue_avs_command(priv, val, false, NULL); - } else { - /* Locking here is not perfect, but is only for debug. */ - ret = down_interruptible(&priv->sem); - if (ret) - return ret; - - writel(val, base + offset); - /* We have to wake up the firmware to process a command. */ - if (offset == AVS_MBOX_COMMAND) - writel(AVS_CPU_L2_INT_MASK, - avs_intr_base + AVS_CPU_L2_SET0); - up(&priv->sem); - } - - return ret ? ret : size; -} - -static struct debugfs_entry *__find_debugfs_entry(const char *name) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(debugfs_entries); i++) - if (strcasecmp(debugfs_entries[i].name, name) == 0) - return &debugfs_entries[i]; - - return NULL; -} - -static int brcm_avs_debug_open(struct inode *inode, struct file *file) -{ - struct debugfs_data *data; - fmode_t fmode; - int ret; - - /* - * seq_open(), which is called by single_open(), clears "write" access. - * We need write access to some files, so we preserve our access mode - * and restore it. - */ - fmode = file->f_mode; - /* - * Check access permissions even for root. We don't want to be writing - * to read-only registers. Access for regular users has already been - * checked by the VFS layer. - */ - if ((fmode & FMODE_WRITER) && !(inode->i_mode & S_IWUSR)) - return -EACCES; - - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - /* - * We use the same file system operations for all our debug files. To - * produce specific output, we look up the file name upon opening a - * debugfs entry and map it to a memory offset. This offset is then used - * in the generic "show" function to read a specific register. - */ - data->entry = __find_debugfs_entry(file->f_path.dentry->d_iname); - data->priv = inode->i_private; - - ret = single_open(file, brcm_avs_debug_show, data); - if (ret) - kfree(data); - file->f_mode = fmode; - - return ret; -} - -static int brcm_avs_debug_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq_priv = file->private_data; - struct debugfs_data *data = seq_priv->private; - - kfree(data); - return single_release(inode, file); -} - -static const struct file_operations brcm_avs_debug_ops = { - .open = brcm_avs_debug_open, - .read = seq_read, - .write = brcm_avs_seq_write, - .llseek = seq_lseek, - .release = brcm_avs_debug_release, -}; - -static void brcm_avs_cpufreq_debug_init(struct platform_device *pdev) -{ - struct private_data *priv = platform_get_drvdata(pdev); - struct dentry *dir; - int i; - - if (!priv) - return; - - dir = debugfs_create_dir(BRCM_AVS_CPUFREQ_NAME, NULL); - if (IS_ERR_OR_NULL(dir)) - return; - priv->debugfs = dir; - - for (i = 0; i < ARRAY_SIZE(debugfs_entries); i++) { - /* - * The DEBUGFS_ENTRY macro generates uppercase strings. We - * convert them to lowercase before creating the debugfs - * entries. - */ - char *entry = __strtolower(debugfs_entries[i].name); - fmode_t mode = debugfs_entries[i].mode; - - if (!debugfs_create_file(entry, S_IFREG | S_IRUGO | mode, - dir, priv, &brcm_avs_debug_ops)) { - priv->debugfs = NULL; - debugfs_remove_recursive(dir); - break; - } - } -} - -static void brcm_avs_cpufreq_debug_exit(struct platform_device *pdev) -{ - struct private_data *priv = platform_get_drvdata(pdev); - - if (priv && priv->debugfs) { - debugfs_remove_recursive(priv->debugfs); - priv->debugfs = NULL; - } -} - -#else - -static void brcm_avs_cpufreq_debug_init(struct platform_device *pdev) {} -static void brcm_avs_cpufreq_debug_exit(struct platform_device *pdev) {} - -#endif /* CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG */ - /* * To ensure the right firmware is running we need to * - check the MAGIC matches what we expect @@ -1016,11 +700,8 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev) return ret; brcm_avs_driver.driver_data = pdev; - ret = cpufreq_register_driver(&brcm_avs_driver); - if (!ret) - brcm_avs_cpufreq_debug_init(pdev); - return ret; + return cpufreq_register_driver(&brcm_avs_driver); } static int brcm_avs_cpufreq_remove(struct platform_device *pdev) @@ -1032,8 +713,6 @@ static int brcm_avs_cpufreq_remove(struct platform_device *pdev) if (ret) return ret; - brcm_avs_cpufreq_debug_exit(pdev); - priv = platform_get_drvdata(pdev); iounmap(priv->base); iounmap(priv->avs_intr_base); From 7bbc0b950f34bf11c26a32b6beb927777e7f3b93 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 13 Apr 2018 19:49:28 -0700 Subject: [PATCH 525/660] drm/amdkfd: fix build, select MMU_NOTIFIER When CONFIG_MMU_NOTIFIER is not enabled, struct mmu_notifier has an incomplete type definition, which causes build errors. ../drivers/gpu/drm/amd/amdkfd/kfd_priv.h:607:22: error: field 'mmu_notifier' has incomplete type ../include/linux/kernel.h:979:32: error: dereferencing pointer to incomplete type ../include/linux/kernel.h:980:18: error: dereferencing pointer to incomplete type ../drivers/gpu/drm/amd/amdkfd/kfd_process.c:434:2: error: implicit declaration of function 'mmu_notifier_unregister_no_release' [-Werror=implicit-function-declaration] ../drivers/gpu/drm/amd/amdkfd/kfd_process.c:435:2: error: implicit declaration of function 'mmu_notifier_call_srcu' [-Werror=implicit-function-declaration] ../drivers/gpu/drm/amd/amdkfd/kfd_process.c:438:21: error: variable 'kfd_process_mmu_notifier_ops' has initializer but incomplete type ../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: error: unknown field 'release' specified in initializer ../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: warning: excess elements in struct initializer [enabled by default] ../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: warning: (near initialization for 'kfd_process_mmu_notifier_ops') [enabled by default] ../drivers/gpu/drm/amd/amdkfd/kfd_process.c:534:2: error: implicit declaration of function 'mmu_notifier_register' [-Werror=implicit-function-declaration] Signed-off-by: Randy Dunlap Tested-by: Anders Roxell Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index ed2f06c9f346..3858820a0055 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -6,5 +6,6 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" depends on DRM_AMDGPU && X86_64 imply AMD_IOMMU_V2 + select MMU_NOTIFIER help Enable this if you want to use HSA features on AMD GPU devices. From ac1e55b1fdb27c1b07a0a6fe519f1291ff1e7d40 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 23 Apr 2018 11:16:56 +0200 Subject: [PATCH 526/660] ACPI / button: make module loadable when booted in non-ACPI mode Modules such as nouveau.ko and i915.ko have a link time dependency on acpi_lid_open(), and due to its use of acpi_bus_register_driver(), the button.ko module that provides it is only loadable when booted in ACPI mode. However, the ACPI button driver can be built into the core kernel as well, in which case the dependency can always be satisfied, and the dependent modules can be loaded regardless of whether the system was booted in ACPI mode or not. So let's fix this asymmetry by making the ACPI button driver loadable as a module even if not booted in ACPI mode, so it can provide the acpi_lid_open() symbol in the same way as when built into the kernel. Signed-off-by: Ard Biesheuvel [ rjw: Minor adjustments of comments, whitespace and names. ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index e1eee7a60fad..f1cc4f9d31cd 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -635,4 +635,26 @@ module_param_call(lid_init_state, NULL, 0644); MODULE_PARM_DESC(lid_init_state, "Behavior for reporting LID initial state"); -module_acpi_driver(acpi_button_driver); +static int acpi_button_register_driver(struct acpi_driver *driver) +{ + /* + * Modules such as nouveau.ko and i915.ko have a link time dependency + * on acpi_lid_open(), and would therefore not be loadable on ACPI + * capable kernels booted in non-ACPI mode if the return value of + * acpi_bus_register_driver() is returned from here with ACPI disabled + * when this driver is built as a module. + */ + if (acpi_disabled) + return 0; + + return acpi_bus_register_driver(driver); +} + +static void acpi_button_unregister_driver(struct acpi_driver *driver) +{ + if (!acpi_disabled) + acpi_bus_unregister_driver(driver); +} + +module_driver(acpi_button_driver, acpi_button_register_driver, + acpi_button_unregister_driver); From 2b54f785b4d4894ab7ab3bf5e461e0819d221c1c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 23 Apr 2018 15:19:25 +0200 Subject: [PATCH 527/660] ALSA: usb-audio: Fix missing endian conversion The UAC2 jack detection support introduced the bmControls checks in a couple of places, but they forgot the endian conversion; the bmControls of UAC2 terminal descriptor is __le16, not a byte like in UAC1. Fixes: 5a222e849452 ("ALSA: usb-audio: UAC2 jack detection") Tested-by: Andrew Chant Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 3387483310b1..344d7b069d59 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1860,7 +1860,7 @@ static int parse_audio_input_terminal(struct mixer_build *state, int unitid, check_input_term(state, d->bTerminalID, &iterm); if (state->mixer->protocol == UAC_VERSION_2) { /* Check for jack detection. */ - if (uac_v2v3_control_is_readable(d->bmControls, + if (uac_v2v3_control_is_readable(le16_to_cpu(d->bmControls), UAC2_TE_CONNECTOR)) { build_connector_control(state, &iterm, true); } @@ -2562,7 +2562,7 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer) if (err < 0 && err != -EINVAL) return err; - if (uac_v2v3_control_is_readable(desc->bmControls, + if (uac_v2v3_control_is_readable(le16_to_cpu(desc->bmControls), UAC2_TE_CONNECTOR)) { build_connector_control(&state, &state.oterm, false); From 1d8d6428d1da642ddd75b0be2d1bb1123ff8e017 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 11:11:48 +0200 Subject: [PATCH 528/660] ALSA: usb-audio: Skip broken EU on Dell dock USB-audio The Dell Dock USB-audio device with 0bda:4014 is behaving notoriously bad, and we have already applied some workaround to avoid the firmware hiccup. Yet we still need to skip one thing, the Extension Unit at ID 4, which doesn't react correctly to the mixer ctl access. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1090658 Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer_maps.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 9038b2e7df73..eaa03acd4686 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -353,8 +353,11 @@ static struct usbmix_name_map bose_companion5_map[] = { /* * Dell usb dock with ALC4020 codec had a firmware problem where it got * screwed up when zero volume is passed; just skip it as a workaround + * + * Also the extension unit gives an access error, so skip it as well. */ static const struct usbmix_name_map dell_alc4020_map[] = { + { 4, NULL }, /* extension unit */ { 16, NULL }, { 19, NULL }, { 0 } From 10412c420af9ba1f3de8483a95d360e5eb5bfc84 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 22 Apr 2018 21:19:24 +0900 Subject: [PATCH 529/660] ALSA: dice: fix OUI for TC group OUI for TC Electronic is 0x000166, for TC GROUP A/S. 0x001486 is for Echo Digital Audio Corporation. Fixes: 7cafc65b3aa1 ('ALSA: dice: force to add two pcm devices for listed models') Cc: # v4.6+ Reference: http://standards-oui.ieee.org/oui/oui.txt Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/dice/dice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c index 4ddb4cdd054b..96bb01b6b751 100644 --- a/sound/firewire/dice/dice.c +++ b/sound/firewire/dice/dice.c @@ -14,7 +14,7 @@ MODULE_LICENSE("GPL v2"); #define OUI_WEISS 0x001c6a #define OUI_LOUD 0x000ff2 #define OUI_FOCUSRITE 0x00130e -#define OUI_TCELECTRONIC 0x001486 +#define OUI_TCELECTRONIC 0x000166 #define DICE_CATEGORY_ID 0x04 #define WEISS_CATEGORY_ID 0x00 From 9cf2f437ca5b39828984064fad213e68fc17ef11 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Apr 2018 14:33:37 +0800 Subject: [PATCH 530/660] team: fix netconsole setup over team MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same fix in Commit dbe173079ab5 ("bridge: fix netconsole setup over bridge") is also needed for team driver. While at it, remove the unnecessary parameter *team from team_port_enable_netpoll(). v1->v2: - fix it in a better way, as does bridge. Fixes: 0fb52a27a04a ("team: cleanup netpoll clode") Reported-by: João Avelino Bellomo Filho Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/team/team.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index acbe84967834..ddb6bf85a59c 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1072,14 +1072,11 @@ static void team_port_leave(struct team *team, struct team_port *port) } #ifdef CONFIG_NET_POLL_CONTROLLER -static int team_port_enable_netpoll(struct team *team, struct team_port *port) +static int __team_port_enable_netpoll(struct team_port *port) { struct netpoll *np; int err; - if (!team->dev->npinfo) - return 0; - np = kzalloc(sizeof(*np), GFP_KERNEL); if (!np) return -ENOMEM; @@ -1093,6 +1090,14 @@ static int team_port_enable_netpoll(struct team *team, struct team_port *port) return err; } +static int team_port_enable_netpoll(struct team_port *port) +{ + if (!port->team->dev->npinfo) + return 0; + + return __team_port_enable_netpoll(port); +} + static void team_port_disable_netpoll(struct team_port *port) { struct netpoll *np = port->np; @@ -1107,7 +1112,7 @@ static void team_port_disable_netpoll(struct team_port *port) kfree(np); } #else -static int team_port_enable_netpoll(struct team *team, struct team_port *port) +static int team_port_enable_netpoll(struct team_port *port) { return 0; } @@ -1221,7 +1226,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_vids_add; } - err = team_port_enable_netpoll(team, port); + err = team_port_enable_netpoll(port); if (err) { netdev_err(dev, "Failed to enable netpoll on device %s\n", portname); @@ -1918,7 +1923,7 @@ static int team_netpoll_setup(struct net_device *dev, mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { - err = team_port_enable_netpoll(team, port); + err = __team_port_enable_netpoll(port); if (err) { __team_netpoll_cleanup(team); break; From 117e3b7fed552eba96ae0b3b92312fe8c5b0bfdd Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 22 Apr 2018 10:24:19 -0500 Subject: [PATCH 531/660] CIFS: set *resp_buf_type to NO_BUFFER on error Dan Carpenter had pointed this out a while ago, but the code around this had changed so wasn't causing any problems since that field was not used in this error path. Still, it is cleaner to always initialize this field, so changing the error path to set it. Reviewed-by: Ronnie Sahlberg CC: Dan Carpenter Signed-off-by: Steve French --- fs/cifs/transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 8f6f25918229..3fb0e433b8e2 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -834,8 +834,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), GFP_KERNEL); - if (!new_iov) + if (!new_iov) { + /* otherwise cifs_send_recv below sets resp_buf_type */ + *resp_buf_type = CIFS_NO_BUFFER; return -ENOMEM; + } } else new_iov = s_iov; From 23657ad7305ee8b263d27335abdd00917764c9cf Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 22 Apr 2018 15:14:58 -0500 Subject: [PATCH 532/660] SMB3: Fix 3.11 encryption to Windows and handle encrypted smb3 tcon Temporarily disable AES-GCM, as AES-CCM is only currently enabled mechanism on client side. This fixes SMB3.11 encrypted mounts to Windows. Also the tree connect request itself should be encrypted if requested encryption ("seal" on mount), in addition we should be enabling encryption in 3.11 based on whether we got any valid encryption ciphers back in negprot (the corresponding session flag is not set as it is in 3.0 and 3.02) Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg CC: Stable --- fs/cifs/connect.c | 32 ++++++++++++++++---------------- fs/cifs/smb2pdu.c | 9 +++++---- fs/cifs/smb2pdu.h | 2 +- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e8830f076a7f..a5aa158d535a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2959,6 +2959,22 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) } } + if (volume_info->seal) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "SMB3 or later required for encryption\n"); + rc = -EOPNOTSUPP; + goto out_fail; + } else if (tcon->ses->server->capabilities & + SMB2_GLOBAL_CAP_ENCRYPTION) + tcon->seal = true; + else { + cifs_dbg(VFS, "Encryption is not supported on share\n"); + rc = -EOPNOTSUPP; + goto out_fail; + } + } + /* * BB Do we need to wrap session_mutex around this TCon call and Unix * SetFS as we do on SessSetup and reconnect? @@ -3007,22 +3023,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) tcon->use_resilient = true; } - if (volume_info->seal) { - if (ses->server->vals->protocol_id == 0) { - cifs_dbg(VFS, - "SMB3 or later required for encryption\n"); - rc = -EOPNOTSUPP; - goto out_fail; - } else if (tcon->ses->server->capabilities & - SMB2_GLOBAL_CAP_ENCRYPTION) - tcon->seal = true; - else { - cifs_dbg(VFS, "Encryption is not supported on share\n"); - rc = -EOPNOTSUPP; - goto out_fail; - } - } - /* * We can have only one retry value for a connection to a share so for * resources mounted more than once to the same server share the last diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0f044c4a2dc9..9aea138dd71f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -383,10 +383,10 @@ static void build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt) { pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES; - pneg_ctxt->DataLength = cpu_to_le16(6); - pneg_ctxt->CipherCount = cpu_to_le16(2); - pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM; - pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM; + pneg_ctxt->DataLength = cpu_to_le16(4); /* Cipher Count + le16 cipher */ + pneg_ctxt->CipherCount = cpu_to_le16(1); +/* pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;*/ /* not supported yet */ + pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_CCM; } static void @@ -444,6 +444,7 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server, return -EINVAL; } server->cipher_type = ctxt->Ciphers[0]; + server->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; return 0; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6093e5142b2b..d28f358022c5 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -297,7 +297,7 @@ struct smb2_encryption_neg_context { __le16 DataLength; __le32 Reserved; __le16 CipherCount; /* AES-128-GCM and AES-128-CCM */ - __le16 Ciphers[2]; /* Ciphers[0] since only one used now */ + __le16 Ciphers[1]; /* Ciphers[0] since only one used now */ } __packed; struct smb2_negotiate_rsp { From 39035bfdc3f18987aba04165060bfbfa10ffc1cd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 Mar 2018 15:21:48 +0100 Subject: [PATCH 533/660] ixgbevf: ensure xdp_ring resources are free'd on error exit The current error handling for failed resource setup for xdp_ring data is a break out of the loop and returning 0 indicated everything was OK, when in fact it is not. Fix this by exiting via the error exit label err_setup_tx that will clean up the resources correctly and return and error status. Detected by CoverityScan, CID#1466879 ("Logically dead code") Fixes: 21092e9ce8b1 ("ixgbevf: Add support for XDP_TX action") Signed-off-by: Colin Ian King Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3d9033f26eff..e3d04f226d57 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3420,7 +3420,7 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) if (!err) continue; hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j); - break; + goto err_setup_tx; } return 0; From 22be37acce25d66ecf6403fc8f44df9c5ded2372 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 24 Apr 2018 11:31:44 -0400 Subject: [PATCH 534/660] ext4: fix bitmap position validation Currently in ext4_valid_block_bitmap() we expect the bitmap to be positioned anywhere between 0 and s_blocksize clusters, but that's wrong because the bitmap can be placed anywhere in the block group. This causes false positives when validating bitmaps on perfectly valid file system layouts. Fix it by checking whether the bitmap is within the group boundary. The problem can be reproduced using the following mkfs -t ext3 -E stride=256 /dev/vdb1 mount /dev/vdb1 /mnt/test cd /mnt/test wget https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.16.3.tar.xz tar xf linux-4.16.3.tar.xz This will result in the warnings in the logs EXT4-fs error (device vdb1): ext4_validate_block_bitmap:399: comm tar: bg 84: block 2774529: invalid block bitmap [ Changed slightly for clarity and to not drop a overflow test -- TYT ] Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Reported-by: Ilya Dryomov Fixes: 7dac4a1726a9 ("ext4: add validity checks for bitmap block numbers") Cc: stable@vger.kernel.org --- fs/ext4/balloc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index a33d8fb1bf2a..508b905d744d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -321,6 +321,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; + ext4_grpblk_t max_bit = EXT4_CLUSTERS_PER_GROUP(sb); ext4_fsblk_t blk; ext4_fsblk_t group_first_block; @@ -338,7 +339,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, /* check whether block bitmap block number is set */ blk = ext4_block_bitmap(sb, desc); offset = blk - group_first_block; - if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize || + if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit || !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; @@ -346,7 +347,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, /* check whether the inode bitmap block number is set */ blk = ext4_inode_bitmap(sb, desc); offset = blk - group_first_block; - if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize || + if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit || !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; @@ -354,8 +355,8 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, /* check whether the inode table block number is set */ blk = ext4_inode_table(sb, desc); offset = blk - group_first_block; - if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize || - EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= sb->s_blocksize) + if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit || + EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= max_bit) return blk; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, EXT4_B2C(sbi, offset + sbi->s_itb_per_group), From 6510bbc88e3258631831ade49033537081950605 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Thu, 1 Mar 2018 14:39:39 +0100 Subject: [PATCH 535/660] mtd: cfi: cmdset_0001: Do not allow read/write to suspend erase block. Currently it is possible to read and/or write to suspend EB's. Writing /dev/mtdX or /dev/mtdblockX from several processes may break the flash state machine. Signed-off-by: Joakim Tjernlund Cc: Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon --- drivers/mtd/chips/cfi_cmdset_0001.c | 16 +++++++++++----- include/linux/mtd/flashchip.h | 1 + 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index d4c07b85f18e..8833115fbc11 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -831,21 +831,25 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long (mode == FL_WRITING && (cfip->SuspendCmdSupport & 1)))) goto sleep; + /* Do not allow suspend iff read/write to EB address */ + if ((adr & chip->in_progress_block_mask) == + chip->in_progress_block_addr) + goto sleep; /* Erase suspend */ - map_write(map, CMD(0xB0), adr); + map_write(map, CMD(0xB0), chip->in_progress_block_addr); /* If the flash has finished erasing, then 'erase suspend' * appears to make some (28F320) flash devices switch to * 'read' mode. Make sure that we switch to 'read status' * mode so we get the right data. --rmk */ - map_write(map, CMD(0x70), adr); + map_write(map, CMD(0x70), chip->in_progress_block_addr); chip->oldstate = FL_ERASING; chip->state = FL_ERASE_SUSPENDING; chip->erase_suspended = 1; for (;;) { - status = map_read(map, adr); + status = map_read(map, chip->in_progress_block_addr); if (map_word_andequal(map, status, status_OK, status_OK)) break; @@ -1041,8 +1045,8 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad sending the 0x70 (Read Status) command to an erasing chip and expecting it to be ignored, that's what we do. */ - map_write(map, CMD(0xd0), adr); - map_write(map, CMD(0x70), adr); + map_write(map, CMD(0xd0), chip->in_progress_block_addr); + map_write(map, CMD(0x70), chip->in_progress_block_addr); chip->oldstate = FL_READY; chip->state = FL_ERASING; break; @@ -1933,6 +1937,8 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, map_write(map, CMD(0xD0), adr); chip->state = FL_ERASING; chip->erase_suspended = 0; + chip->in_progress_block_addr = adr; + chip->in_progress_block_mask = ~(len - 1); ret = INVAL_CACHE_AND_WAIT(map, chip, adr, adr, len, diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index b63fa457febd..3529683f691e 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -85,6 +85,7 @@ struct flchip { unsigned int write_suspended:1; unsigned int erase_suspended:1; unsigned long in_progress_block_addr; + unsigned long in_progress_block_mask; struct mutex mutex; wait_queue_head_t wq; /* Wait on here when we're waiting for the chip From 46a16a2283f9e678a4e26829175e0c37a5191860 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Thu, 1 Mar 2018 14:39:40 +0100 Subject: [PATCH 536/660] mtd: cfi: cmdset_0001: Workaround Micron Erase suspend bug. Some Micron chips does not work well wrt Erase suspend for boot blocks. This avoids the issue by not allowing Erase suspend for the boot blocks for the 28F00AP30(1GBit) chip. Signed-off-by: Joakim Tjernlund Cc: Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon --- drivers/mtd/chips/cfi_cmdset_0001.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index 8833115fbc11..f5695be14499 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -45,6 +45,7 @@ #define I82802AB 0x00ad #define I82802AC 0x00ac #define PF38F4476 0x881c +#define M28F00AP30 0x8963 /* STMicroelectronics chips */ #define M50LPW080 0x002F #define M50FLW080A 0x0080 @@ -375,6 +376,17 @@ static void cfi_fixup_major_minor(struct cfi_private *cfi, extp->MinorVersion = '1'; } +static int cfi_is_micron_28F00AP30(struct cfi_private *cfi, struct flchip *chip) +{ + /* + * Micron(was Numonyx) 1Gbit bottom boot are buggy w.r.t + * Erase Supend for their small Erase Blocks(0x8000) + */ + if (cfi->mfr == CFI_MFR_INTEL && cfi->id == M28F00AP30) + return 1; + return 0; +} + static inline struct cfi_pri_intelext * read_pri_intelext(struct map_info *map, __u16 adr) { @@ -836,6 +848,11 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long chip->in_progress_block_addr) goto sleep; + /* do not suspend small EBs, buggy Micron Chips */ + if (cfi_is_micron_28F00AP30(cfi, chip) && + (chip->in_progress_block_mask == ~(0x8000-1))) + goto sleep; + /* Erase suspend */ map_write(map, CMD(0xB0), chip->in_progress_block_addr); From 7b70eb14392a7cf505f9b358d06c33b5af73d1e7 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Thu, 1 Mar 2018 14:39:41 +0100 Subject: [PATCH 537/660] mtd: cfi: cmdset_0002: Do not allow read/write to suspend erase block. Currently it is possible to read and/or write to suspend EB's. Writing /dev/mtdX or /dev/mtdblockX from several processes may break the flash state machine. Taken from cfi_cmdset_0001 driver. Signed-off-by: Joakim Tjernlund Cc: Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon --- drivers/mtd/chips/cfi_cmdset_0002.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 668e2cbc155b..692902df2598 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -816,9 +816,10 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr (mode == FL_WRITING && (cfip->EraseSuspend & 0x2)))) goto sleep; - /* We could check to see if we're trying to access the sector - * that is currently being erased. However, no user will try - * anything like that so we just wait for the timeout. */ + /* Do not allow suspend iff read/write to EB address */ + if ((adr & chip->in_progress_block_mask) == + chip->in_progress_block_addr) + goto sleep; /* Erase suspend */ /* It's harmless to issue the Erase-Suspend and Erase-Resume @@ -2267,6 +2268,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) chip->state = FL_ERASING; chip->erase_suspended = 0; chip->in_progress_block_addr = adr; + chip->in_progress_block_mask = ~(map->size - 1); INVALIDATE_CACHE_UDELAY(map, chip, adr, map->size, @@ -2356,6 +2358,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, chip->state = FL_ERASING; chip->erase_suspended = 0; chip->in_progress_block_addr = adr; + chip->in_progress_block_mask = ~(len - 1); INVALIDATE_CACHE_UDELAY(map, chip, adr, len, From 2707df9773cd2cb8b0f35b8592431b301da9d352 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 30 Mar 2018 17:06:52 -0700 Subject: [PATCH 538/660] igb: Fix the transmission mode of queue 0 for Qav mode When Qav mode is enabled, queue 0 should be kept on Stream Reservation mode. From the i210 datasheet, section 8.12.19: "Note: Queue0 QueueMode must be set to 1b when TransmitMode is set to Qav." ("QueueMode 1b" represents the Stream Reservation mode) The solution is to give queue 0 the all the credits it might need, so it has priority over queue 1. A situation where this can happen is when cbs is "installed" only on queue 1, leaving queue 0 alone. For example: $ tc qdisc replace dev enp2s0 handle 100: parent root mqprio num_tc 3 \ map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 0 $ tc qdisc replace dev enp2s0 parent 100:2 cbs locredit -1470 \ hicredit 30 sendslope -980000 idleslope 20000 offload 1 Signed-off-by: Vinicius Costa Gomes Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c1c0bc30a16d..cce7ada89255 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1700,7 +1700,22 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue, WARN_ON(hw->mac.type != e1000_i210); WARN_ON(queue < 0 || queue > 1); - if (enable) { + if (enable || queue == 0) { + /* i210 does not allow the queue 0 to be in the Strict + * Priority mode while the Qav mode is enabled, so, + * instead of disabling strict priority mode, we give + * queue 0 the maximum of credits possible. + * + * See section 8.12.19 of the i210 datasheet, "Note: + * Queue0 QueueMode must be set to 1b when + * TransmitMode is set to Qav." + */ + if (queue == 0 && !enable) { + /* max "linkspeed" idleslope in kbps */ + idleslope = 1000000; + hicredit = ETH_FRAME_LEN; + } + set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH); set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION); From d332a38c9519e0208f04da465bc88427db3485f6 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 10 Apr 2018 10:49:49 -0700 Subject: [PATCH 539/660] ice: Fix initialization for num_nodes_added ice_sched_add_nodes_to_layer is used recursively, and so we start with num_nodes_added being 0. This way, in case of an error or if num_nodes is NULL, the function just returns 0 to indicate that no nodes were added. Fixes: 5513b920a4f7 ("ice: Update Tx scheduler tree for VSI multi-Tx queue support") Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index f16ff3e4a840..2e6c1d92cc88 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -751,14 +751,14 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi, u16 num_added = 0; u32 temp; + *num_nodes_added = 0; + if (!num_nodes) return status; if (!parent || layer < hw->sw_entry_point_layer) return ICE_ERR_PARAM; - *num_nodes_added = 0; - /* max children per node per layer */ max_child_nodes = le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children); From 34357a90d5ca8228df4f88b21197f970285b209b Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Wed, 11 Apr 2018 10:41:47 -0700 Subject: [PATCH 540/660] ice: Fix incorrect comment for action type Action type 5 defines large action generic values. Fix comment to reflect that better. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 5b13ca1bd85f..7dc5f045e969 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -586,7 +586,7 @@ struct ice_sw_rule_lg_act { #define ICE_LG_ACT_MIRROR_VSI_ID_S 3 #define ICE_LG_ACT_MIRROR_VSI_ID_M (0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S) - /* Action type = 5 - Large Action */ + /* Action type = 5 - Generic Value */ #define ICE_LG_ACT_GENERIC 0x5 #define ICE_LG_ACT_GENERIC_VALUE_S 3 #define ICE_LG_ACT_GENERIC_VALUE_M (0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S) From 6c1e851c4edc13a43adb3ea4044e3fc8f43ccf7d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Apr 2018 18:51:28 -0400 Subject: [PATCH 541/660] random: fix possible sleeping allocation from irq context We can do a sleeping allocation from an irq context when CONFIG_NUMA is enabled. Fix this by initializing the NUMA crng instances in a workqueue. Reported-by: Tetsuo Handa Reported-by: syzbot+9de458f6a5e713ee8c1a@syzkaller.appspotmail.com Fixes: 8ef35c866f8862df ("random: set up the NUMA crng instances...") Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 3cd3aae24d6d..721dca8db9cf 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -789,7 +789,7 @@ static void crng_initialize(struct crng_state *crng) } #ifdef CONFIG_NUMA -static void numa_crng_init(void) +static void do_numa_crng_init(struct work_struct *work) { int i; struct crng_state *crng; @@ -810,6 +810,13 @@ static void numa_crng_init(void) kfree(pool); } } + +static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init); + +static void numa_crng_init(void) +{ + schedule_work(&numa_crng_init_work); +} #else static void numa_crng_init(void) {} #endif From 30d84397affb0fcb11beaf049caabfcb1dac65a6 Mon Sep 17 00:00:00 2001 From: Ben Shelton Date: Wed, 11 Apr 2018 12:21:33 -0700 Subject: [PATCH 542/660] ice: Do not check INTEVENT bit for OICR interrupts According to the hardware spec, checking the INTEVENT bit isn't a reliable way to detect if an OICR interrupt has occurred. This is because this bit can be cleared by the hardware/firmware before the interrupt service routine has run. So instead, just check for OICR events every time. Fixes: 940b61af02f4 ("ice: Initialize PF and setup miscellaneous interrupt") Signed-off-by: Ben Shelton Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 2 -- drivers/net/ethernet/intel/ice/ice_main.c | 4 ---- 2 files changed, 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 1b9e2ef48a9d..499904874b3f 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -121,8 +121,6 @@ #define PFINT_FW_CTL_CAUSE_ENA_S 30 #define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S) #define PFINT_OICR 0x0016CA00 -#define PFINT_OICR_INTEVENT_S 0 -#define PFINT_OICR_INTEVENT_M BIT(PFINT_OICR_INTEVENT_S) #define PFINT_OICR_HLP_RDY_S 14 #define PFINT_OICR_HLP_RDY_M BIT(PFINT_OICR_HLP_RDY_S) #define PFINT_OICR_CPM_RDY_S 15 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 210b7910f1cd..5299caf55a7f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1722,9 +1722,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); - if (!(oicr & PFINT_OICR_INTEVENT_M)) - goto ena_intr; - if (oicr & PFINT_OICR_GRST_M) { u32 reset; /* we have a reset warning */ @@ -1782,7 +1779,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) } ret = IRQ_HANDLED; -ena_intr: /* re-enable interrupt causes that are not handled during this pass */ wr32(hw, PFINT_OICR_ENA, ena_mask); if (!test_bit(__ICE_DOWN, pf->state)) { From a6361f0ca4b25460f2cdf3235ebe8115f622901e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 23 Apr 2018 17:37:03 -0400 Subject: [PATCH 543/660] packet: fix bitfield update race Updates to the bitfields in struct packet_sock are not atomic. Serialize these read-modify-write cycles. Move po->running into a separate variable. Its writes are protected by po->bind_lock (except for one startup case at packet_create). Also replace a textual precondition warning with lockdep annotation. All others are set only in packet_setsockopt. Serialize these updates by holding the socket lock. Analogous to other field updates, also hold the lock when testing whether a ring is active (pg_vec). Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg") Reported-by: DaeRyong Jeong Reported-by: Byoungyoung Lee Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 60 +++++++++++++++++++++++++++++++----------- net/packet/internal.h | 10 +++---- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c31b0687396a..01f3515cada0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -329,11 +329,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) skb_set_queue_mapping(skb, queue_index); } -/* register_prot_hook must be invoked with the po->bind_lock held, +/* __register_prot_hook must be invoked through register_prot_hook * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */ -static void register_prot_hook(struct sock *sk) +static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); @@ -348,8 +348,13 @@ static void register_prot_hook(struct sock *sk) } } -/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock - * held. If the sync parameter is true, we will temporarily drop +static void register_prot_hook(struct sock *sk) +{ + lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); + __register_prot_hook(sk); +} + +/* If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the @@ -359,6 +364,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); + lockdep_assert_held_once(&po->bind_lock); + po->running = 0; if (po->fanout) @@ -3252,7 +3259,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (proto) { po->prot_hook.type = proto; - register_prot_hook(sk); + __register_prot_hook(sk); } mutex_lock(&net->packet.sklist_lock); @@ -3732,12 +3739,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_loss = !!val; - return 0; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_loss = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_AUXDATA: { @@ -3748,7 +3761,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + lock_sock(sk); po->auxdata = !!val; + release_sock(sk); return 0; } case PACKET_ORIGDEV: @@ -3760,7 +3775,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + lock_sock(sk); po->origdev = !!val; + release_sock(sk); return 0; } case PACKET_VNET_HDR: @@ -3769,15 +3786,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (sock->type != SOCK_RAW) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->has_vnet_hdr = !!val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->has_vnet_hdr = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_TIMESTAMP: { @@ -3815,11 +3837,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tx_has_off = !!val; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_tx_has_off = !!val; + ret = 0; + } + release_sock(sk); return 0; } case PACKET_QDISC_BYPASS: diff --git a/net/packet/internal.h b/net/packet/internal.h index a1d2b2319ae9..3bb7c5fb3bff 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -112,10 +112,12 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, + unsigned int running; /* bind_lock must be held */ + unsigned int auxdata:1, /* writer must hold sock lock */ origdev:1, - has_vnet_hdr:1; + has_vnet_hdr:1, + tp_loss:1, + tp_tx_has_off:1; int pressure; int ifindex; /* bound device */ __be16 num; @@ -125,8 +127,6 @@ struct packet_sock { enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb); From d805c5209350ae725e3a1ee0204ba27d9e75ce3e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 23 Apr 2018 15:51:38 -0700 Subject: [PATCH 544/660] net: ethtool: Add missing kernel doc for FEC parameters While adding support for ethtool::get_fecparam and set_fecparam, kernel doc for these functions was missed, add those. Fixes: 1a5f3da20bd9 ("net: ethtool: add support for forward error correction modes") Signed-off-by: Florian Fainelli Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ebe41811ed34..b32cd2062f18 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -310,6 +310,8 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS * instead of the latter), any change to them will be overwritten * by kernel. Returns a negative error code or zero. + * @get_fecparam: Get the network device Forward Error Correction parameters. + * @set_fecparam: Set the network device Forward Error Correction parameters. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must From f8d6203780b73c07dc49ee421fedae8edb76b6e4 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 24 Apr 2018 17:09:30 +0100 Subject: [PATCH 545/660] sfc: ARFS filter IDs Associate an arbitrary ID with each ARFS filter, allowing to properly query for expiry. The association is maintained in a hash table, which is protected by a spinlock. v3: fix build warnings when CONFIG_RFS_ACCEL is disabled (thanks lkp-robot). v2: fixed uninitialised variable (thanks davem and lkp-robot). Fixes: 3af0f34290f6 ("sfc: replace asynchronous filter operations") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 80 ++++++++------ drivers/net/ethernet/sfc/efx.c | 143 ++++++++++++++++++++++++++ drivers/net/ethernet/sfc/efx.h | 21 ++++ drivers/net/ethernet/sfc/farch.c | 41 ++++++-- drivers/net/ethernet/sfc/net_driver.h | 36 +++++++ drivers/net/ethernet/sfc/rx.c | 62 ++++++++++- 6 files changed, 337 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 83ce229f4eb7..63036d9bf3e6 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3999,29 +3999,6 @@ static void efx_ef10_prepare_flr(struct efx_nic *efx) atomic_set(&efx->active_queues, 0); } -static bool efx_ef10_filter_equal(const struct efx_filter_spec *left, - const struct efx_filter_spec *right) -{ - if ((left->match_flags ^ right->match_flags) | - ((left->flags ^ right->flags) & - (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) - return false; - - return memcmp(&left->outer_vid, &right->outer_vid, - sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) == 0; -} - -static unsigned int efx_ef10_filter_hash(const struct efx_filter_spec *spec) -{ - BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); - return jhash2((const u32 *)&spec->outer_vid, - (sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) / 4, - 0); - /* XXX should we randomise the initval? */ -} - /* Decide whether a filter should be exclusive or else should allow * delivery to additional recipients. Currently we decide that * filters for specific local unicast MAC and IP addresses are @@ -4346,7 +4323,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx, goto out_unlock; match_pri = rc; - hash = efx_ef10_filter_hash(spec); + hash = efx_filter_spec_hash(spec); is_mc_recip = efx_filter_is_mc_recipient(spec); if (is_mc_recip) bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); @@ -4378,7 +4355,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx, if (!saved_spec) { if (ins_index < 0) ins_index = i; - } else if (efx_ef10_filter_equal(spec, saved_spec)) { + } else if (efx_filter_spec_equal(spec, saved_spec)) { if (spec->priority < saved_spec->priority && spec->priority != EFX_FILTER_PRI_AUTO) { rc = -EPERM; @@ -4762,27 +4739,62 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx, static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, unsigned int filter_idx) { + struct efx_filter_spec *spec, saved_spec; struct efx_ef10_filter_table *table; - struct efx_filter_spec *spec; - bool ret; + struct efx_arfs_rule *rule = NULL; + bool ret = true, force = false; + u16 arfs_id; down_read(&efx->filter_sem); table = efx->filter_state; down_write(&table->lock); spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (!spec || spec->priority != EFX_FILTER_PRI_HINT) { - ret = true; + if (!spec || spec->priority != EFX_FILTER_PRI_HINT) goto out_unlock; - } - if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, 0)) { + spin_lock_bh(&efx->rps_hash_lock); + if (!efx->rps_hash_table) { + /* In the absence of the table, we always return 0 to ARFS. */ + arfs_id = 0; + } else { + rule = efx_rps_hash_find(efx, spec); + if (!rule) + /* ARFS table doesn't know of this filter, so remove it */ + goto expire; + arfs_id = rule->arfs_id; + ret = efx_rps_check_rule(rule, filter_idx, &force); + if (force) + goto expire; + if (!ret) { + spin_unlock_bh(&efx->rps_hash_lock); + goto out_unlock; + } + } + if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id)) ret = false; - goto out_unlock; - } - + else if (rule) + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; +expire: + saved_spec = *spec; /* remove operation will kfree spec */ + spin_unlock_bh(&efx->rps_hash_lock); + /* At this point (since we dropped the lock), another thread might queue + * up a fresh insertion request (but the actual insertion will be held + * up by our possession of the filter table lock). In that case, it + * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that + * the rule is not removed by efx_rps_hash_del() below. + */ ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, filter_idx, true) == 0; + /* While we can't safely dereference rule (we dropped the lock), we can + * still test it for NULL. + */ + if (ret && rule) { + /* Expiring, so remove entry from ARFS table */ + spin_lock_bh(&efx->rps_hash_lock); + efx_rps_hash_del(efx, &saved_spec); + spin_unlock_bh(&efx->rps_hash_lock); + } out_unlock: up_write(&table->lock); up_read(&efx->filter_sem); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 692dd729ee2a..a4ebd8715494 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3027,6 +3027,10 @@ static int efx_init_struct(struct efx_nic *efx, mutex_init(&efx->mac_lock); #ifdef CONFIG_RFS_ACCEL mutex_init(&efx->rps_mutex); + spin_lock_init(&efx->rps_hash_lock); + /* Failure to allocate is not fatal, but may degrade ARFS performance */ + efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, + sizeof(*efx->rps_hash_table), GFP_KERNEL); #endif efx->phy_op = &efx_dummy_phy_operations; efx->mdio.dev = net_dev; @@ -3070,6 +3074,10 @@ static void efx_fini_struct(struct efx_nic *efx) { int i; +#ifdef CONFIG_RFS_ACCEL + kfree(efx->rps_hash_table); +#endif + for (i = 0; i < EFX_MAX_CHANNELS; i++) kfree(efx->channel[i]); @@ -3092,6 +3100,141 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops); } +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right) +{ + if ((left->match_flags ^ right->match_flags) | + ((left->flags ^ right->flags) & + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) + return false; + + return memcmp(&left->outer_vid, &right->outer_vid, + sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) == 0; +} + +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) +{ + BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); + return jhash2((const u32 *)&spec->outer_vid, + (sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) / 4, + 0); +} + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force) +{ + if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { + /* ARFS is currently updating this entry, leave it */ + return false; + } + if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { + /* ARFS tried and failed to update this, so it's probably out + * of date. Remove the filter and the ARFS rule entry. + */ + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; + *force = true; + return true; + } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ + /* ARFS has moved on, so old filter is not needed. Since we did + * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will + * not be removed by efx_rps_hash_del() subsequently. + */ + *force = true; + return true; + } + /* Remove it iff ARFS wants to. */ + return true; +} + +struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + u32 hash = efx_filter_spec_hash(spec); + + WARN_ON(!spin_is_locked(&efx->rps_hash_lock)); + if (!efx->rps_hash_table) + return NULL; + return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; +} + +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) + return rule; + } + return NULL; +} + +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + *new = false; + return rule; + } + } + rule = kmalloc(sizeof(*rule), GFP_ATOMIC); + *new = true; + if (rule) { + memcpy(&rule->spec, spec, sizeof(rule->spec)); + hlist_add_head(&rule->node, head); + } + return rule; +} + +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (WARN_ON(!head)) + return; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + /* Someone already reused the entry. We know that if + * this check doesn't fire (i.e. filter_id == REMOVING) + * then the REMOVING mark was put there by our caller, + * because caller is holding a lock on filter table and + * only holders of that lock set REMOVING. + */ + if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) + return; + hlist_del(node); + kfree(rule); + return; + } + } + /* We didn't find it. */ + WARN_ON(1); +} +#endif + /* RSS contexts. We're using linked lists and crappy O(n) algorithms, because * (a) this is an infrequent control-plane operation and (b) n is small (max 64) */ diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index a3140e16fcef..3f759ebdcf10 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -186,6 +186,27 @@ static inline void efx_filter_rfs_expire(struct work_struct *data) {} #endif bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right); +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force); + +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec); + +/* @new is written to indicate if entry was newly added (true) or if an old + * entry was found and returned (false). + */ +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new); + +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); +#endif + /* RSS contexts */ struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 7174ef5e5c5e..c72adf8b52ea 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -2905,18 +2905,45 @@ bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, { struct efx_farch_filter_state *state = efx->filter_state; struct efx_farch_filter_table *table; - bool ret = false; + bool ret = false, force = false; + u16 arfs_id; down_write(&state->lock); + spin_lock_bh(&efx->rps_hash_lock); table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP]; if (test_bit(index, table->used_bitmap) && - table->spec[index].priority == EFX_FILTER_PRI_HINT && - rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id, - flow_id, 0)) { - efx_farch_filter_table_clear_entry(efx, table, index); - ret = true; - } + table->spec[index].priority == EFX_FILTER_PRI_HINT) { + struct efx_arfs_rule *rule = NULL; + struct efx_filter_spec spec; + efx_farch_filter_to_gen_spec(&spec, &table->spec[index]); + if (!efx->rps_hash_table) { + /* In the absence of the table, we always returned 0 to + * ARFS, so use the same to query it. + */ + arfs_id = 0; + } else { + rule = efx_rps_hash_find(efx, &spec); + if (!rule) { + /* ARFS table doesn't know of this filter, remove it */ + force = true; + } else { + arfs_id = rule->arfs_id; + if (!efx_rps_check_rule(rule, index, &force)) + goto out_unlock; + } + } + if (force || rps_may_expire_flow(efx->net_dev, spec.dmaq_id, + flow_id, arfs_id)) { + if (rule) + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; + efx_rps_hash_del(efx, &spec); + efx_farch_filter_table_clear_entry(efx, table, index); + ret = true; + } + } +out_unlock: + spin_unlock_bh(&efx->rps_hash_lock); up_write(&state->lock); return ret; } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index eea3808b3f25..65568925c3ef 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -734,6 +734,35 @@ struct efx_rss_context { }; #ifdef CONFIG_RFS_ACCEL +/* Order of these is important, since filter_id >= %EFX_ARFS_FILTER_ID_PENDING + * is used to test if filter does or will exist. + */ +#define EFX_ARFS_FILTER_ID_PENDING -1 +#define EFX_ARFS_FILTER_ID_ERROR -2 +#define EFX_ARFS_FILTER_ID_REMOVING -3 +/** + * struct efx_arfs_rule - record of an ARFS filter and its IDs + * @node: linkage into hash table + * @spec: details of the filter (used as key for hash table). Use efx->type to + * determine which member to use. + * @rxq_index: channel to which the filter will steer traffic. + * @arfs_id: filter ID which was returned to ARFS + * @filter_id: index in software filter table. May be + * %EFX_ARFS_FILTER_ID_PENDING if filter was not inserted yet, + * %EFX_ARFS_FILTER_ID_ERROR if filter insertion failed, or + * %EFX_ARFS_FILTER_ID_REMOVING if expiry is currently removing the filter. + */ +struct efx_arfs_rule { + struct hlist_node node; + struct efx_filter_spec spec; + u16 rxq_index; + u16 arfs_id; + s32 filter_id; +}; + +/* Size chosen so that the table is one page (4kB) */ +#define EFX_ARFS_HASH_TABLE_SIZE 512 + /** * struct efx_async_filter_insertion - Request to asynchronously insert a filter * @net_dev: Reference to the netdevice @@ -873,6 +902,10 @@ struct efx_async_filter_insertion { * @rps_expire_channel's @rps_flow_id * @rps_slot_map: bitmap of in-flight entries in @rps_slot * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work() + * @rps_hash_lock: Protects ARFS filter mapping state (@rps_hash_table and + * @rps_next_id). + * @rps_hash_table: Mapping between ARFS filters and their various IDs + * @rps_next_id: next arfs_id for an ARFS filter * @active_queues: Count of RX and TX queues that haven't been flushed and drained. * @rxq_flush_pending: Count of number of receive queues that need to be flushed. * Decremented when the efx_flush_rx_queue() is called. @@ -1029,6 +1062,9 @@ struct efx_nic { unsigned int rps_expire_index; unsigned long rps_slot_map; struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT]; + spinlock_t rps_hash_lock; + struct hlist_head *rps_hash_table; + u32 rps_next_id; #endif atomic_t active_queues; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 9c593c661cbf..64a94f242027 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -834,9 +834,29 @@ static void efx_filter_rfs_work(struct work_struct *data) struct efx_nic *efx = netdev_priv(req->net_dev); struct efx_channel *channel = efx_get_channel(efx, req->rxq_index); int slot_idx = req - efx->rps_slot; + struct efx_arfs_rule *rule; + u16 arfs_id = 0; int rc; rc = efx->type->filter_insert(efx, &req->spec, true); + if (efx->rps_hash_table) { + spin_lock_bh(&efx->rps_hash_lock); + rule = efx_rps_hash_find(efx, &req->spec); + /* The rule might have already gone, if someone else's request + * for the same spec was already worked and then expired before + * we got around to our work. In that case we have nothing + * tying us to an arfs_id, meaning that as soon as the filter + * is considered for expiry it will be removed. + */ + if (rule) { + if (rc < 0) + rule->filter_id = EFX_ARFS_FILTER_ID_ERROR; + else + rule->filter_id = rc; + arfs_id = rule->arfs_id; + } + spin_unlock_bh(&efx->rps_hash_lock); + } if (rc >= 0) { /* Remember this so we can check whether to expire the filter * later. @@ -848,18 +868,18 @@ static void efx_filter_rfs_work(struct work_struct *data) if (req->spec.ether_type == htons(ETH_P_IP)) netif_info(efx, rx_status, efx->net_dev, - "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n", + "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n", (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), - req->rxq_index, req->flow_id, rc); + req->rxq_index, req->flow_id, rc, arfs_id); else netif_info(efx, rx_status, efx->net_dev, - "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n", + "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n", (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), - req->rxq_index, req->flow_id, rc); + req->rxq_index, req->flow_id, rc, arfs_id); } /* Release references */ @@ -872,8 +892,10 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, { struct efx_nic *efx = netdev_priv(net_dev); struct efx_async_filter_insertion *req; + struct efx_arfs_rule *rule; struct flow_keys fk; int slot_idx; + bool new; int rc; /* find a free slot */ @@ -926,12 +948,42 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, req->spec.rem_port = fk.ports.src; req->spec.loc_port = fk.ports.dst; + if (efx->rps_hash_table) { + /* Add it to ARFS hash table */ + spin_lock(&efx->rps_hash_lock); + rule = efx_rps_hash_add(efx, &req->spec, &new); + if (!rule) { + rc = -ENOMEM; + goto out_unlock; + } + if (new) + rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER; + rc = rule->arfs_id; + /* Skip if existing or pending filter already does the right thing */ + if (!new && rule->rxq_index == rxq_index && + rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING) + goto out_unlock; + rule->rxq_index = rxq_index; + rule->filter_id = EFX_ARFS_FILTER_ID_PENDING; + spin_unlock(&efx->rps_hash_lock); + } else { + /* Without an ARFS hash table, we just use arfs_id 0 for all + * filters. This means if multiple flows hash to the same + * flow_id, all but the most recently touched will be eligible + * for expiry. + */ + rc = 0; + } + + /* Queue the request */ dev_hold(req->net_dev = net_dev); INIT_WORK(&req->work, efx_filter_rfs_work); req->rxq_index = rxq_index; req->flow_id = flow_id; schedule_work(&req->work); - return 0; + return rc; +out_unlock: + spin_unlock(&efx->rps_hash_lock); out_clear: clear_bit(slot_idx, &efx->rps_slot_map); return rc; From 86e11757d8b28d8266065beaa9d391d49426797b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Apr 2018 14:53:51 +0200 Subject: [PATCH 546/660] riscv: select DMA_DIRECT_OPS instead of redefining it DMA_DIRECT_OPS is defined in lib/Kconfig, so don't duplicate it in arch/riscv/Kconfig. Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 23d8acca5c90..cd4fd85fde84 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -11,6 +11,7 @@ config RISCV select ARCH_WANT_FRAME_POINTERS select CLONE_BACKWARDS select COMMON_CLK + select DMA_DIRECT_OPS select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES select GENERIC_IRQ_SHOW @@ -89,9 +90,6 @@ config PGTABLE_LEVELS config HAVE_KPROBES def_bool n -config DMA_DIRECT_OPS - def_bool y - menu "Platform type" choice From 5b7252a268706a69c803fd88cfd6d0176fca0041 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Apr 2018 08:57:52 +0200 Subject: [PATCH 547/660] riscv: there is no So don't list it as generic-y. Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 1e5fd280fb4d..4286a5f83876 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -15,7 +15,6 @@ generic-y += fcntl.h generic-y += futex.h generic-y += hardirq.h generic-y += hash.h -generic-y += handle_irq.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h From 85602bea297fc4e5223adbf7006dcce9aa694f17 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Wed, 21 Mar 2018 22:26:31 +0100 Subject: [PATCH 548/660] RISC-V: build vdso-dummy.o with -no-pie Debian toolcahin defaults to PIE, and I guess that will also be the case of most distributions. This causes the following build failure: AS arch/riscv/kernel/vdso/getcpu.o AS arch/riscv/kernel/vdso/flush_icache.o VDSOLD arch/riscv/kernel/vdso/vdso.so.dbg OBJCOPY arch/riscv/kernel/vdso/vdso.so AS arch/riscv/kernel/vdso/vdso.o VDSOLD arch/riscv/kernel/vdso/vdso-dummy.o LD arch/riscv/kernel/vdso/vdso-syms.o riscv64-linux-gnu-ld: attempted static link of dynamic object `arch/riscv/kernel/vdso/vdso-dummy.o' make[2]: *** [arch/riscv/kernel/vdso/Makefile:43: arch/riscv/kernel/vdso/vdso-syms.o] Error 1 make[1]: *** [scripts/Makefile.build:575: arch/riscv/kernel/vdso] Error 2 make: *** [Makefile:1018: arch/riscv/kernel] Error 2 While the root Makefile correctly passes "-fno-PIE" to build individual object files, the RISC-V kernel also builds vdso-dummy.o as an executable, which is therefore linked as PIE. Fix that by updating this specific link rule to also include "-no-pie". Signed-off-by: Aurelien Jarno Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 324568d33921..f6561b783b61 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -52,7 +52,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Add -lgcc so rv32 gets static muldi3 and lshrdi3 definitions. # Make sure only to export the intended __vdso_xxx symbol offsets. quiet_cmd_vdsold = VDSOLD $@ - cmd_vdsold = $(CC) $(KCFLAGS) -nostdlib $(SYSCFLAGS_$(@F)) \ + cmd_vdsold = $(CC) $(KCFLAGS) $(call cc-option, -no-pie) -nostdlib $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp -lgcc && \ $(CROSS_COMPILE)objcopy \ $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ From 8e0428a7e7a8e521540f7f87ce1c55ae04acd708 Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Tue, 24 Apr 2018 18:24:43 +0100 Subject: [PATCH 549/660] ALSA: usb-audio: ADC3: Fix channel mapping conversion for ADC3. The channel mapping is defined by bChRelationship, not bChPurpose. Fixes: 9a2fe9b801f5 ("ALSA: usb: initial USB Audio Device Class 3.0 support") Reviewed-by: Ruslan Bilovol Signed-off-by: Michael Drake Signed-off-by: Jorge Sanjuan Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 6a8f5843334e..956be9f7c72a 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -349,7 +349,7 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor * TODO: this conversion is not complete, update it * after adding UAC3 values to asound.h */ - switch (is->bChPurpose) { + switch (is->bChRelationship) { case UAC3_CH_MONO: map = SNDRV_CHMAP_MONO; break; From 59275a0c037ed6fabd6354730f1e3104264ab719 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 24 Apr 2018 13:11:22 +0100 Subject: [PATCH 550/660] arm64: ptrace: remove addr_limit manipulation We transiently switch to KERNEL_DS in compat_ptrace_gethbpregs() and compat_ptrace_sethbpregs(), but in either case this is pointless as we don't perform any uaccess during this window. let's rip out the redundant addr_limit manipulation. Acked-by: Catalin Marinas Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 71d99af24ef2..7a832ce6552f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1458,9 +1458,7 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, { int ret; u32 kdata; - mm_segment_t old_fs = get_fs(); - set_fs(KERNEL_DS); /* Watchpoint */ if (num < 0) { ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata); @@ -1471,7 +1469,6 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, } else { ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata); } - set_fs(old_fs); if (!ret) ret = put_user(kdata, data); @@ -1484,7 +1481,6 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num, { int ret; u32 kdata = 0; - mm_segment_t old_fs = get_fs(); if (num == 0) return 0; @@ -1493,12 +1489,10 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num, if (ret) return ret; - set_fs(KERNEL_DS); if (num < 0) ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata); else ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata); - set_fs(old_fs); return ret; } From ed231ae384fdfcb546b63b2fe7add65029e3a94c Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 24 Apr 2018 10:39:43 -0500 Subject: [PATCH 551/660] arm64/kernel: rename module_emit_adrp_veneer->module_emit_veneer_for_adrp Commit a257e02579e ("arm64/kernel: don't ban ADRP to work around Cortex-A53 erratum #843419") introduced a function whose name ends with "_veneer". This clashes with commit bd8b22d2888e ("Kbuild: kallsyms: ignore veneers emitted by the ARM linker"), which removes symbols ending in "_veneer" from kallsyms. The problem was manifested as 'perf test -vvvvv vmlinux' failed, correctly claiming the symbol 'module_emit_adrp_veneer' was present in vmlinux, but not in kallsyms. ... ERR : 0xffff00000809aa58: module_emit_adrp_veneer not on kallsyms ... test child finished with -1 ---- end ---- vmlinux symtab matches kallsyms: FAILED! Fix the problem by renaming module_emit_adrp_veneer to module_emit_veneer_for_adrp. Now the test passes. Fixes: a257e02579e ("arm64/kernel: don't ban ADRP to work around Cortex-A53 erratum #843419") Acked-by: Ard Biesheuvel Cc: Will Deacon Cc: Catalin Marinas Cc: Michal Marek Signed-off-by: Kim Phillips Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.h | 2 +- arch/arm64/kernel/module-plts.c | 2 +- arch/arm64/kernel/module.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index b6dbbe3123a9..97d0ef12e2ff 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -39,7 +39,7 @@ struct mod_arch_specific { u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, Elf64_Sym *sym); -u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val); +u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val); #ifdef CONFIG_RANDOMIZE_BASE extern u64 module_alloc_base; diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index fa3637284a3d..f0690c2ca3e0 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -43,7 +43,7 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, } #ifdef CONFIG_ARM64_ERRATUM_843419 -u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val) +u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val) { struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : &mod->arch.init; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 719fde8dcc19..155fd91e78f4 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -215,7 +215,7 @@ static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val) insn &= ~BIT(31); } else { /* out of range for ADR -> emit a veneer */ - val = module_emit_adrp_veneer(mod, place, val & ~0xfff); + val = module_emit_veneer_for_adrp(mod, place, val & ~0xfff); if (!val) return -ENOEXEC; insn = aarch64_insn_gen_branch_imm((u64)place, val, From 9478f1927e6ef9ef5e1ad761af1c98aa8e40b7f5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 3 Apr 2018 11:22:51 +0100 Subject: [PATCH 552/660] arm64: only advance singlestep for user instruction traps Our arm64_skip_faulting_instruction() helper advances the userspace singlestep state machine, but this is also called by the kernel BRK handler, as used for WARN*(). Thus, if we happen to hit a WARN*() while the user singlestep state machine is in the active-no-pending state, we'll advance to the active-pending state without having executed a user instruction, and will take a step exception earlier than expected when we return to userspace. Let's fix this by only advancing the state machine when skipping a user instruction. Signed-off-by: Mark Rutland Cc: Andrey Konovalov Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 1cb2749a72bf..8bbdc17e49df 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -277,7 +277,8 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) * If we were single stepping, we want to get the step exception after * we return from the trap. */ - user_fastforward_single_step(current); + if (user_mode(regs)) + user_fastforward_single_step(current); } static LIST_HEAD(undef_hook); From ad40bdafb495f30e5af837e15b3c2a4cb2176e47 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 23 Dec 2017 01:43:23 +0100 Subject: [PATCH 553/660] arm64: support __int128 with clang Commit fb8722735f50 ("arm64: support __int128 on gcc 5+") added support for arm64 __int128 with gcc with a version-conditional, but neglected to enable this for clang, which in fact appears to support aarch64 __int128. This commit therefore enables it if the compiler is clang, using the same type of makefile conditional used elsewhere in the tree. Signed-off-by: Jason A. Donenfeld Signed-off-by: Will Deacon --- arch/arm64/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 15402861bb59..87f7d2f9f17c 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -56,7 +56,11 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) +ifeq ($(cc-name),clang) +KBUILD_CFLAGS += -DCONFIG_ARCH_SUPPORTS_INT128 +else KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128) +endif ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian From b40000325044433cd350725e2025214ae48b17fd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Apr 2018 16:37:04 +0300 Subject: [PATCH 554/660] virtio_balloon: add array of stat names Jason Wang points out that it's very hard for users to build an array of stat names. The naive thing is to use VIRTIO_BALLOON_S_NR but that breaks if we add more stats - as done e.g. recently by commit 6c64fe7f2 ("virtio_balloon: export hugetlb page allocation counts"). Let's add an array of reasonably readable names. Fixes: 6c64fe7f2 ("virtio_balloon: export hugetlb page allocation counts") Cc: Jason Wang Signed-off-by: Michael S. Tsirkin Reviewed-by: Jonathan Helman --- include/uapi/linux/virtio_balloon.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 40297a3181ed..13b8cb563892 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -57,6 +57,21 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ #define VIRTIO_BALLOON_S_NR 10 +#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ + VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ + VIRTIO_BALLOON_S_NAMES_prefix "swap-out", \ + VIRTIO_BALLOON_S_NAMES_prefix "major-faults", \ + VIRTIO_BALLOON_S_NAMES_prefix "minor-faults", \ + VIRTIO_BALLOON_S_NAMES_prefix "free-memory", \ + VIRTIO_BALLOON_S_NAMES_prefix "total-memory", \ + VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \ + VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \ + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \ +} + +#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") + /* * Memory statistics structure. * Driver fills an array of these structures and passes to device. From 292c34c10249c64a70def442f0d977bf9d466ed7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 24 Apr 2018 11:20:10 -0700 Subject: [PATCH 555/660] perf pmu: Fix core PMU alias list for X86 platform When counting uncore event with alias, core event is mistakenly involved, for example: perf stat --no-merge -e "unc_m_cas_count.all" -C0 sleep 1 Performance counter stats for 'CPU(s) 0': 0 unc_m_cas_count.all [uncore_imc_4] 0 unc_m_cas_count.all [uncore_imc_2] 0 unc_m_cas_count.all [uncore_imc_0] 153,640 unc_m_cas_count.all [cpu] 0 unc_m_cas_count.all [uncore_imc_5] 25,026 unc_m_cas_count.all [uncore_imc_3] 0 unc_m_cas_count.all [uncore_imc_1] 1.001447890 seconds time elapsed The reason is that current implementation doesn't check PMU name of a event when adding its alias into the alias list for core PMU. The uncore event aliases are mistakenly added. This bug was introduced in: commit 14b22ae028de ("perf pmu: Add helper function is_pmu_core to detect PMU CORE devices") Checking the PMU name for all PMUs on X86 and other architectures except ARM. There is no behavior change for ARM. Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Agustin Vega-Frias Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Fixes: 14b22ae028de ("perf pmu: Add helper function is_pmu_core to detect PMU CORE devices") Link: http://lkml.kernel.org/r/1524594014-79243-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index af4bedf4cf98..d2fb597c9a8c 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -539,9 +539,10 @@ static bool pmu_is_uncore(const char *name) /* * PMU CORE devices have different name other than cpu in sysfs on some - * platforms. looking for possible sysfs files to identify as core device. + * platforms. + * Looking for possible sysfs files to identify the arm core device. */ -static int is_pmu_core(const char *name) +static int is_arm_pmu_core(const char *name) { struct stat st; char path[PATH_MAX]; @@ -550,12 +551,6 @@ static int is_pmu_core(const char *name) if (!sysfs) return 0; - /* Look for cpu sysfs (x86 and others) */ - scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs); - if ((stat(path, &st) == 0) && - (strncmp(name, "cpu", strlen("cpu")) == 0)) - return 1; - /* Look for cpu sysfs (specific to arm) */ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", sysfs, name); @@ -668,6 +663,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) struct pmu_events_map *map; struct pmu_event *pe; const char *name = pmu->name; + const char *pname; map = perf_pmu__find_map(pmu); if (!map) @@ -686,11 +682,9 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) break; } - if (!is_pmu_core(name)) { - /* check for uncore devices */ - if (pe->pmu == NULL) - continue; - if (strncmp(pe->pmu, name, strlen(pe->pmu))) + if (!is_arm_pmu_core(name)) { + pname = pe->pmu ? pe->pmu : "cpu"; + if (strncmp(pname, name, strlen(pname))) continue; } From 30060eaed769039c6e523b9d159f2b2858fa8907 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 24 Apr 2018 11:20:11 -0700 Subject: [PATCH 556/660] perf stat: Print out hint for mixed PMU group error Perf doesn't support mixed events from different PMUs (except software event) in a group. For this case, only "" or "" are printed out. There is no hint which guides users to fix the issue. Checking the PMU type of events to determine if they are from the same PMU. There may be false alarm for the checking. E.g. the core PMU has different PMU type. But it should not happen often. The false alarm can also be tolerated, because: - It only happens on error path. - It just provides a possible solution for the issue. Signed-off-by: Kan Liang Cc: Agustin Vega-Frias Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Link: http://lkml.kernel.org/r/1524594014-79243-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 147a27e8c937..30e6b374e095 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -172,6 +172,7 @@ static bool interval_count; static const char *output_name; static int output_fd; static int print_free_counters_hint; +static int print_mixed_hw_group_error; struct perf_stat { bool record; @@ -1126,6 +1127,30 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static bool is_mixed_hw_group(struct perf_evsel *counter) +{ + struct perf_evlist *evlist = counter->evlist; + u32 pmu_type = counter->attr.type; + struct perf_evsel *pos; + + if (counter->nr_members < 2) + return false; + + evlist__for_each_entry(evlist, pos) { + /* software events can be part of any hardware group */ + if (pos->attr.type == PERF_TYPE_SOFTWARE) + continue; + if (pmu_type == PERF_TYPE_SOFTWARE) { + pmu_type = pos->attr.type; + continue; + } + if (pmu_type != pos->attr.type) + return true; + } + + return false; +} + static void printout(int id, int nr, struct perf_evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st) @@ -1178,8 +1203,11 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep); - if (counter->supported) + if (counter->supported) { print_free_counters_hint = 1; + if (is_mixed_hw_group(counter)) + print_mixed_hw_group_error = 1; + } fprintf(stat_config.output, "%-*s%s", csv_output ? 0 : unit_width, @@ -1757,6 +1785,11 @@ static void print_footer(void) " echo 0 > /proc/sys/kernel/nmi_watchdog\n" " perf stat ...\n" " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); + + if (print_mixed_hw_group_error) + fprintf(output, + "The events in group usually have to be from " + "the same PMU. Try reorganizing the group.\n"); } static void print_counters(struct timespec *ts, int argc, const char **argv) From 121f325f34caf9a7654ec8a50e20942ed9d6dafc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 24 Apr 2018 11:20:12 -0700 Subject: [PATCH 557/660] perf evsel: Only fall back group read for leader Perf doesn't support mixed events from different PMUs (except software event) in a group. The perf stat should output / for all events, but it doesn't. For example, perf stat -e '{cycles,uncore_imc_5/umask=0xF,event=0x4/,instructions}' cycles uncore_imc_5/umask=0xF,event=0x4/ 1,024,300 instructions If perf fails to open an event, it doesn't error out directly. It will disable some features and retry, until the event is opened or all features are disabled. The disabled features will not be re-enabled. The group read is one of these features. For the example as above, the IMC event and the leader event "cycles" are from different PMUs. Opening the IMC event must fail. The group read feature must be disabled for IMC event and the followed event "instructions". The "instructions" event has the same PMU as the leader "cycles". It can be opened successfully. Since the group read feature has been disabled, the "instructions" event will be read as a single event, which definitely has a value. The group read fallback is still useful for the case which kernel doesn't support group read. It is good enough to be handled only by the leader. For the fallback request from members, it must be caused by an error. The fallback only breaks the semantics of group. Limit the group read fallback only for the leader. Committer testing: On a broadwell t450s notebook: Before: # perf stat -e '{cycles,unc_cbo_cache_lookup.read_i,instructions}' sleep 1 Performance counter stats for 'sleep 1': cycles unc_cbo_cache_lookup.read_i 818,206 instructions 1.003170887 seconds time elapsed Some events weren't counted. Try disabling the NMI watchdog: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog After: # perf stat -e '{cycles,unc_cbo_cache_lookup.read_i,instructions}' sleep 1 Performance counter stats for 'sleep 1': cycles unc_cbo_cache_lookup.read_i instructions 1.001380511 seconds time elapsed Some events weren't counted. Try disabling the NMI watchdog: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog # Reported-by: Andi Kleen Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Agustin Vega-Frias Cc: Ganapatrao Kulkarni Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Fixes: 82bf311e15d2 ("perf stat: Use group read for event groups") Link: http://lkml.kernel.org/r/1524594014-79243-3-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 26bdeecc0452..4cd2cf93f726 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1925,7 +1925,8 @@ try_fallback: goto fallback_missing_features; } else if (!perf_missing_features.group_read && evsel->attr.inherit && - (evsel->attr.read_format & PERF_FORMAT_GROUP)) { + (evsel->attr.read_format & PERF_FORMAT_GROUP) && + perf_evsel__is_group_leader(evsel)) { perf_missing_features.group_read = true; pr_debug2("switching off group read\n"); goto fallback_missing_features; From 80ee8c588afde077cb0439e15129579a267916c4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 24 Apr 2018 11:20:14 -0700 Subject: [PATCH 558/660] perf stat: Fix duplicate PMU name for interval print PMU name is printed repeatedly for interval print, for example: perf stat --no-merge -e 'unc_m_clockticks' -a -I 1000 # time counts unit events 1.001053069 243,702,144 unc_m_clockticks [uncore_imc_4] 1.001053069 244,268,304 unc_m_clockticks [uncore_imc_2] 1.001053069 244,427,386 unc_m_clockticks [uncore_imc_0] 1.001053069 244,583,760 unc_m_clockticks [uncore_imc_5] 1.001053069 244,738,971 unc_m_clockticks [uncore_imc_3] 1.001053069 244,880,309 unc_m_clockticks [uncore_imc_1] 2.002024821 240,818,200 unc_m_clockticks [uncore_imc_4] [uncore_imc_4] 2.002024821 240,767,812 unc_m_clockticks [uncore_imc_2] [uncore_imc_2] 2.002024821 240,764,215 unc_m_clockticks [uncore_imc_0] [uncore_imc_0] 2.002024821 240,759,504 unc_m_clockticks [uncore_imc_5] [uncore_imc_5] 2.002024821 240,755,992 unc_m_clockticks [uncore_imc_3] [uncore_imc_3] 2.002024821 240,750,403 unc_m_clockticks [uncore_imc_1] [uncore_imc_1] For each print, the PMU name is unconditionally appended to the counter->name. Need to check the counter->name first. If the PMU name is already appended, do nothing. Committer notes: Add and use perf_evsel->uniquified_name bool instead of doing the more expensive strstr(event->name, pmu->name). Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Agustin Vega-Frias Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Fixes: 8c5421c016a4 ("perf pmu: Display pmu name when printing unmerged events in stat") Link: http://lkml.kernel.org/r/1524594014-79243-5-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 ++++- tools/perf/util/evsel.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 30e6b374e095..f17dc601b0f3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1284,7 +1284,8 @@ static void uniquify_event_name(struct perf_evsel *counter) char *new_name; char *config; - if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name, + if (counter->uniquified_name || + !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, strlen(counter->pmu_name))) return; @@ -1302,6 +1303,8 @@ static void uniquify_event_name(struct perf_evsel *counter) counter->name = new_name; } } + + counter->uniquified_name = true; } static void collect_all_aliases(struct perf_evsel *counter, diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d3ee3af618ef..92ec009a292d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -115,6 +115,7 @@ struct perf_evsel { unsigned int sample_size; int id_pos; int is_pos; + bool uniquified_name; bool snapshot; bool supported; bool needs_swap; From d6fef10c750e64f248543d2eee7c86a4a019f7ec Mon Sep 17 00:00:00 2001 From: Md Fahad Iqbal Polash Date: Mon, 16 Apr 2018 10:07:03 -0700 Subject: [PATCH 559/660] ice: Fix insufficient memory issue in ice_aq_manage_mac_read For the MAC read operation, the device can return up to two (LAN and WoL) MAC addresses. Without access to adequate memory, the device will return an error. Fixed this by allocating the right amount of memory. Also, logic to detect and copy the LAN MAC address into the port_info structure has been added. Note that the WoL MAC address is ignored currently as the WoL feature isn't supported yet. Fixes: dc49c7723676 ("ice: Get MAC/PHY/link info and scheduler topology") Signed-off-by: Md Fahad Iqbal Polash Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 22 ++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 21977ec984c4..71d032cc5fa7 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -78,6 +78,7 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size, struct ice_aq_desc desc; enum ice_status status; u16 flags; + u8 i; cmd = &desc.params.mac_read; @@ -98,8 +99,16 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size, return ICE_ERR_CFG; } - ether_addr_copy(hw->port_info->mac.lan_addr, resp->mac_addr); - ether_addr_copy(hw->port_info->mac.perm_addr, resp->mac_addr); + /* A single port can report up to two (LAN and WoL) addresses */ + for (i = 0; i < cmd->num_addr; i++) + if (resp[i].addr_type == ICE_AQC_MAN_MAC_ADDR_TYPE_LAN) { + ether_addr_copy(hw->port_info->mac.lan_addr, + resp[i].mac_addr); + ether_addr_copy(hw->port_info->mac.perm_addr, + resp[i].mac_addr); + break; + } + return 0; } @@ -464,9 +473,12 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_sched; - /* Get port MAC information */ - mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp); - mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL); + /* Get MAC information */ + /* A single port can report up to two (LAN and WoL) addresses */ + mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2, + sizeof(struct ice_aqc_manage_mac_read_resp), + GFP_KERNEL); + mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp); if (!mac_buf) { status = ICE_ERR_NO_MEMORY; From 53fa1f6e8a5958da698a31edf366ffe90596b490 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Apr 2018 18:23:50 +0200 Subject: [PATCH 560/660] ACPI / video: Only default only_lcd to true on Win8-ready _desktops_ Commit 5928c281524f (ACPI / video: Default lcd_only to true on Win8-ready and newer machines) made only_lcd default to true on all machines where acpi_osi_is_win8() returns true, including laptops. The purpose of this is to avoid the bogus / non-working acpi backlight interface which many newer BIOS-es define on desktop machines. But this is causing a regression on some laptops, specifically on the Dell XPS 13 2013 model, which does not have the LCD flag set for its fully functional ACPI backlight interface. Rather then DMI quirking our way out of this, this commits changes the logic for setting only_lcd to true, to only do this on machines with a desktop (or server) dmi chassis-type. Note that we cannot simply only check the chassis-type and not register the backlight interface based on that as there are some laptops and tablets which have their chassis-type set to "3" aka desktop. Hopefully the combination of checking the LCD flag, but only on devices with a desktop(ish) chassis-type will avoid the needs for DMI quirks for this, or at least limit the amount of DMI quirks which we need to a minimum. Fixes: 5928c281524f (ACPI / video: Default lcd_only to true on Win8-ready and newer machines) Reported-and-tested-by: James Hogan Signed-off-by: Hans de Goede Cc: 4.15+ # 4.15+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 76fb96966f7b..2f2e737be0f8 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -2123,6 +2123,25 @@ static int __init intel_opregion_present(void) return opregion; } +static bool dmi_is_desktop(void) +{ + const char *chassis_type; + + chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE); + if (!chassis_type) + return false; + + if (!strcmp(chassis_type, "3") || /* 3: Desktop */ + !strcmp(chassis_type, "4") || /* 4: Low Profile Desktop */ + !strcmp(chassis_type, "5") || /* 5: Pizza Box */ + !strcmp(chassis_type, "6") || /* 6: Mini Tower */ + !strcmp(chassis_type, "7") || /* 7: Tower */ + !strcmp(chassis_type, "11")) /* 11: Main Server Chassis */ + return true; + + return false; +} + int acpi_video_register(void) { int ret = 0; @@ -2143,8 +2162,12 @@ int acpi_video_register(void) * win8 ready (where we also prefer the native backlight driver, so * normally the acpi_video code should not register there anyways). */ - if (only_lcd == -1) - only_lcd = acpi_osi_is_win8(); + if (only_lcd == -1) { + if (dmi_is_desktop() && acpi_osi_is_win8()) + only_lcd = true; + else + only_lcd = false; + } dmi_check_system(video_dmi_table); From 75569c182e4f65cd8826a5853dc9cbca703cbd0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 12 Apr 2018 16:34:19 +0200 Subject: [PATCH 561/660] drm/amdgpu: set COMPUTE_PGM_RSRC1 for SGPR/VGPR clearing shaders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise, the SQ may skip some of the register writes, or shader waves may be allocated where we don't expect them, so that as a result we don't actually reset all of the register SRAMs. This can lead to spurious ECC errors later on if a shader uses an uninitialized register. Signed-off-by: Nicolai Hähnle Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b0e591eaa71a..e14263fca1c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1459,10 +1459,11 @@ static const u32 sgpr_init_compute_shader[] = static const u32 vgpr_init_regs[] = { mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, - mmCOMPUTE_RESOURCE_LIMITS, 0, + mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */ mmCOMPUTE_NUM_THREAD_X, 256*4, mmCOMPUTE_NUM_THREAD_Y, 1, mmCOMPUTE_NUM_THREAD_Z, 1, + mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */ mmCOMPUTE_PGM_RSRC2, 20, mmCOMPUTE_USER_DATA_0, 0xedcedc00, mmCOMPUTE_USER_DATA_1, 0xedcedc01, @@ -1479,10 +1480,11 @@ static const u32 vgpr_init_regs[] = static const u32 sgpr1_init_regs[] = { mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, - mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, + mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */ mmCOMPUTE_NUM_THREAD_X, 256*5, mmCOMPUTE_NUM_THREAD_Y, 1, mmCOMPUTE_NUM_THREAD_Z, 1, + mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ mmCOMPUTE_PGM_RSRC2, 20, mmCOMPUTE_USER_DATA_0, 0xedcedc00, mmCOMPUTE_USER_DATA_1, 0xedcedc01, @@ -1503,6 +1505,7 @@ static const u32 sgpr2_init_regs[] = mmCOMPUTE_NUM_THREAD_X, 256*5, mmCOMPUTE_NUM_THREAD_Y, 1, mmCOMPUTE_NUM_THREAD_Z, 1, + mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ mmCOMPUTE_PGM_RSRC2, 20, mmCOMPUTE_USER_DATA_0, 0xedcedc00, mmCOMPUTE_USER_DATA_1, 0xedcedc01, From 682e6b4da5cbe8e9a53f979a58c2a9d7dc997175 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 10 Apr 2018 21:49:32 +1000 Subject: [PATCH 562/660] rtc: opal: Fix OPAL RTC driver OPAL_BUSY loops The OPAL RTC driver does not sleep in case it gets OPAL_BUSY or OPAL_BUSY_EVENT from firmware, which causes large scheduling latencies, up to 50 seconds have been observed here when RTC stops responding (BMC reboot can do it). Fix this by converting it to the standard form OPAL_BUSY loop that sleeps. Fixes: 628daa8d5abf ("powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks") Cc: stable@vger.kernel.org # v3.2+ Signed-off-by: Nicholas Piggin Acked-by: Alexandre Belloni Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-rtc.c | 8 +++-- drivers/rtc/rtc-opal.c | 37 ++++++++++++++--------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index f8868864f373..aa2a5139462e 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -48,10 +48,12 @@ unsigned long __init opal_get_boot_time(void) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) + if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); - else if (rc == OPAL_BUSY) - mdelay(10); + } else if (rc == OPAL_BUSY) { + mdelay(OPAL_BUSY_DELAY_MS); + } } if (rc != OPAL_SUCCESS) return 0; diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c index 304e891e35fc..60f2250fd96b 100644 --- a/drivers/rtc/rtc-opal.c +++ b/drivers/rtc/rtc-opal.c @@ -57,7 +57,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms) static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) { - long rc = OPAL_BUSY; + s64 rc = OPAL_BUSY; int retries = 10; u32 y_m_d; u64 h_m_s_ms; @@ -66,13 +66,17 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) + if (rc == OPAL_BUSY_EVENT) { + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); - else if (retries-- && (rc == OPAL_HARDWARE - || rc == OPAL_INTERNAL_ERROR)) - msleep(10); - else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) - break; + } else if (rc == OPAL_BUSY) { + msleep(OPAL_BUSY_DELAY_MS); + } else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) { + if (retries--) { + msleep(10); /* Wait 10ms before retry */ + rc = OPAL_BUSY; /* go around again */ + } + } } if (rc != OPAL_SUCCESS) @@ -87,21 +91,26 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) { - long rc = OPAL_BUSY; + s64 rc = OPAL_BUSY; int retries = 10; u32 y_m_d = 0; u64 h_m_s_ms = 0; tm_to_opal(tm, &y_m_d, &h_m_s_ms); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_write(y_m_d, h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) + if (rc == OPAL_BUSY_EVENT) { + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); - else if (retries-- && (rc == OPAL_HARDWARE - || rc == OPAL_INTERNAL_ERROR)) - msleep(10); - else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) - break; + } else if (rc == OPAL_BUSY) { + msleep(OPAL_BUSY_DELAY_MS); + } else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) { + if (retries--) { + msleep(10); /* Wait 10ms before retry */ + rc = OPAL_BUSY; /* go around again */ + } + } } return rc == OPAL_SUCCESS ? 0 : -EIO; From 295810516e302be90fe469b17b8ac0ac486da3bf Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Wed, 25 Apr 2018 09:44:45 +0530 Subject: [PATCH 563/660] ALSA: usx2y: Change return type to vm_fault_t Use new return type vm_fault_t for fault handler. For now, this is just documenting that the function returns a VM_FAULT value rather than an errno. Once all instances are converted, vm_fault_t will become a distinct type. Commit 1c8f422059ae ("mm: change return type to vm_fault_t") Signed-off-by: Souptick Joarder Reviewed-by: Matthew Wilcox Signed-off-by: Takashi Iwai --- sound/usb/usx2y/us122l.c | 2 +- sound/usb/usx2y/usX2Yhwdep.c | 2 +- sound/usb/usx2y/usx2yhwdeppcm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c index ebcab5c5465d..8082f7b077f1 100644 --- a/sound/usb/usx2y/us122l.c +++ b/sound/usb/usx2y/us122l.c @@ -139,7 +139,7 @@ static void usb_stream_hwdep_vm_open(struct vm_area_struct *area) snd_printdd(KERN_DEBUG "%i\n", atomic_read(&us122l->mmap_count)); } -static int usb_stream_hwdep_vm_fault(struct vm_fault *vmf) +static vm_fault_t usb_stream_hwdep_vm_fault(struct vm_fault *vmf) { unsigned long offset; struct page *page; diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index d8bd7c99b48c..c1dd9a7b48df 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -31,7 +31,7 @@ #include "usbusx2y.h" #include "usX2Yhwdep.h" -static int snd_us428ctls_vm_fault(struct vm_fault *vmf) +static vm_fault_t snd_us428ctls_vm_fault(struct vm_fault *vmf) { unsigned long offset; struct page * page; diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index 0d050528a4e1..4fd9276b8e50 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -652,7 +652,7 @@ static void snd_usX2Y_hwdep_pcm_vm_close(struct vm_area_struct *area) } -static int snd_usX2Y_hwdep_pcm_vm_fault(struct vm_fault *vmf) +static vm_fault_t snd_usX2Y_hwdep_pcm_vm_fault(struct vm_fault *vmf) { unsigned long offset; void *vaddr; From 41412fe921188a2929832ebd643e63bdbb61d326 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Wed, 25 Apr 2018 09:50:29 +0530 Subject: [PATCH 564/660] ALSA: pcm: Change return type to vm_fault_t Use new return type vm_fault_t for fault handler. For now, this is just documenting that the function returns a VM_FAULT value rather than an errno. Once all instances are converted, vm_fault_t will become a distinct type. Commit 1c8f422059ae ("mm: change return type to vm_fault_t") Signed-off-by: Souptick Joarder Reviewed-by: Matthew Wilcox Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 159706cf8f05..0e875d5a9e86 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3234,7 +3234,7 @@ static __poll_t snd_pcm_capture_poll(struct file *file, poll_table * wait) /* * mmap status record */ -static int snd_pcm_mmap_status_fault(struct vm_fault *vmf) +static vm_fault_t snd_pcm_mmap_status_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; @@ -3270,7 +3270,7 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file /* * mmap control record */ -static int snd_pcm_mmap_control_fault(struct vm_fault *vmf) +static vm_fault_t snd_pcm_mmap_control_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; @@ -3359,7 +3359,7 @@ snd_pcm_default_page_ops(struct snd_pcm_substream *substream, unsigned long ofs) /* * fault callback for mmapping a RAM page */ -static int snd_pcm_mmap_data_fault(struct vm_fault *vmf) +static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; From 4e00b339e264802851aff8e73cde7d24b57b18ce Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 25 Apr 2018 01:12:32 -0400 Subject: [PATCH 565/660] random: rate limit unseeded randomness warnings On systems without sufficient boot randomness, no point spamming dmesg. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- drivers/char/random.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 721dca8db9cf..cd888d4ee605 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -261,6 +261,7 @@ #include #include #include +#include #include #include #include @@ -438,6 +439,16 @@ static void _crng_backtrack_protect(struct crng_state *crng, static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); +static struct ratelimit_state unseeded_warning = + RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); +static struct ratelimit_state urandom_warning = + RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); + +static int ratelimit_disable __read_mostly; + +module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); +MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); + /********************************************************************** * * OS independent entropy store. Here are the functions which handle @@ -932,6 +943,18 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) process_random_ready_list(); wake_up_interruptible(&crng_init_wait); pr_notice("random: crng init done\n"); + if (unseeded_warning.missed) { + pr_notice("random: %d get_random_xx warning(s) missed " + "due to ratelimiting\n", + unseeded_warning.missed); + unseeded_warning.missed = 0; + } + if (urandom_warning.missed) { + pr_notice("random: %d urandom warning(s) missed " + "due to ratelimiting\n", + urandom_warning.missed); + urandom_warning.missed = 0; + } } } @@ -1572,8 +1595,9 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, #ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM print_once = true; #endif - pr_notice("random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); + if (__ratelimit(&unseeded_warning)) + pr_notice("random: %s called from %pS with crng_init=%d\n", + func_name, caller, crng_init); } /* @@ -1767,6 +1791,10 @@ static int rand_initialize(void) init_std_data(&blocking_pool); crng_initialize(&primary_crng); crng_global_init_time = jiffies; + if (ratelimit_disable) { + urandom_warning.interval = 0; + unseeded_warning.interval = 0; + } return 0; } early_initcall(rand_initialize); @@ -1834,9 +1862,10 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) if (!crng_ready() && maxwarn > 0) { maxwarn--; - printk(KERN_NOTICE "random: %s: uninitialized urandom read " - "(%zd bytes read)\n", - current->comm, nbytes); + if (__ratelimit(&urandom_warning)) + printk(KERN_NOTICE "random: %s: uninitialized " + "urandom read (%zd bytes read)\n", + current->comm, nbytes); spin_lock_irqsave(&primary_crng.lock, flags); crng_init_cnt = 0; spin_unlock_irqrestore(&primary_crng.lock, flags); From 6de3b1f26d1e8adb53d97835400c541ce50155e5 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Mon, 23 Apr 2018 14:37:53 +0300 Subject: [PATCH 566/660] drm/i915: Use ktime on wait_for MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We use jiffies to determine when wait expires. However Imre did find out that jiffies can and will do a >1 increments on certain situations [1]. When this happens in a wait_for loop, we return timeout errorneously much earlier than what the real wallclock would say. We can't afford our waits to timeout prematurely. Discard jiffies and change to ktime to detect timeouts. v2: added bugzilla entry (Imre), added stable (Chris) Reported-by: Imre Deak References: https://lkml.org/lkml/2018/4/18/798 [1] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105771 Cc: Imre Deak Cc: Chris Wilson Cc: Ville Syrjälä Cc: Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180423113754.28424-1-mika.kuoppala@linux.intel.com (cherry picked from commit 3085982c6b45d7d22f76e3aa018affbc143a7370) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d4368589b355..a80fbad9be0f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -49,12 +49,12 @@ * check the condition before the timeout. */ #define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ - unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \ + const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ int ret__; \ might_sleep(); \ for (;;) { \ - bool expired__ = time_after(jiffies, timeout__); \ + const bool expired__ = ktime_after(ktime_get_raw(), end__); \ OP; \ if (COND) { \ ret__ = 0; \ From 0b551f1e0fc50ee4e3cde2dd639cb010dae5b997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 18 Apr 2018 16:41:58 -0700 Subject: [PATCH 567/660] drm/i915/fbdev: Enable late fbdev initial configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the initial fbdev configuration (intel_fbdev_initial_config()) runs and there still no sink connected it will cause drm_fb_helper_initial_config() to return 0 as no error happened (but internally the return is -EAGAIN). Because no framebuffer was allocated, when a sink is connected intel_fbdev_output_poll_changed() will not execute drm_fb_helper_hotplug_event() that would trigger another try to do the initial fbdev configuration. So here allowing drm_fb_helper_hotplug_event() to be executed when there is no framebuffer allocated and fbdev was not set up yet. This issue also happens when a MST DP sink is connected since boot, as the MST topology is discovered in parallel if intel_fbdev_initial_config() is executed before the first sink MST is discovered it will cause this same issue. This is a follow-up patch of https://patchwork.freedesktop.org/patch/196089/ Changes from v1: - not creating a dump framebuffer anymore, instead just allowing drm_fb_helper_hotplug_event() to execute when fbdev is not setup yet. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104158 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104425 Cc: Rodrigo Vivi Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Chris Wilson Signed-off-by: José Roberto de Souza Tested-by: Paul Menzel Tested-by: frederik # 4.15.17 Tested-by: Ian Pilcher Acked-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180418234158.9388-1-jose.souza@intel.com (cherry picked from commit df9e6521749ab33cde306e8a4350b0ac7889220a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 6f12adc06365..6467a5cc2ca3 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -806,7 +806,7 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev) return; intel_fbdev_sync(ifbdev); - if (ifbdev->vma) + if (ifbdev->vma || ifbdev->helper.deferred_setup) drm_fb_helper_hotplug_event(&ifbdev->helper); } From ea04a1dbf8b1d6af759d58e705636fde48583f8f Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 25 Apr 2018 15:31:52 +0800 Subject: [PATCH 568/660] ALSA: hda/realtek - Add some fixes for ALC233 Fill COEF to change EAPD to verb control. Assigned codec type. This is an additional fix over 92f974df3460 ("ALSA: hda/realtek - New vendor ID for ALC233"). [ More notes: according to Kailang, the chip is 10ec:0235 bonding for ALC233b, which is equivalent with ALC255. It's only used for Lenovo. The chip needs no alc_process_coef_fw() for headset unlike ALC255. ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fc77bf7a1544..f3ad5dea2f8d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -331,6 +331,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) /* fallthrough */ case 0x10ec0215: case 0x10ec0233: + case 0x10ec0235: case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: @@ -7160,6 +7161,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0298: spec->codec_variant = ALC269_TYPE_ALC298; break; + case 0x10ec0235: case 0x10ec0255: spec->codec_variant = ALC269_TYPE_ALC255; break; From ab3b8e5159b5335c81ba2d09ee5054d4a1b5a7a6 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 25 Apr 2018 16:05:27 +0800 Subject: [PATCH 569/660] ALSA: hda/realtek - Update ALC255 depop optimize Add ALC255 its own depop functions for alc_init and alc_shutup. Assign it to ALC256 usage. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f3ad5dea2f8d..c13b7fd0f58b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7164,6 +7164,8 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0235: case 0x10ec0255: spec->codec_variant = ALC269_TYPE_ALC255; + spec->shutup = alc256_shutup; + spec->init_hook = alc256_init; break; case 0x10ec0236: case 0x10ec0256: From f5e94b4c6ebdabe0f602d796e0430180927521a0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 07:26:59 +0200 Subject: [PATCH 570/660] ALSA: seq: oss: Fix unbalanced use lock for synth MIDI device When get_synthdev() is called for a MIDI device, it returns the fixed midi_synth_dev without the use refcounting. OTOH, the caller is supposed to unreference unconditionally after the usage, so this would lead to unbalanced refcount. This patch corrects the behavior and keep up the refcount balance also for the MIDI synth device. Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_synth.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index cd0e0ebbfdb1..9e2b250ae780 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -363,10 +363,14 @@ get_synthdev(struct seq_oss_devinfo *dp, int dev) return NULL; if (! dp->synths[dev].opened) return NULL; - if (dp->synths[dev].is_midi) - return &midi_synth_dev; - if ((rec = get_sdev(dev)) == NULL) - return NULL; + if (dp->synths[dev].is_midi) { + rec = &midi_synth_dev; + snd_use_lock_use(&rec->use_lock); + } else { + rec = get_sdev(dev); + if (!rec) + return NULL; + } if (! rec->opened) { snd_use_lock_free(&rec->use_lock); return NULL; From 8d218dd8116695ecda7164f97631c069938aa22e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 07:31:54 +0200 Subject: [PATCH 571/660] ALSA: seq: oss: Hardening for potential Spectre v1 As Smatch recently suggested, a few places in OSS sequencer codes may expand the array directly from the user-space value with speculation, namely there are a significant amount of references to either info->ch[] or dp->synths[] array: sound/core/seq/oss/seq_oss_event.c:315 note_on_event() warn: potential spectre issue 'info->ch' (local cap) sound/core/seq/oss/seq_oss_event.c:362 note_off_event() warn: potential spectre issue 'info->ch' (local cap) sound/core/seq/oss/seq_oss_synth.c:470 snd_seq_oss_synth_load_patch() warn: potential spectre issue 'dp->synths' (local cap) sound/core/seq/oss/seq_oss_event.c:293 note_on_event() warn: potential spectre issue 'dp->synths' sound/core/seq/oss/seq_oss_event.c:353 note_off_event() warn: potential spectre issue 'dp->synths' sound/core/seq/oss/seq_oss_synth.c:506 snd_seq_oss_synth_sysex() warn: potential spectre issue 'dp->synths' sound/core/seq/oss/seq_oss_synth.c:580 snd_seq_oss_synth_ioctl() warn: potential spectre issue 'dp->synths' Although all these seem doing only the first load without further reference, we may want to stay in a safer side, so hardening with array_index_nospec() would still make sense. We may put array_index_nospec() at each place, but here we take a different approach: - For dp->synths[], change the helpers to retrieve seq_oss_synthinfo pointer directly instead of the array expansion at each place - For info->ch[], harden in a normal way, as there are only a couple of places As a result, the existing helper, snd_seq_oss_synth_is_valid() is replaced with snd_seq_oss_synth_info(). Also, we cover MIDI device where a similar array expansion is done, too, although it wasn't reported by Smatch. BugLink: https://marc.info/?l=linux-kernel&m=152411496503418&w=2 Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_event.c | 15 +++--- sound/core/seq/oss/seq_oss_midi.c | 2 + sound/core/seq/oss/seq_oss_synth.c | 75 +++++++++++++++++------------- sound/core/seq/oss/seq_oss_synth.h | 3 +- 4 files changed, 55 insertions(+), 40 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_event.c b/sound/core/seq/oss/seq_oss_event.c index c3908862bc8b..86ca584c27b2 100644 --- a/sound/core/seq/oss/seq_oss_event.c +++ b/sound/core/seq/oss/seq_oss_event.c @@ -26,6 +26,7 @@ #include #include "seq_oss_readq.h" #include "seq_oss_writeq.h" +#include /* @@ -287,10 +288,10 @@ note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, st { struct seq_oss_synthinfo *info; - if (!snd_seq_oss_synth_is_valid(dp, dev)) + info = snd_seq_oss_synth_info(dp, dev); + if (!info) return -ENXIO; - info = &dp->synths[dev]; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { @@ -298,6 +299,7 @@ note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, st return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); } + ch = array_index_nospec(ch, info->nr_voices); if (note == 255 && info->ch[ch].note >= 0) { /* volume control */ int type; @@ -347,10 +349,10 @@ note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, s { struct seq_oss_synthinfo *info; - if (!snd_seq_oss_synth_is_valid(dp, dev)) + info = snd_seq_oss_synth_info(dp, dev); + if (!info) return -ENXIO; - info = &dp->synths[dev]; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { @@ -358,6 +360,7 @@ note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, s return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); } + ch = array_index_nospec(ch, info->nr_voices); if (info->ch[ch].note >= 0) { note = info->ch[ch].note; info->ch[ch].vel = 0; @@ -381,7 +384,7 @@ note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, s static int set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, int vel, struct snd_seq_event *ev) { - if (! snd_seq_oss_synth_is_valid(dp, dev)) + if (!snd_seq_oss_synth_info(dp, dev)) return -ENXIO; ev->type = type; @@ -399,7 +402,7 @@ set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, static int set_control_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int param, int val, struct snd_seq_event *ev) { - if (! snd_seq_oss_synth_is_valid(dp, dev)) + if (!snd_seq_oss_synth_info(dp, dev)) return -ENXIO; ev->type = type; diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index b30b2139e3f0..9debd1b8fd28 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -29,6 +29,7 @@ #include "../seq_lock.h" #include #include +#include /* @@ -315,6 +316,7 @@ get_mididev(struct seq_oss_devinfo *dp, int dev) { if (dev < 0 || dev >= dp->max_mididev) return NULL; + dev = array_index_nospec(dev, dp->max_mididev); return get_mdev(dev); } diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 9e2b250ae780..278ebb993122 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * constants @@ -339,17 +340,13 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp) dp->max_synthdev = 0; } -/* - * check if the specified device is MIDI mapped device - */ -static int -is_midi_dev(struct seq_oss_devinfo *dp, int dev) +static struct seq_oss_synthinfo * +get_synthinfo_nospec(struct seq_oss_devinfo *dp, int dev) { if (dev < 0 || dev >= dp->max_synthdev) - return 0; - if (dp->synths[dev].is_midi) - return 1; - return 0; + return NULL; + dev = array_index_nospec(dev, SNDRV_SEQ_OSS_MAX_SYNTH_DEVS); + return &dp->synths[dev]; } /* @@ -359,11 +356,13 @@ static struct seq_oss_synth * get_synthdev(struct seq_oss_devinfo *dp, int dev) { struct seq_oss_synth *rec; - if (dev < 0 || dev >= dp->max_synthdev) + struct seq_oss_synthinfo *info = get_synthinfo_nospec(dp, dev); + + if (!info) return NULL; - if (! dp->synths[dev].opened) + if (!info->opened) return NULL; - if (dp->synths[dev].is_midi) { + if (info->is_midi) { rec = &midi_synth_dev; snd_use_lock_use(&rec->use_lock); } else { @@ -406,10 +405,8 @@ snd_seq_oss_synth_reset(struct seq_oss_devinfo *dp, int dev) struct seq_oss_synth *rec; struct seq_oss_synthinfo *info; - if (snd_BUG_ON(dev < 0 || dev >= dp->max_synthdev)) - return; - info = &dp->synths[dev]; - if (! info->opened) + info = get_synthinfo_nospec(dp, dev); + if (!info || !info->opened) return; if (info->sysex) info->sysex->len = 0; /* reset sysex */ @@ -458,12 +455,14 @@ snd_seq_oss_synth_load_patch(struct seq_oss_devinfo *dp, int dev, int fmt, const char __user *buf, int p, int c) { struct seq_oss_synth *rec; + struct seq_oss_synthinfo *info; int rc; - if (dev < 0 || dev >= dp->max_synthdev) + info = get_synthinfo_nospec(dp, dev); + if (!info) return -ENXIO; - if (is_midi_dev(dp, dev)) + if (info->is_midi) return 0; if ((rec = get_synthdev(dp, dev)) == NULL) return -ENXIO; @@ -471,24 +470,25 @@ snd_seq_oss_synth_load_patch(struct seq_oss_devinfo *dp, int dev, int fmt, if (rec->oper.load_patch == NULL) rc = -ENXIO; else - rc = rec->oper.load_patch(&dp->synths[dev].arg, fmt, buf, p, c); + rc = rec->oper.load_patch(&info->arg, fmt, buf, p, c); snd_use_lock_free(&rec->use_lock); return rc; } /* - * check if the device is valid synth device + * check if the device is valid synth device and return the synth info */ -int -snd_seq_oss_synth_is_valid(struct seq_oss_devinfo *dp, int dev) +struct seq_oss_synthinfo * +snd_seq_oss_synth_info(struct seq_oss_devinfo *dp, int dev) { struct seq_oss_synth *rec; + rec = get_synthdev(dp, dev); if (rec) { snd_use_lock_free(&rec->use_lock); - return 1; + return get_synthinfo_nospec(dp, dev); } - return 0; + return NULL; } @@ -503,16 +503,18 @@ snd_seq_oss_synth_sysex(struct seq_oss_devinfo *dp, int dev, unsigned char *buf, int i, send; unsigned char *dest; struct seq_oss_synth_sysex *sysex; + struct seq_oss_synthinfo *info; - if (! snd_seq_oss_synth_is_valid(dp, dev)) + info = snd_seq_oss_synth_info(dp, dev); + if (!info) return -ENXIO; - sysex = dp->synths[dev].sysex; + sysex = info->sysex; if (sysex == NULL) { sysex = kzalloc(sizeof(*sysex), GFP_KERNEL); if (sysex == NULL) return -ENOMEM; - dp->synths[dev].sysex = sysex; + info->sysex = sysex; } send = 0; @@ -557,10 +559,12 @@ snd_seq_oss_synth_sysex(struct seq_oss_devinfo *dp, int dev, unsigned char *buf, int snd_seq_oss_synth_addr(struct seq_oss_devinfo *dp, int dev, struct snd_seq_event *ev) { - if (! snd_seq_oss_synth_is_valid(dp, dev)) + struct seq_oss_synthinfo *info = snd_seq_oss_synth_info(dp, dev); + + if (!info) return -EINVAL; - snd_seq_oss_fill_addr(dp, ev, dp->synths[dev].arg.addr.client, - dp->synths[dev].arg.addr.port); + snd_seq_oss_fill_addr(dp, ev, info->arg.addr.client, + info->arg.addr.port); return 0; } @@ -572,16 +576,18 @@ int snd_seq_oss_synth_ioctl(struct seq_oss_devinfo *dp, int dev, unsigned int cmd, unsigned long addr) { struct seq_oss_synth *rec; + struct seq_oss_synthinfo *info; int rc; - if (is_midi_dev(dp, dev)) + info = get_synthinfo_nospec(dp, dev); + if (!info || info->is_midi) return -ENXIO; if ((rec = get_synthdev(dp, dev)) == NULL) return -ENXIO; if (rec->oper.ioctl == NULL) rc = -ENXIO; else - rc = rec->oper.ioctl(&dp->synths[dev].arg, cmd, addr); + rc = rec->oper.ioctl(&info->arg, cmd, addr); snd_use_lock_free(&rec->use_lock); return rc; } @@ -593,7 +599,10 @@ snd_seq_oss_synth_ioctl(struct seq_oss_devinfo *dp, int dev, unsigned int cmd, u int snd_seq_oss_synth_raw_event(struct seq_oss_devinfo *dp, int dev, unsigned char *data, struct snd_seq_event *ev) { - if (! snd_seq_oss_synth_is_valid(dp, dev) || is_midi_dev(dp, dev)) + struct seq_oss_synthinfo *info; + + info = snd_seq_oss_synth_info(dp, dev); + if (!info || info->is_midi) return -ENXIO; ev->type = SNDRV_SEQ_EVENT_OSS; memcpy(ev->data.raw8.d, data, 8); diff --git a/sound/core/seq/oss/seq_oss_synth.h b/sound/core/seq/oss/seq_oss_synth.h index 74ac55f166b6..a63f9e22974d 100644 --- a/sound/core/seq/oss/seq_oss_synth.h +++ b/sound/core/seq/oss/seq_oss_synth.h @@ -37,7 +37,8 @@ void snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp); void snd_seq_oss_synth_reset(struct seq_oss_devinfo *dp, int dev); int snd_seq_oss_synth_load_patch(struct seq_oss_devinfo *dp, int dev, int fmt, const char __user *buf, int p, int c); -int snd_seq_oss_synth_is_valid(struct seq_oss_devinfo *dp, int dev); +struct seq_oss_synthinfo *snd_seq_oss_synth_info(struct seq_oss_devinfo *dp, + int dev); int snd_seq_oss_synth_sysex(struct seq_oss_devinfo *dp, int dev, unsigned char *buf, struct snd_seq_event *ev); int snd_seq_oss_synth_addr(struct seq_oss_devinfo *dp, int dev, struct snd_seq_event *ev); From 088e861edffb84879cf0c0d1b02eda078c3a0ffe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 07:45:56 +0200 Subject: [PATCH 572/660] ALSA: control: Hardening for potential Spectre v1 As recently Smatch suggested, a few places in ALSA control core codes may expand the array directly from the user-space value with speculation: sound/core/control.c:1003 snd_ctl_elem_lock() warn: potential spectre issue 'kctl->vd' sound/core/control.c:1031 snd_ctl_elem_unlock() warn: potential spectre issue 'kctl->vd' sound/core/control.c:844 snd_ctl_elem_info() warn: potential spectre issue 'kctl->vd' sound/core/control.c:891 snd_ctl_elem_read() warn: potential spectre issue 'kctl->vd' sound/core/control.c:939 snd_ctl_elem_write() warn: potential spectre issue 'kctl->vd' Although all these seem doing only the first load without further reference, we may want to stay in a safer side, so hardening with array_index_nospec() would still make sense. In this patch, we put array_index_nospec() to the common snd_ctl_get_ioff*() helpers instead of each caller. These helpers are also referred from some drivers, too, and basically all usages are to calculate the array index from the user-space value, hence it's better to cover there. BugLink: https://marc.info/?l=linux-kernel&m=152411496503418&w=2 Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai --- include/sound/control.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/sound/control.h b/include/sound/control.h index ca13a44ae9d4..6011a58d3e20 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -23,6 +23,7 @@ */ #include +#include #include #define snd_kcontrol_chip(kcontrol) ((kcontrol)->private_data) @@ -148,12 +149,14 @@ int snd_ctl_get_preferred_subdevice(struct snd_card *card, int type); static inline unsigned int snd_ctl_get_ioffnum(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id) { - return id->numid - kctl->id.numid; + unsigned int ioff = id->numid - kctl->id.numid; + return array_index_nospec(ioff, kctl->count); } static inline unsigned int snd_ctl_get_ioffidx(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id) { - return id->index - kctl->id.index; + unsigned int ioff = id->index - kctl->id.index; + return array_index_nospec(ioff, kctl->count); } static inline unsigned int snd_ctl_get_ioff(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id) From 69fa6f19b95597618ab30438a27b67ad93daa7c7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 07:50:50 +0200 Subject: [PATCH 573/660] ALSA: hda: Hardening for potential Spectre v1 As recently Smatch suggested, one place in HD-audio hwdep ioctl codes may expand the array directly from the user-space value with speculation: sound/pci/hda/hda_local.h:467 get_wcaps() warn: potential spectre issue 'codec->wcaps' As get_wcaps() itself is a fairly frequently called inline function, and there is only one single call with a user-space value, we replace only the latter one to open-code locally with array_index_nospec() hardening in this patch. BugLink: https://marc.info/?l=linux-kernel&m=152411496503418&w=2 Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_hwdep.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_hwdep.c b/sound/pci/hda/hda_hwdep.c index 57df06e76968..cc009a4a3d1d 100644 --- a/sound/pci/hda/hda_hwdep.c +++ b/sound/pci/hda/hda_hwdep.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "hda_codec.h" #include "hda_local.h" @@ -51,7 +52,16 @@ static int get_wcap_ioctl(struct hda_codec *codec, if (get_user(verb, &arg->verb)) return -EFAULT; - res = get_wcaps(codec, verb >> 24); + /* open-code get_wcaps(verb>>24) with nospec */ + verb >>= 24; + if (verb < codec->core.start_nid || + verb >= codec->core.start_nid + codec->core.num_nodes) { + res = 0; + } else { + verb -= codec->core.start_nid; + verb = array_index_nospec(verb, codec->core.num_nodes); + res = codec->wcaps[verb]; + } if (put_user(res, &arg->res)) return -EFAULT; return 0; From 7f054a5bee0987f1e2d4e59daea462421c76f2cb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 07:56:07 +0200 Subject: [PATCH 574/660] ALSA: opl3: Hardening for potential Spectre v1 As recently Smatch suggested, one place in OPL3 driver may expand the array directly from the user-space value with speculation: sound/drivers/opl3/opl3_synth.c:476 snd_opl3_set_voice() warn: potential spectre issue 'snd_opl3_regmap' This patch puts array_index_nospec() for hardening against it. BugLink: https://marc.info/?l=linux-kernel&m=152411496503418&w=2 Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai --- sound/drivers/opl3/opl3_synth.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/drivers/opl3/opl3_synth.c b/sound/drivers/opl3/opl3_synth.c index ddcc1a325a61..42920a243328 100644 --- a/sound/drivers/opl3/opl3_synth.c +++ b/sound/drivers/opl3/opl3_synth.c @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -448,7 +449,7 @@ static int snd_opl3_set_voice(struct snd_opl3 * opl3, struct snd_dm_fm_voice * v { unsigned short reg_side; unsigned char op_offset; - unsigned char voice_offset; + unsigned char voice_offset, voice_op; unsigned short opl3_reg; unsigned char reg_val; @@ -473,7 +474,9 @@ static int snd_opl3_set_voice(struct snd_opl3 * opl3, struct snd_dm_fm_voice * v voice_offset = voice->voice - MAX_OPL2_VOICES; } /* Get register offset of operator */ - op_offset = snd_opl3_regmap[voice_offset][voice->op]; + voice_offset = array_index_nospec(voice_offset, MAX_OPL2_VOICES); + voice_op = array_index_nospec(voice->op, 4); + op_offset = snd_opl3_regmap[voice_offset][voice_op]; reg_val = 0x00; /* Set amplitude modulation (tremolo) effect */ From f9d94b57e30fd1575b4935045b32d738668aa74b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 08:01:48 +0200 Subject: [PATCH 575/660] ALSA: asihpi: Hardening for potential Spectre v1 As recently Smatch suggested, a couple of places in ASIHPI driver may expand the array directly from the user-space value with speculation: sound/pci/asihpi/hpimsginit.c:70 hpi_init_response() warn: potential spectre issue 'res_size' (local cap) sound/pci/asihpi/hpioctl.c:189 asihpi_hpi_ioctl() warn: potential spectre issue 'adapters' This patch puts array_index_nospec() for hardening against them. BugLink: https://marc.info/?l=linux-kernel&m=152411496503418&w=2 Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpimsginit.c | 13 +++++++++---- sound/pci/asihpi/hpioctl.c | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/sound/pci/asihpi/hpimsginit.c b/sound/pci/asihpi/hpimsginit.c index 7eb617175fde..a31a70dccecf 100644 --- a/sound/pci/asihpi/hpimsginit.c +++ b/sound/pci/asihpi/hpimsginit.c @@ -23,6 +23,7 @@ #include "hpi_internal.h" #include "hpimsginit.h" +#include /* The actual message size for each object type */ static u16 msg_size[HPI_OBJ_MAXINDEX + 1] = HPI_MESSAGE_SIZE_BY_OBJECT; @@ -39,10 +40,12 @@ static void hpi_init_message(struct hpi_message *phm, u16 object, { u16 size; - if ((object > 0) && (object <= HPI_OBJ_MAXINDEX)) + if ((object > 0) && (object <= HPI_OBJ_MAXINDEX)) { + object = array_index_nospec(object, HPI_OBJ_MAXINDEX + 1); size = msg_size[object]; - else + } else { size = sizeof(*phm); + } memset(phm, 0, size); phm->size = size; @@ -66,10 +69,12 @@ void hpi_init_response(struct hpi_response *phr, u16 object, u16 function, { u16 size; - if ((object > 0) && (object <= HPI_OBJ_MAXINDEX)) + if ((object > 0) && (object <= HPI_OBJ_MAXINDEX)) { + object = array_index_nospec(object, HPI_OBJ_MAXINDEX + 1); size = res_size[object]; - else + } else { size = sizeof(*phr); + } memset(phr, 0, sizeof(*phr)); phr->size = size; diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 5badd08e1d69..b1a2a7ea4172 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef MODULE_FIRMWARE MODULE_FIRMWARE("asihpi/dsp5000.bin"); @@ -186,7 +187,8 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct hpi_adapter *pa = NULL; if (hm->h.adapter_index < ARRAY_SIZE(adapters)) - pa = &adapters[hm->h.adapter_index]; + pa = &adapters[array_index_nospec(hm->h.adapter_index, + ARRAY_SIZE(adapters))]; if (!pa || !pa->adapter || !pa->adapter->type) { hpi_init_response(&hr->r0, hm->h.object, From 10513142a7114d251670361ad40cba2c61403406 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 08:03:14 +0200 Subject: [PATCH 576/660] ALSA: hdspm: Hardening for potential Spectre v1 As recently Smatch suggested, a couple of places in HDSP MADI driver may expand the array directly from the user-space value with speculation: sound/pci/rme9652/hdspm.c:5717 snd_hdspm_channel_info() warn: potential spectre issue 'hdspm->channel_map_out' (local cap) sound/pci/rme9652/hdspm.c:5734 snd_hdspm_channel_info() warn: potential spectre issue 'hdspm->channel_map_in' (local cap) This patch puts array_index_nospec() for hardening against them. BugLink: https://marc.info/?l=linux-kernel&m=152411496503418&w=2 Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 4c59983158e0..11b5b5e0e058 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -137,6 +137,7 @@ #include #include #include +#include #include #include @@ -5698,40 +5699,43 @@ static int snd_hdspm_channel_info(struct snd_pcm_substream *substream, struct snd_pcm_channel_info *info) { struct hdspm *hdspm = snd_pcm_substream_chip(substream); + unsigned int channel = info->channel; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - if (snd_BUG_ON(info->channel >= hdspm->max_channels_out)) { + if (snd_BUG_ON(channel >= hdspm->max_channels_out)) { dev_info(hdspm->card->dev, "snd_hdspm_channel_info: output channel out of range (%d)\n", - info->channel); + channel); return -EINVAL; } - if (hdspm->channel_map_out[info->channel] < 0) { + channel = array_index_nospec(channel, hdspm->max_channels_out); + if (hdspm->channel_map_out[channel] < 0) { dev_info(hdspm->card->dev, "snd_hdspm_channel_info: output channel %d mapped out\n", - info->channel); + channel); return -EINVAL; } - info->offset = hdspm->channel_map_out[info->channel] * + info->offset = hdspm->channel_map_out[channel] * HDSPM_CHANNEL_BUFFER_BYTES; } else { - if (snd_BUG_ON(info->channel >= hdspm->max_channels_in)) { + if (snd_BUG_ON(channel >= hdspm->max_channels_in)) { dev_info(hdspm->card->dev, "snd_hdspm_channel_info: input channel out of range (%d)\n", - info->channel); + channel); return -EINVAL; } - if (hdspm->channel_map_in[info->channel] < 0) { + channel = array_index_nospec(channel, hdspm->max_channels_in); + if (hdspm->channel_map_in[channel] < 0) { dev_info(hdspm->card->dev, "snd_hdspm_channel_info: input channel %d mapped out\n", - info->channel); + channel); return -EINVAL; } - info->offset = hdspm->channel_map_in[info->channel] * + info->offset = hdspm->channel_map_in[channel] * HDSPM_CHANNEL_BUFFER_BYTES; } From f526afcd8f71945c23ce581d7864ace93de8a4f7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Apr 2018 08:04:41 +0200 Subject: [PATCH 577/660] ALSA: rme9652: Hardening for potential Spectre v1 As recently Smatch suggested, one place in RME9652 driver may expand the array directly from the user-space value with speculation: sound/pci/rme9652/rme9652.c:2074 snd_rme9652_channel_info() warn: potential spectre issue 'rme9652->channel_map' (local cap) This patch puts array_index_nospec() for hardening against it. BugLink: https://marc.info/?l=linux-kernel&m=152411496503418&w=2 Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai --- sound/pci/rme9652/rme9652.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index df648b1d9217..edd765e22377 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -2071,9 +2072,10 @@ static int snd_rme9652_channel_info(struct snd_pcm_substream *substream, if (snd_BUG_ON(info->channel >= RME9652_NCHANNELS)) return -EINVAL; - if ((chn = rme9652->channel_map[info->channel]) < 0) { + chn = rme9652->channel_map[array_index_nospec(info->channel, + RME9652_NCHANNELS)]; + if (chn < 0) return -EINVAL; - } info->offset = chn * RME9652_CHANNEL_BUFFER_BYTES; info->first = 0; From d2479a30499d93377d3ab10b7822bc5f10ed7f22 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 20 Apr 2018 15:20:19 -0700 Subject: [PATCH 578/660] x86/pti: Fix boot problems from Global-bit setting commit 16dce603adc9de4237b7bf2ff5c5290f34373e7b Part of the global bit _setting_ patches also includes clearing the Global bit when it should not be enabled. That is done with set_memory_nonglobal(), which uses change_page_attr_clear() in pageattr.c under the covers. The TLB flushing code inside pageattr.c has has checks like BUG_ON(irqs_disabled()), looking for interrupt disabling that might cause deadlocks. But, these also trip in early boot on certain preempt configurations. Just copy the existing BUG_ON() sequence from cpa_flush_range() to the other two sites and check for early boot. Fixes: 39114b7a7 (x86/pti: Never implicitly clear _PAGE_GLOBAL for kernel image) Reported-by: Mariusz Ceier Reported-by: Aaro Koskinen Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Tested-by: Aaro Koskinen Acked-by: Ingo Molnar Cc: Andrea Arcangeli Cc: Juergen Gross Cc: Nadav Amit Cc: Kees Cook Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: David Woodhouse Cc: Hugh Dickins Cc: linux-mm@kvack.org Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Greg Kroah-Hartman Cc: Borislav Petkov Cc: Dan Williams Cc: Arjan van de Ven Link: https://lkml.kernel.org/r/20180420222019.20C4A410@viggo.jf.intel.com --- arch/x86/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 0f3d50f4c48c..4fadfd2b7017 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -172,7 +172,7 @@ static void __cpa_flush_all(void *arg) static void cpa_flush_all(unsigned long cache) { - BUG_ON(irqs_disabled()); + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); on_each_cpu(__cpa_flush_all, (void *) cache, 1); } @@ -236,7 +236,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ #endif - BUG_ON(irqs_disabled()); + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); From 58e65b51e6f9b9dd94a25ff2b2772222e0358099 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 20 Apr 2018 15:20:21 -0700 Subject: [PATCH 579/660] x86/pti: Fix boot warning from Global-bit setting commit 231df823c4f04176f607afc4576c989895cff40e The pageattr.c code attempts to process "faults" when it goes looking for PTEs to change and finds non-present entries. It allows these faults in the linear map which is "expected to have holes", but WARN()s about them elsewhere, like when called on the kernel image. However, change_page_attr_clear() is now called on the kernel image in the process of trying to clear the Global bit. This trips the warning in __cpa_process_fault() if a non-present PTE is encountered in the kernel image. The "holes" in the kernel image result from free_init_pages()'s use of set_memory_np(). These holes are totally fine, and result from normal operation, just as they would be in the kernel linear map. Just silence the warning when holes in the kernel image are encountered. Fixes: 39114b7a7 (x86/pti: Never implicitly clear _PAGE_GLOBAL for kernel image) Reported-by: Mariusz Ceier Reported-by: Aaro Koskinen Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Tested-by: Aaro Koskinen Acked-by: Ingo Molnar Cc: Andrea Arcangeli Cc: Juergen Gross Cc: Nadav Amit Cc: Kees Cook Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: David Woodhouse Cc: Hugh Dickins Cc: linux-mm@kvack.org Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Greg Kroah-Hartman Cc: Borislav Petkov Cc: Dan Williams Cc: Arjan van de Ven Link: https://lkml.kernel.org/r/20180420222021.1C7D2B3F@viggo.jf.intel.com --- arch/x86/mm/pageattr.c | 48 ++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4fadfd2b7017..3bded76e8d5c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -93,6 +93,18 @@ void arch_report_meminfo(struct seq_file *m) static inline void split_page_count(int level) { } #endif +static inline int +within(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr < end; +} + +static inline int +within_inclusive(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr <= end; +} + #ifdef CONFIG_X86_64 static inline unsigned long highmap_start_pfn(void) @@ -106,20 +118,25 @@ static inline unsigned long highmap_end_pfn(void) return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; } +static bool __cpa_pfn_in_highmap(unsigned long pfn) +{ + /* + * Kernel text has an alias mapping at a high address, known + * here as "highmap". + */ + return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn()); +} + +#else + +static bool __cpa_pfn_in_highmap(unsigned long pfn) +{ + /* There is no highmap on 32-bit */ + return false; +} + #endif -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr < end; -} - -static inline int -within_inclusive(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr <= end; -} - /* * Flushing functions */ @@ -1183,6 +1200,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, cpa->numpages = 1; cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; return 0; + + } else if (__cpa_pfn_in_highmap(cpa->pfn)) { + /* Faults in the highmap are OK, so do not warn: */ + return -EFAULT; } else { WARN(1, KERN_WARNING "CPA: called for zero pte. " "vaddr = %lx cpa->vaddr = %lx\n", vaddr, @@ -1335,8 +1356,7 @@ static int cpa_process_alias(struct cpa_data *cpa) * to touch the high mapped kernel as well: */ if (!within(vaddr, (unsigned long)_text, _brk_end) && - within_inclusive(cpa->pfn, highmap_start_pfn(), - highmap_end_pfn())) { + __cpa_pfn_in_highmap(cpa->pfn)) { unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; alias_cpa = *cpa; From a44ca8f5a30c008b54d07b00eed4eae7f169fcd0 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 20 Apr 2018 15:20:23 -0700 Subject: [PATCH 580/660] x86/pti: Reduce amount of kernel text allowed to be Global commit abb67605203687c8b7943d760638d0301787f8d9 Kees reported to me that I made too much of the kernel image global. It was far more than just text: I think this is too much set global: _end is after data, bss, and brk, and all kinds of other stuff that could hold secrets. I think this should match what mark_rodata_ro() is doing. This does exactly that. We use __end_rodata_hpage_align as our marker both because it is huge-page-aligned and it does not contain any sections we expect to hold secrets. Kees's logic was that r/o data is in the kernel image anyway and, in the case of traditional distributions, can be freely downloaded from the web, so there's no reason to hide it. Fixes: 8c06c7740 (x86/pti: Leave kernel text global for !PCID) Reported-by: Kees Cook Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Andrea Arcangeli Cc: Juergen Gross Cc: Josh Poimboeuf Cc: Greg Kroah-Hartman Cc: Peter Zijlstra Cc: David Woodhouse Cc: Hugh Dickins Cc: linux-mm@kvack.org Cc: Linus Torvalds Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Nadav Amit Cc: Dan Williams Cc: Arjan van de Ven Link: https://lkml.kernel.org/r/20180420222023.1C8B2B20@viggo.jf.intel.com --- arch/x86/mm/pti.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index f1fd52f449e0..ae3eb4f5d53b 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -430,12 +430,24 @@ static inline bool pti_kernel_image_global_ok(void) */ void pti_clone_kernel_text(void) { + /* + * rodata is part of the kernel image and is normally + * readable on the filesystem or on the web. But, do not + * clone the areas past rodata, they might contain secrets. + */ unsigned long start = PFN_ALIGN(_text); - unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); + unsigned long end = (unsigned long)__end_rodata_hpage_align; if (!pti_kernel_image_global_ok()) return; + pr_debug("mapping partial kernel image into user address space\n"); + + /* + * Note that this will undo _some_ of the work that + * pti_set_kernel_image_nonglobal() did to clear the + * global bit. + */ pti_clone_pmds(start, end, _PAGE_RW); } @@ -458,8 +470,6 @@ void pti_set_kernel_image_nonglobal(void) if (pti_kernel_image_global_ok()) return; - pr_debug("set kernel image non-global\n"); - set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT); } From b7c21bc56fbedf4a61b628c6b11e0d7048746cc1 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 20 Apr 2018 15:20:26 -0700 Subject: [PATCH 581/660] x86/pti: Disallow global kernel text with RANDSTRUCT commit 26d35ca6c3776784f8156e1d6f80cc60d9a2a915 RANDSTRUCT derives its hardening benefits from the attacker's lack of knowledge about the layout of kernel data structures. Keep the kernel image non-global in cases where RANDSTRUCT is in use to help keep the layout a secret. Fixes: 8c06c7740 (x86/pti: Leave kernel text global for !PCID) Reported-by: Kees Cook Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Andrea Arcangeli Cc: Juergen Gross Cc: Josh Poimboeuf Cc: Greg Kroah-Hartman Cc: Peter Zijlstra Cc: David Woodhouse Cc: Hugh Dickins Cc: linux-mm@kvack.org Cc: Linus Torvalds Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Nadav Amit Cc: Dan Williams Cc: Arjan van de Ven Cc: Vlastimil Babka Link: https://lkml.kernel.org/r/20180420222026.D0B4AAC9@viggo.jf.intel.com --- arch/x86/mm/pti.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ae3eb4f5d53b..4d418e705878 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -421,6 +421,16 @@ static inline bool pti_kernel_image_global_ok(void) if (boot_cpu_has(X86_FEATURE_K8)) return false; + /* + * RANDSTRUCT derives its hardening benefits from the + * attacker's lack of knowledge about the layout of kernel + * data structures. Keep the kernel image non-global in + * cases where RANDSTRUCT is in use to help keep the layout a + * secret. + */ + if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT)) + return false; + return true; } From 316d097c4cd4e7f2ef50c40cff2db266593c4ec4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 20 Apr 2018 15:20:28 -0700 Subject: [PATCH 582/660] x86/pti: Filter at vma->vm_page_prot population commit ce9962bf7e22bb3891655c349faff618922d4a73 0day reported warnings at boot on 32-bit systems without NX support: attempted to set unsupported pgprot: 8000000000000025 bits: 8000000000000000 supported: 7fffffffffffffff WARNING: CPU: 0 PID: 1 at arch/x86/include/asm/pgtable.h:540 handle_mm_fault+0xfc1/0xfe0: check_pgprot at arch/x86/include/asm/pgtable.h:535 (inlined by) pfn_pte at arch/x86/include/asm/pgtable.h:549 (inlined by) do_anonymous_page at mm/memory.c:3169 (inlined by) handle_pte_fault at mm/memory.c:3961 (inlined by) __handle_mm_fault at mm/memory.c:4087 (inlined by) handle_mm_fault at mm/memory.c:4124 The problem is that due to the recent commit which removed auto-massaging of page protections, filtering page permissions at PTE creation time is not longer done, so vma->vm_page_prot is passed unfiltered to PTE creation. Filter the page protections before they are installed in vma->vm_page_prot. Fixes: fb43d6cb91 ("x86/mm: Do not auto-massage page protections") Reported-by: Fengguang Wu Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Andrea Arcangeli Cc: Juergen Gross Cc: Kees Cook Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: David Woodhouse Cc: Hugh Dickins Cc: linux-mm@kvack.org Cc: Linus Torvalds Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Greg Kroah-Hartman Cc: Nadav Amit Cc: Dan Williams Cc: Arjan van de Ven Link: https://lkml.kernel.org/r/20180420222028.99D72858@viggo.jf.intel.com --- arch/x86/Kconfig | 4 ++++ arch/x86/include/asm/pgtable.h | 5 +++++ mm/mmap.c | 11 ++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 00fcf81f2c56..c07f492b871a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -52,6 +52,7 @@ config X86 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER + select ARCH_HAS_FILTER_PGPROT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 @@ -273,6 +274,9 @@ config ARCH_HAS_CPU_RELAX config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_HAS_FILTER_PGPROT + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5f49b4ff0c24..f1633de5a675 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + return canon_pgprot(prot); +} + static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) diff --git a/mm/mmap.c b/mm/mmap.c index 188f195883b9..9d5968d1e8e3 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -100,11 +100,20 @@ pgprot_t protection_map[16] __ro_after_init = { __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 }; +#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + return prot; +} +#endif + pgprot_t vm_get_page_prot(unsigned long vm_flags) { - return __pgprot(pgprot_val(protection_map[vm_flags & + pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) | pgprot_val(arch_vm_get_page_prot(vm_flags))); + + return arch_filter_pgprot(ret); } EXPORT_SYMBOL(vm_get_page_prot); From 65811834ba56e9ed88117cf6c09880416c9951ab Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 25 Apr 2018 17:07:27 +0800 Subject: [PATCH 583/660] ALSA: hda/realtek - change the location for one of two front mics On this Lenovo ThinkCentre machine. There are two front mics, we change the location for one of them. Relation: f33f79f3d0e5 ("ALSA: hda/realtek - change the location for one of two front microphones") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c13b7fd0f58b..8c238e51bb5a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6576,6 +6576,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3138, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), From ac61c1156623455c46701654abd8c99720bceea1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 Apr 2018 12:17:53 +1000 Subject: [PATCH 584/660] powerpc: Fix smp_send_stop NMI IPI handling The NMI IPI handler for a receiving CPU increments nmi_ipi_busy_count over the handler function call, which causes later smp_send_nmi_ipi() callers to spin until the call is finished. The stop_this_cpu() function never returns, so the busy count is never decremeted, which can cause the system to hang in some cases. For example panic() will call smp_send_stop() early on which calls stop_this_cpu() on other CPUs, then later in the reboot path, pnv_restart() will call smp_send_stop() again, which hangs. Fix this by adding a special case to the stop_this_cpu() handler to decrement the busy count, because it will never return. Now that the NMI/non-NMI versions of stop_this_cpu() are different, split them out into separate functions rather than doing #ifdef tricks to share the body between the two functions. Fixes: 6bed3237624e3 ("powerpc: use NMI IPI for smp_send_stop") Reported-by: Abdul Haleem Signed-off-by: Nicholas Piggin [mpe: Split out the functions, tweak change log a bit] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e16ec7b3b427..3582f30b60b7 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -565,11 +565,7 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif -#ifdef CONFIG_NMI_IPI -static void stop_this_cpu(struct pt_regs *regs) -#else static void stop_this_cpu(void *dummy) -#endif { /* Remove this CPU */ set_cpu_online(smp_processor_id(), false); @@ -580,10 +576,26 @@ static void stop_this_cpu(void *dummy) spin_cpu_relax(); } +#ifdef CONFIG_NMI_IPI +static void nmi_stop_this_cpu(struct pt_regs *regs) +{ + /* + * This is a special case because it never returns, so the NMI IPI + * handling would never mark it as done, which makes any later + * smp_send_nmi_ipi() call spin forever. Mark it done now. + */ + nmi_ipi_lock(); + nmi_ipi_busy_count--; + nmi_ipi_unlock(); + + stop_this_cpu(NULL); +} +#endif + void smp_send_stop(void) { #ifdef CONFIG_NMI_IPI - smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000); + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000); #else smp_call_function(stop_this_cpu, NULL, 0); #endif From 1b59788979acd230b9627276c76f6e6ba2c4709c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 Apr 2018 06:55:55 -0700 Subject: [PATCH 585/660] hwmon: (k10temp) Add temperature offset for Ryzen 2700X Ryzen 2700X has a temperature offset of 10 degrees C. If bit 19 of the Temperature Control register is set, there is an additional offset of 49 degrees C. Take this into account as well. Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Guenter Roeck --- drivers/hwmon/k10temp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 051a72eecb24..d19d08f81c6f 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -72,6 +72,7 @@ struct k10temp_data { struct pci_dev *pdev; void (*read_tempreg)(struct pci_dev *pdev, u32 *regval); int temp_offset; + u32 temp_adjust_mask; }; struct tctl_offset { @@ -84,6 +85,7 @@ static const struct tctl_offset tctl_offset_table[] = { { 0x17, "AMD Ryzen 5 1600X", 20000 }, { 0x17, "AMD Ryzen 7 1700X", 20000 }, { 0x17, "AMD Ryzen 7 1800X", 20000 }, + { 0x17, "AMD Ryzen 7 2700X", 10000 }, { 0x17, "AMD Ryzen Threadripper 1950X", 27000 }, { 0x17, "AMD Ryzen Threadripper 1920X", 27000 }, { 0x17, "AMD Ryzen Threadripper 1900X", 27000 }, @@ -129,6 +131,8 @@ static ssize_t temp1_input_show(struct device *dev, data->read_tempreg(data->pdev, ®val); temp = (regval >> 21) * 125; + if (regval & data->temp_adjust_mask) + temp -= 49000; if (temp > data->temp_offset) temp -= data->temp_offset; else @@ -259,12 +263,14 @@ static int k10temp_probe(struct pci_dev *pdev, data->pdev = pdev; if (boot_cpu_data.x86 == 0x15 && (boot_cpu_data.x86_model == 0x60 || - boot_cpu_data.x86_model == 0x70)) + boot_cpu_data.x86_model == 0x70)) { data->read_tempreg = read_tempreg_nb_f15; - else if (boot_cpu_data.x86 == 0x17) + } else if (boot_cpu_data.x86 == 0x17) { + data->temp_adjust_mask = 0x80000; data->read_tempreg = read_tempreg_nb_f17; - else + } else { data->read_tempreg = read_tempreg_pci; + } for (i = 0; i < ARRAY_SIZE(tctl_offset_table); i++) { const struct tctl_offset *entry = &tctl_offset_table[i]; From 877d8948d0aa402fbbede138fc73432bb335b65f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 Apr 2018 08:59:45 -0700 Subject: [PATCH 586/660] hwmon: (k10temp) Add support for AMD Ryzen w/ Vega graphics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable k10temp for AMD Ryzen APUs w/ Vega Mobile Gfx. Based on patch from René Rebe . Dropped temperature offsets since those are not supposed to apply for the affected CPUs. Cc: stable@vger.kernel.org # v4.16+ Cc: René Rebe Signed-off-by: Guenter Roeck --- drivers/hwmon/k10temp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index d19d08f81c6f..d2cc55e21374 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -40,6 +40,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #endif +#ifndef PCI_DEVICE_ID_AMD_17H_RR_NB +#define PCI_DEVICE_ID_AMD_17H_RR_NB 0x15d0 +#endif + /* CPUID function 0x80000001, ebx */ #define CPUID_PKGTYPE_MASK 0xf0000000 #define CPUID_PKGTYPE_F 0x00000000 @@ -298,6 +302,7 @@ static const struct pci_device_id k10temp_id_table[] = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_RR_NB) }, {} }; MODULE_DEVICE_TABLE(pci, k10temp_id_table); From bcdd0ca8cb8730573afebcaae4138f8f4c8eaa20 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 25 Apr 2018 20:12:31 +0900 Subject: [PATCH 587/660] tty: Use __GFP_NOFAIL for tty_ldisc_get() syzbot is reporting crashes triggered by memory allocation fault injection at tty_ldisc_get() [1]. As an attempt to handle OOM in a graceful way, we have tried commit 5362544bebe85071 ("tty: don't panic on OOM in tty_set_ldisc()"). But we reverted that attempt by commit a8983d01f9b7d600 ("Revert "tty: don't panic on OOM in tty_set_ldisc()"") due to reproducible crash. We should spend resource for finding and fixing race condition bugs rather than complicate error paths for 2 * sizeof(void *) bytes allocation failure. [1] https://syzkaller.appspot.com/bug?id=489d33fa386453859ead58ff5171d43772b13aa3 Signed-off-by: Tetsuo Handa Reported-by: syzbot Cc: Michal Hocko Cc: Vegard Nossum Cc: Dmitry Vyukov Cc: Jiri Slaby Cc: Peter Hurley Cc: One Thousand Gnomes Cc: Linus Torvalds Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 8a88a7787cfe..fb7329ab2b37 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -176,12 +176,11 @@ static struct tty_ldisc *tty_ldisc_get(struct tty_struct *tty, int disc) return ERR_CAST(ldops); } - ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL); - if (ld == NULL) { - put_ldops(ldops); - return ERR_PTR(-ENOMEM); - } - + /* + * There is no way to handle allocation failure of only 16 bytes. + * Let's simplify error handling and save more memory. + */ + ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL | __GFP_NOFAIL); ld->ops = ldops; ld->tty = tty; From d88b6d04447bbfd0b55f57afb231bf6f5b38c8f0 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Thu, 19 Apr 2018 10:59:31 -0700 Subject: [PATCH 588/660] MAINTAINERS: Remove me as maintainer of bcache Too much to do with other projects. I've enjoyed working with everyone here, and hope to occasionally contribute on bcache. Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0a1410d5a621..f0066efc121d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2614,7 +2614,6 @@ S: Maintained F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) -M: Michael Lyle M: Kent Overstreet L: linux-bcache@vger.kernel.org W: http://bcache.evilpiepirate.org From e9938f552fc279e87afcbff430cd6f8966ef31b3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Apr 2018 21:07:14 -0600 Subject: [PATCH 589/660] bcache: mark Coly Li as bcache maintainer Since Michael had to step back, Coly has agreed to be the new maintainer. Mark him as such. Signed-off-by: Jens Axboe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f0066efc121d..d7547101f2c7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2614,6 +2614,7 @@ S: Maintained F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) +M: Coly Li M: Kent Overstreet L: linux-bcache@vger.kernel.org W: http://bcache.evilpiepirate.org From fe644072dfee069d97a66ea9a80f4bc461499e6a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 20 Apr 2018 10:29:51 +0200 Subject: [PATCH 590/660] block: mq: Add some minor doc for core structs As it came up in discussion on the mailing list that the semantic meaning of 'blk_mq_ctx' and 'blk_mq_hw_ctx' isn't completely obvious to everyone, let's add some minimal kerneldoc for a starter. Signed-off-by: Linus Walleij Signed-off-by: Jens Axboe --- block/blk-mq.h | 3 +++ include/linux/blk-mq.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/block/blk-mq.h b/block/blk-mq.h index 88c558f71819..89b5cd3a6c70 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -7,6 +7,9 @@ struct blk_mq_tag_set; +/** + * struct blk_mq_ctx - State for a software queue facing the submitting CPUs + */ struct blk_mq_ctx { struct { spinlock_t lock; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e3986f4b3461..ebc34a5686dc 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -9,6 +9,9 @@ struct blk_mq_tags; struct blk_flush_queue; +/** + * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device + */ struct blk_mq_hw_ctx { struct { spinlock_t lock; From 1c758a2202a6b4624d0703013a2c6cfa6e7455aa Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 17 Apr 2018 17:41:28 -0400 Subject: [PATCH 591/660] tracing/x86: Update syscall trace events to handle new prefixed syscall func names Arnaldo noticed that the latest kernel is missing the syscall event system directory in x86. I bisected it down to d5a00528b58c ("syscalls/core, syscalls/x86: Rename struct pt_regs-based sys_*() to __x64_sys_*()"). The system call trace events are special, as there is only one trace event for all system calls (the raw_syscalls). But a macro that wraps the system calls creates meta data for them that copies the name to find the system call that maps to the system call table (the number). At boot up, it does a kallsyms lookup of the system call table to find the function that maps to the meta data of the system call. If it does not find a function, then that system call is ignored. Because the x86 system calls had "__x64_", or "__ia32_" prefixed to the "sys" for the names, they do not match the default compare algorithm. As this was a problem for power pc, the algorithm can be overwritten by the architecture. The solution is to have x86 have its own algorithm to do the compare and this brings back the system call trace events. Link: http://lkml.kernel.org/r/20180417174128.0f3457f0@gandalf.local.home Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Acked-by: Dominik Brodowski Acked-by: Thomas Gleixner Fixes: d5a00528b58c ("syscalls/core, syscalls/x86: Rename struct pt_regs-based sys_*() to __x64_sys_*()") Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/ftrace.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 09ad88572746..cc8f8fcf9b4a 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -46,7 +46,21 @@ int ftrace_int3_handler(struct pt_regs *regs); #endif /* CONFIG_FUNCTION_TRACER */ -#if !defined(__ASSEMBLY__) && !defined(COMPILE_OFFSETS) +#ifndef __ASSEMBLY__ + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* + * Compare the symbol name with the system call name. Skip the + * "__x64_sys", "__ia32_sys" or simple "sys" prefix. + */ + return !strcmp(sym + 3, name + 3) || + (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) || + (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)); +} + +#ifndef COMPILE_OFFSETS #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION) #include @@ -67,6 +81,7 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) return false; } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ -#endif /* !__ASSEMBLY__ && !COMPILE_OFFSETS */ +#endif /* !COMPILE_OFFSETS */ +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FTRACE_H */ From ba16293dad626d3e3827cfe0a1a743c1d93e76b7 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Fri, 20 Apr 2018 20:37:58 +0530 Subject: [PATCH 592/660] tracing: Fix kernel crash while using empty filter with perf Kernel is crashing when user tries to record 'ftrace:function' event with empty filter: # perf record -e ftrace:function --filter="" ls # dmesg BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 Oops: 0000 [#1] SMP PTI ... RIP: 0010:ftrace_profile_set_filter+0x14b/0x2d0 RSP: 0018:ffffa4a7c0da7d20 EFLAGS: 00010246 RAX: ffffa4a7c0da7d64 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000000092 RDI: ffff8c48ffc968f0 ... Call Trace: _perf_ioctl+0x54a/0x6b0 ? rcu_all_qs+0x5/0x30 ... After patch: # perf record -e ftrace:function --filter="" ls failed to set filter "" on event ftrace:function with 22 (Invalid argument) Also, if user tries to echo "" > filter, it used to throw an error. This behavior got changed by commit 80765597bc58 ("tracing: Rewrite filter logic to be simpler and faster"). This patch restores the behavior as a side effect: Before patch: # echo "" > filter # After patch: # echo "" > filter bash: echo: write error: Invalid argument # Link: http://lkml.kernel.org/r/20180420150758.19787-1-ravi.bangoria@linux.ibm.com Fixes: 80765597bc58 ("tracing: Rewrite filter logic to be simpler and faster") Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9b4716bb8bb0..1f951b3df60c 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1499,14 +1499,14 @@ static int process_preds(struct trace_event_call *call, return ret; } - if (!nr_preds) { - prog = NULL; - } else { - prog = predicate_parse(filter_string, nr_parens, nr_preds, + if (!nr_preds) + return -EINVAL; + + prog = predicate_parse(filter_string, nr_parens, nr_preds, parse_pred, call, pe); - if (IS_ERR(prog)) - return PTR_ERR(prog); - } + if (IS_ERR(prog)) + return PTR_ERR(prog); + rcu_assign_pointer(filter->prog, prog); return 0; } From bcbd385b61bbdef3491d662203ac2e8186e5be59 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 19 Apr 2018 12:55:56 +0200 Subject: [PATCH 593/660] kprobes: Fix random address output of blacklist file File /sys/kernel/debug/kprobes/blacklist displays random addresses: [root@s8360046 linux]# cat /sys/kernel/debug/kprobes/blacklist 0x0000000047149a90-0x00000000bfcb099a print_type_x8 .... This breaks 'perf probe' which uses the blacklist file to prohibit probes on certain functions by checking the address range. Fix this by printing the correct (unhashed) address. The file mode is read all but this is not an issue as the file hierarchy points out: # ls -ld /sys/ /sys/kernel/ /sys/kernel/debug/ /sys/kernel/debug/kprobes/ /sys/kernel/debug/kprobes/blacklist dr-xr-xr-x 12 root root 0 Apr 19 07:56 /sys/ drwxr-xr-x 8 root root 0 Apr 19 07:56 /sys/kernel/ drwx------ 16 root root 0 Apr 19 06:56 /sys/kernel/debug/ drwxr-xr-x 2 root root 0 Apr 19 06:56 /sys/kernel/debug/kprobes/ -r--r--r-- 1 root root 0 Apr 19 06:56 /sys/kernel/debug/kprobes/blacklist Everything in and below /sys/kernel/debug is rwx to root only, no group or others have access. Background: Directory /sys/kernel/debug/kprobes is created by debugfs_create_dir() which sets the mode bits to rwxr-xr-x. Maybe change that to use the parent's directory mode bits instead? Link: http://lkml.kernel.org/r/20180419105556.86664-1-tmricht@linux.ibm.com Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: stable@vger.kernel.org Cc: # v4.15+ Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S Miller Cc: Masami Hiramatsu Cc: acme@kernel.org Signed-off-by: Thomas Richter Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 102160ff5c66..ea619021d901 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2428,7 +2428,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) struct kprobe_blacklist_entry *ent = list_entry(v, struct kprobe_blacklist_entry, list); - seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr, + seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr, (void *)ent->end_addr, (void *)ent->start_addr); return 0; } From ef39a01203ed2f52319906db555ca66c234ef19b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 5 Apr 2018 18:28:42 +0900 Subject: [PATCH 594/660] selftests: ftrace: Fix trigger extended error testcase Previous testcase redirects echo-out into /dev/null using "&>" as below echo "trigger-command" >> trigger &> /dev/null But this means redirecting both stdout and stderr into /dev/null because it is same as below echo "trigger-command" >> trigger > /dev/null 2>&1 So ">> trigger" redirects stdout to trigger file, but next "> /dev/null" redirects stdout to /dev/null again and the last "2>/&1" redirects stderr to stdout (/dev/null) This fixes it by "2> /dev/null". And also, since it must fail, add "!" to echo command. Link: http://lkml.kernel.org/r/152292052250.15769.12565292689264162435.stgit@devbox Fixes: f06eec4d0f2c ("selftests: ftrace: Add inter-event hist triggers testcases") Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../trigger/inter-event/trigger-extended-error-support.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc index 786dce7e48be..2aabab363cfb 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc @@ -29,7 +29,7 @@ do_reset echo "Test extended error support" echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger -echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null +! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then fail "Failed to generate extended error in histogram" fi From 25aa50e0ca397a5e5d4d6fcecefa8107877d1dd0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 5 Apr 2018 18:29:12 +0900 Subject: [PATCH 595/660] selftests: ftrace: Add a testcase for multiple actions on trigger Add a testcase for multiple actions with different parameters on an event trigger, which has been fixed by commit 192c283e93bd ("tracing: Add action comparisons when testing matching hist triggers"). Link: http://lkml.kernel.org/r/152292055227.15769.6327959816123227152.stgit@devbox Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../trigger-multi-actions-accept.tc | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc new file mode 100644 index 000000000000..c193dce611a2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc @@ -0,0 +1,44 @@ +#!/bin/sh +# description: event trigger - test multiple actions on hist trigger + + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test multiple actions on hist trigger" +echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events +TRIGGER1=events/sched/sched_wakeup/trigger +TRIGGER2=events/sched/sched_switch/trigger + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1 +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2 + +do_reset + +exit 0 From 9a0fd675304d410f3a9586e1b333e16f4658d56c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 15 Mar 2018 14:06:39 +0800 Subject: [PATCH 596/660] tracing: Fix missing tab for hwlat_detector print format It's been missing for a while but no one is touching that up. Fix it. Link: http://lkml.kernel.org/r/20180315060639.9578-1-peterx@redhat.com CC: Ingo Molnar Cc:stable@vger.kernel.org Fixes: 7b2c86250122d ("tracing: Add NMI tracing in hwlat detector") Signed-off-by: Peter Xu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_entries.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e954ae3d82c0..e3a658bac10f 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -356,7 +356,7 @@ FTRACE_ENTRY(hwlat, hwlat_entry, __field( unsigned int, seqnum ) ), - F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n", + F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tnmi-ts:%llu\tnmi-count:%u\n", __entry->seqnum, __entry->tv_sec, __entry->tv_nsec, From 8a7d6003df41cb16f6b3b620da044fbd92d2f5ee Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Apr 2018 16:19:13 +0200 Subject: [PATCH 597/660] ALSA: hda - Skip jack and others for non-existing PCM streams When CONFIG_SND_DYNAMIC_MINORS isn't set, there are only limited number of devices available, and HD-audio, especially with HDMI/DP codec, will fail to create more than two devices. The driver warns about the lack of such devices and skips the PCM device creations, but the HDMI driver still tries to create the corresponding JACK, SPDIF and ELD controls even for the non-existing PCM substreams. This results in confusion on user-space, and even may break the operation. Similarly, Intel HDMI/DP codec builds the ELD notification from i915 graphics driver, and this may be broken if a notification is sent for the non-existing PCM stream. This patch adds the check of the existence of the assigned PCM substream in the both scenarios above, and skips the further operation if the PCM substream is not assigned. Fixes: 9152085defb6 ("ALSA: hda - add DP MST audio support") Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index b4f1b6e88305..7d7eb1354eee 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1383,6 +1383,8 @@ static void hdmi_pcm_setup_pin(struct hdmi_spec *spec, pcm = get_pcm_rec(spec, per_pin->pcm_idx); else return; + if (!pcm->pcm) + return; if (!test_bit(per_pin->pcm_idx, &spec->pcm_in_use)) return; @@ -2151,8 +2153,13 @@ static int generic_hdmi_build_controls(struct hda_codec *codec) int dev, err; int pin_idx, pcm_idx; - for (pcm_idx = 0; pcm_idx < spec->pcm_used; pcm_idx++) { + if (!get_pcm_rec(spec, pcm_idx)->pcm) { + /* no PCM: mark this for skipping permanently */ + set_bit(pcm_idx, &spec->pcm_bitmap); + continue; + } + err = generic_hdmi_build_jack(codec, pcm_idx); if (err < 0) return err; From ad64dc0137968f09800e58174bbfd5eac9fe5418 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Wed, 10 Jan 2018 10:01:38 -0500 Subject: [PATCH 598/660] drm/amd/display: Fix deadlock when flushing irq Lock irq table when reading a work in queue, unlock to flush the work, lock again till all tasks are cleared Signed-off-by: Mikita Lipski Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 490017df371d..4be21bf54749 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -329,14 +329,15 @@ void amdgpu_dm_irq_fini(struct amdgpu_device *adev) { int src; struct irq_list_head *lh; + unsigned long irq_table_flags; DRM_DEBUG_KMS("DM_IRQ: releasing resources.\n"); - for (src = 0; src < DAL_IRQ_SOURCES_NUMBER; src++) { - + DM_IRQ_TABLE_LOCK(adev, irq_table_flags); /* The handler was removed from the table, * it means it is safe to flush all the 'work' * (because no code can schedule a new one). */ lh = &adev->dm.irq_handler_list_low_tab[src]; + DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags); flush_work(&lh->work); } } From f2877656809386d7bc62c2b1c1b4e58404c486d4 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Mon, 16 Apr 2018 17:28:11 -0400 Subject: [PATCH 599/660] drm/amd/display: Disallow enabling CRTC without primary plane with FB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The below commit "drm/atomic: Try to preserve the crtc enabled state in drm_atomic_remove_fb, v2" introduces a slight behavioral change to rmfb. Instead of disabling a crtc when the primary plane is disabled, it now preserves it. Since DC is currently not equipped to handle this we need to fail such a commit, otherwise we might see a corrupted screen. This is based on Shirish's previous approach but avoids adding all planes to the new atomic state which leads to a full update in DC for any commit, and is not what we intend. Theoretically DM should be able to deal with states with fully populated planes, even for simple updates, such as cursor updates. This should still be addressed in the future. Signed-off-by: Harry Wentland Tested-by: Michel Dänzer Reviewed-by: Tony Cheng Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4e2f379ce217..1dd1142246c2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4557,6 +4557,7 @@ static int dm_update_crtcs_state(struct dc *dc, struct amdgpu_dm_connector *aconnector = NULL; struct drm_connector_state *new_con_state = NULL; struct dm_connector_state *dm_conn_state = NULL; + struct drm_plane_state *new_plane_state = NULL; new_stream = NULL; @@ -4564,6 +4565,13 @@ static int dm_update_crtcs_state(struct dc *dc, dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); acrtc = to_amdgpu_crtc(crtc); + new_plane_state = drm_atomic_get_new_plane_state(state, new_crtc_state->crtc->primary); + + if (new_crtc_state->enable && new_plane_state && !new_plane_state->fb) { + ret = -EINVAL; + goto fail; + } + aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); /* TODO This hack should go away */ @@ -4760,7 +4768,7 @@ static int dm_update_planes_state(struct dc *dc, if (!dm_old_crtc_state->stream) continue; - DRM_DEBUG_DRIVER("Disabling DRM plane: %d on DRM crtc %d\n", + DRM_DEBUG_ATOMIC("Disabling DRM plane: %d on DRM crtc %d\n", plane->base.id, old_plane_crtc->base.id); if (!dc_remove_plane_from_context( From c7b8de00384be49dc1617a838b0ce89a0235f319 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Thu, 8 Mar 2018 22:05:35 -0500 Subject: [PATCH 600/660] drm/amd/display: Don't read EDID in atomic_check We shouldn't attempt to read EDID in atomic_check. We really shouldn't even be modifying the connector object, or any other non-state object, but this is a start at least. Moving EDID cleanup to dm_dp_mst_connector_destroy from dm_dp_destroy_mst_connector to ensure the EDID is still available for headless mode. Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 32 ++++++------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 8291d74f26bc..305292a9ff80 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -161,6 +161,11 @@ dm_dp_mst_connector_destroy(struct drm_connector *connector) struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); struct amdgpu_encoder *amdgpu_encoder = amdgpu_dm_connector->mst_encoder; + if (amdgpu_dm_connector->edid) { + kfree(amdgpu_dm_connector->edid); + amdgpu_dm_connector->edid = NULL; + } + drm_encoder_cleanup(&amdgpu_encoder->base); kfree(amdgpu_encoder); drm_connector_cleanup(connector); @@ -181,28 +186,22 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { void dm_dp_mst_dc_sink_create(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct edid *edid; struct dc_sink *dc_sink; struct dc_sink_init_data init_params = { .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; + /* FIXME none of this is safe. we shouldn't touch aconnector here in + * atomic_check + */ + /* * TODO: Need to further figure out why ddc.algo is NULL while MST port exists */ if (!aconnector->port || !aconnector->port->aux.ddc.algo) return; - edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); - - if (!edid) { - drm_mode_connector_update_edid_property( - &aconnector->base, - NULL); - return; - } - - aconnector->edid = edid; + ASSERT(aconnector->edid); dc_sink = dc_link_add_remote_sink( aconnector->dc_link, @@ -215,9 +214,6 @@ void dm_dp_mst_dc_sink_create(struct drm_connector *connector) amdgpu_dm_add_sink_to_freesync_module( connector, aconnector->edid); - - drm_mode_connector_update_edid_property( - &aconnector->base, aconnector->edid); } static int dm_dp_mst_get_modes(struct drm_connector *connector) @@ -424,14 +420,6 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; } - if (aconnector->edid) { - kfree(aconnector->edid); - aconnector->edid = NULL; - } - - drm_mode_connector_update_edid_property( - &aconnector->base, - NULL); aconnector->mst_connected = false; } From d973f8535f033fac1599bd5eae6835e6bb304da3 Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Tue, 17 Apr 2018 13:49:48 -0400 Subject: [PATCH 601/660] drm/amd/display: Update MST edid property every time Extended fix to: "Don't read EDID in atomic_check" Fix display property not observed in GUI display after hot plug. Call drm_mode_connector_update_edid_property every time in .get_modes hook, due to the fact that edid property is getting removed from usermode ioctl DRM_IOCTL_MODE_GETCONNECTOR each time in hot unplug. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 305292a9ff80..8c1d084429dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -253,11 +253,11 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) if (aconnector->dc_sink) amdgpu_dm_add_sink_to_freesync_module( connector, edid); - - drm_mode_connector_update_edid_property( - &aconnector->base, edid); } + drm_mode_connector_update_edid_property( + &aconnector->base, aconnector->edid); + ret = drm_add_edid_modes(connector, aconnector->edid); return ret; From 7ad35721e7d5a5b56a4eddcdd9920c3c1e51ae4c Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Tue, 17 Apr 2018 14:39:09 -0400 Subject: [PATCH 602/660] drm/amd/display: Check dc_sink every time in MST hotplug Extended fix to: "Don't read EDID in atomic_check" Fix issue of missing dc_sink in .mode_valid in hot plug routine. Need to check dc_sink everytime in .get_modes hook after checking edid, since edid is not getting removed in hot unplug but dc_sink doesn't. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Roman Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 8c1d084429dc..ace9ad578ca0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -226,10 +226,6 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) if (!aconnector->edid) { struct edid *edid; - struct dc_sink *dc_sink; - struct dc_sink_init_data init_params = { - .link = aconnector->dc_link, - .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); if (!edid) { @@ -240,11 +236,17 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) } aconnector->edid = edid; + } + if (!aconnector->dc_sink) { + struct dc_sink *dc_sink; + struct dc_sink_init_data init_params = { + .link = aconnector->dc_link, + .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; dc_sink = dc_link_add_remote_sink( aconnector->dc_link, - (uint8_t *)edid, - (edid->extensions + 1) * EDID_LENGTH, + (uint8_t *)aconnector->edid, + (aconnector->edid->extensions + 1) * EDID_LENGTH, &init_params); dc_sink->priv = aconnector; @@ -252,7 +254,7 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) if (aconnector->dc_sink) amdgpu_dm_add_sink_to_freesync_module( - connector, edid); + connector, aconnector->edid); } drm_mode_connector_update_edid_property( From 0183df0baf7912b9fae16b5281f1eabbb4572b3d Mon Sep 17 00:00:00 2001 From: Jeffrin Jose T Date: Mon, 23 Apr 2018 19:40:22 +0530 Subject: [PATCH 603/660] selftests:firmware: fixes a call to a wrong function name This is a patch to the tools/testing/selftests/firmware/fw_run_tests.sh file which fixes a bug which calls to a wrong function name,which in turn blocks the execution of certain tests. Signed-off-by: Jeffrin Jose T Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/fw_run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh index 06d638e9dc62..cffdd4eb0a57 100755 --- a/tools/testing/selftests/firmware/fw_run_tests.sh +++ b/tools/testing/selftests/firmware/fw_run_tests.sh @@ -66,5 +66,5 @@ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then run_test_config_0003 else echo "Running basic kernel configuration, working with your config" - run_test + run_tests fi From 4412efecf7fda3b8f9f18feed7938f2281f5ccbc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 25 Apr 2018 04:01:44 +0800 Subject: [PATCH 604/660] Revert "blk-mq: remove code for dealing with remapping queue" This reverts commit 37c7c6c76d431dd7ef9c29d95f6052bd425f004c. Turns out some drivers(most are FC drivers) may not use managed IRQ affinity, and has their customized .map_queues meantime, so still keep this code for avoiding regression. Reported-by: Laurence Oberman Tested-by: Laurence Oberman Tested-by: Christian Borntraeger Tested-by: Stefan Haberland Cc: Ewan Milne Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e4aa36817367..c3621453ad87 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2336,7 +2336,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, static void blk_mq_map_swqueue(struct request_queue *q) { - unsigned int i; + unsigned int i, hctx_idx; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; @@ -2353,8 +2353,23 @@ static void blk_mq_map_swqueue(struct request_queue *q) /* * Map software to hardware queues. + * + * If the cpu isn't present, the cpu is mapped to first hctx. */ for_each_possible_cpu(i) { + hctx_idx = q->mq_map[i]; + /* unmapped hw queue can be remapped after CPU topo changed */ + if (!set->tags[hctx_idx] && + !__blk_mq_alloc_rq_map(set, hctx_idx)) { + /* + * If tags initialization fail for some hctx, + * that hctx won't be brought online. In this + * case, remap the current ctx to hctx[0] which + * is guaranteed to always have tags allocated + */ + q->mq_map[i] = 0; + } + ctx = per_cpu_ptr(q->queue_ctx, i); hctx = blk_mq_map_queue(q, i); @@ -2366,8 +2381,21 @@ static void blk_mq_map_swqueue(struct request_queue *q) mutex_unlock(&q->sysfs_lock); queue_for_each_hw_ctx(q, hctx, i) { - /* every hctx should get mapped by at least one CPU */ - WARN_ON(!hctx->nr_ctx); + /* + * If no software queues are mapped to this hardware queue, + * disable it and free the request entries. + */ + if (!hctx->nr_ctx) { + /* Never unmap queue 0. We need it as a + * fallback in case of a new remap fails + * allocation + */ + if (i && set->tags[i]) + blk_mq_free_map_and_requests(set, i); + + hctx->tags = NULL; + continue; + } hctx->tags = set->tags[i]; WARN_ON(!hctx->tags); From 6b614a87f3f477571e319281e84dba11e0ea0a76 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Apr 2018 15:21:44 +0200 Subject: [PATCH 605/660] ARM: amba: Fix race condition with driver_override The driver_override implementation is susceptible to a race condition when different threads are reading vs storing a different driver override. Add locking to avoid this race condition. Cfr. commits 6265539776a0810b ("driver core: platform: fix race condition with driver_override") and 9561475db680f714 ("PCI: Fix race condition with driver_override"). Fixes: 3cf385713460eb2b ("ARM: 8256/1: driver coamba: add device binding path 'driver_override'") Signed-off-by: Geert Uytterhoeven Reviewed-by: Todd Kjos Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/amba/bus.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 594c228d2f02..c77eb6e65646 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -69,11 +69,15 @@ static ssize_t driver_override_show(struct device *_dev, struct device_attribute *attr, char *buf) { struct amba_device *dev = to_amba_device(_dev); + ssize_t len; if (!dev->driver_override) return 0; - return sprintf(buf, "%s\n", dev->driver_override); + device_lock(_dev); + len = sprintf(buf, "%s\n", dev->driver_override); + device_unlock(_dev); + return len; } static ssize_t driver_override_store(struct device *_dev, @@ -81,7 +85,7 @@ static ssize_t driver_override_store(struct device *_dev, const char *buf, size_t count) { struct amba_device *dev = to_amba_device(_dev); - char *driver_override, *old = dev->driver_override, *cp; + char *driver_override, *old, *cp; if (count > PATH_MAX) return -EINVAL; @@ -94,12 +98,15 @@ static ssize_t driver_override_store(struct device *_dev, if (cp) *cp = '\0'; + device_lock(_dev); + old = dev->driver_override; if (strlen(driver_override)) { dev->driver_override = driver_override; } else { kfree(driver_override); dev->driver_override = NULL; } + device_unlock(_dev); kfree(old); From d2ffed5185df9d8d9ccd150e4340e3b6f96a8381 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Apr 2018 15:21:45 +0200 Subject: [PATCH 606/660] ARM: amba: Don't read past the end of sysfs "driver_override" buffer When printing the driver_override parameter when it is 4095 and 4094 bytes long, the printing code would access invalid memory because we need count + 1 bytes for printing. Cfr. commits 4efe874aace57dba ("PCI: Don't read past the end of sysfs "driver_override" buffer") and bf563b01c2895a4b ("driver core: platform: Don't read past the end of "driver_override" buffer"). Fixes: 3cf385713460eb2b ("ARM: 8256/1: driver coamba: add device binding path 'driver_override'") Signed-off-by: Geert Uytterhoeven Reviewed-by: Todd Kjos Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/amba/bus.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index c77eb6e65646..8e6ac3031662 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -87,7 +87,8 @@ static ssize_t driver_override_store(struct device *_dev, struct amba_device *dev = to_amba_device(_dev); char *driver_override, *old, *cp; - if (count > PATH_MAX) + /* We need to keep extra room for a newline */ + if (count >= (PAGE_SIZE - 1)) return -EINVAL; driver_override = kstrndup(buf, count, GFP_KERNEL); From 0d5ec281c0175d10f8d9be4d4a9c5fb37767ed00 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 22 Apr 2018 19:51:22 -0500 Subject: [PATCH 607/660] SMB311: Fix reconnect The preauth hash was not being recalculated properly on reconnect of SMB3.11 dialect mounts (which caused access denied repeatedly on auto-reconnect). Fixes: 8bd68c6e47ab ("CIFS: implement v3.11 preauth integrity") Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg --- fs/cifs/transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 3fb0e433b8e2..927226a2122f 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -753,7 +753,7 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, goto out; #ifdef CONFIG_CIFS_SMB311 - if (ses->status == CifsNew) + if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) smb311_update_preauth_hash(ses, rqst->rq_iov+1, rqst->rq_nvec-1); #endif @@ -798,7 +798,7 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, *resp_buf_type = CIFS_SMALL_BUFFER; #ifdef CONFIG_CIFS_SMB311 - if (ses->status == CifsNew) { + if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) { struct kvec iov = { .iov_base = buf + 4, .iov_len = get_rfc1002_length(buf) From bb4c0419476bd3982ba802f0f49de83cd79532d8 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Apr 2018 12:17:08 -0700 Subject: [PATCH 608/660] cifs: smbd: Don't use RDMA read/write when signing is used SMB server will not sign data transferred through RDMA read/write. When signing is used, it's a good idea to have all the data signed. In this case, use RDMA send/recv for all data transfers. This will degrade performance as this is not generally configured in RDMA environemnt. So warn the user on signing and RDMA send/recv. Signed-off-by: Long Li Acked-by: Ronnie Sahlberg Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 3 +++ fs/cifs/smb2ops.c | 18 ++++++++++++++---- fs/cifs/smb2pdu.c | 4 ++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6d3e40d7029c..1529a088383d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -455,6 +455,9 @@ cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required) server->sign = true; } + if (cifs_rdma_enabled(server) && server->sign) + cifs_dbg(VFS, "Signing is enabled, and RDMA read/write will be disabled"); + return 0; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 38ebf3f357d2..b76b85881dcc 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -252,9 +252,14 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); #ifdef CONFIG_CIFS_SMB_DIRECT - if (server->rdma) - wsize = min_t(unsigned int, + if (server->rdma) { + if (server->sign) + wsize = min_t(unsigned int, + wsize, server->smbd_conn->max_fragmented_send_size); + else + wsize = min_t(unsigned int, wsize, server->smbd_conn->max_readwrite_size); + } #endif if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); @@ -272,9 +277,14 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); #ifdef CONFIG_CIFS_SMB_DIRECT - if (server->rdma) - rsize = min_t(unsigned int, + if (server->rdma) { + if (server->sign) + rsize = min_t(unsigned int, + rsize, server->smbd_conn->max_fragmented_recv_size); + else + rsize = min_t(unsigned int, rsize, server->smbd_conn->max_readwrite_size); + } #endif if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 9aea138dd71f..60db51bae0e3 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2591,7 +2591,7 @@ smb2_new_read_req(void **buf, unsigned int *total_len, * If we want to do a RDMA write, fill in and append * smbd_buffer_descriptor_v1 to the end of read request */ - if (server->rdma && rdata && + if (server->rdma && rdata && !server->sign && rdata->bytes >= server->smbd_conn->rdma_readwrite_threshold) { struct smbd_buffer_descriptor_v1 *v1; @@ -2969,7 +2969,7 @@ smb2_async_writev(struct cifs_writedata *wdata, * If we want to do a server RDMA read, fill in and append * smbd_buffer_descriptor_v1 to the end of write request */ - if (server->rdma && wdata->bytes >= + if (server->rdma && !server->sign && wdata->bytes >= server->smbd_conn->rdma_readwrite_threshold) { struct smbd_buffer_descriptor_v1 *v1; From 8bcda1d2a79da4ab84162574eee2c9f6e1a12a03 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Apr 2018 12:17:07 -0700 Subject: [PATCH 609/660] cifs: smbd: Avoid allocating iov on the stack It's not necessary to allocate another iov when going through the buffers in smbd_send() through RDMA send. Remove it to reduce stack size. Thanks to Matt for spotting a printk typo in the earlier version of this. CC: Matt Redfearn Signed-off-by: Long Li Acked-by: Ronnie Sahlberg Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/smbdirect.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 87817ddcc096..c62f7c95683c 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -2086,7 +2086,7 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) int start, i, j; int max_iov_size = info->max_send_size - sizeof(struct smbd_data_transfer); - struct kvec iov[SMBDIRECT_MAX_SGE]; + struct kvec *iov; int rc; info->smbd_send_pending++; @@ -2096,32 +2096,20 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) } /* - * This usually means a configuration error - * We use RDMA read/write for packet size > rdma_readwrite_threshold - * as long as it's properly configured we should never get into this - * situation - */ - if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) { - log_write(ERR, "maximum send segment %x exceeding %x\n", - rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE); - rc = -EINVAL; - goto done; - } - - /* - * Remove the RFC1002 length defined in MS-SMB2 section 2.1 - * It is used only for TCP transport + * Skip the RFC1002 length defined in MS-SMB2 section 2.1 + * It is used only for TCP transport in the iov[0] * In future we may want to add a transport layer under protocol * layer so this will only be issued to TCP transport */ - iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4; - iov[0].iov_len = rqst->rq_iov[0].iov_len - 4; - buflen += iov[0].iov_len; + + if (rqst->rq_iov[0].iov_len != 4) { + log_write(ERR, "expected the pdu length in 1st iov, but got %zu\n", rqst->rq_iov[0].iov_len); + return -EINVAL; + } + iov = &rqst->rq_iov[1]; /* total up iov array first */ - for (i = 1; i < rqst->rq_nvec; i++) { - iov[i].iov_base = rqst->rq_iov[i].iov_base; - iov[i].iov_len = rqst->rq_iov[i].iov_len; + for (i = 0; i < rqst->rq_nvec-1; i++) { buflen += iov[i].iov_len; } @@ -2198,14 +2186,14 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) goto done; } i++; - if (i == rqst->rq_nvec) + if (i == rqst->rq_nvec-1) break; } start = i; buflen = 0; } else { i++; - if (i == rqst->rq_nvec) { + if (i == rqst->rq_nvec-1) { /* send out all remaining vecs */ remaining_data_length -= buflen; log_write(INFO, From b93815d0f37e7c4a056a143e6d782abc084ea56b Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Wed, 25 Apr 2018 12:25:39 -0400 Subject: [PATCH 610/660] firmware: some documentation fixes Including: - Fixup outdated kernel-doc paths - Slightly too short title underline - Some typos Signed-off-by: Andres Rodriguez Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/firmware/request_firmware.rst | 6 +++--- drivers/base/firmware_loader/fallback.c | 4 ++-- drivers/base/firmware_loader/fallback.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/firmware/request_firmware.rst b/Documentation/driver-api/firmware/request_firmware.rst index 20f21ed427a5..d5ec95a7195b 100644 --- a/Documentation/driver-api/firmware/request_firmware.rst +++ b/Documentation/driver-api/firmware/request_firmware.rst @@ -50,11 +50,11 @@ Special optimizations on reboot Some devices have an optimization in place to enable the firmware to be retained during system reboot. When such optimizations are used the driver author must ensure the firmware is still available on resume from suspend, -this can be done with firmware_request_cache() insted of requesting for the -firmare to be loaded. +this can be done with firmware_request_cache() instead of requesting for the +firmware to be loaded. firmware_request_cache() ------------------------ +------------------------ .. kernel-doc:: drivers/base/firmware_loader/main.c :functions: firmware_request_cache diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 31b5015b59fe..358354148dec 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -537,8 +537,8 @@ exit: } /** - * fw_load_sysfs_fallback - load a firmware via the syfs fallback mechanism - * @fw_sysfs: firmware syfs information for the firmware to load + * fw_load_sysfs_fallback - load a firmware via the sysfs fallback mechanism + * @fw_sysfs: firmware sysfs information for the firmware to load * @opt_flags: flags of options, FW_OPT_* * @timeout: timeout to wait for the load * diff --git a/drivers/base/firmware_loader/fallback.h b/drivers/base/firmware_loader/fallback.h index dfebc644ed35..f8255670a663 100644 --- a/drivers/base/firmware_loader/fallback.h +++ b/drivers/base/firmware_loader/fallback.h @@ -6,7 +6,7 @@ #include /** - * struct firmware_fallback_config - firmware fallback configuratioon settings + * struct firmware_fallback_config - firmware fallback configuration settings * * Helps describe and fine tune the fallback mechanism. * From 2855b33514d290c51d52d94e25d3ef942cd4d578 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 Apr 2018 19:54:23 +0300 Subject: [PATCH 611/660] virtio_console: don't tie bufs to a vq an allocated buffer doesn't need to be tied to a vq - only vq->vdev is ever used. Pass the function the just what it needs - the vdev. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 468f06134012..3e56f328b4cb 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -422,7 +422,7 @@ static void reclaim_dma_bufs(void) } } -static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size, +static struct port_buffer *alloc_buf(struct virtio_device *vdev, size_t buf_size, int pages) { struct port_buffer *buf; @@ -445,16 +445,16 @@ static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size, return buf; } - if (is_rproc_serial(vq->vdev)) { + if (is_rproc_serial(vdev)) { /* * Allocate DMA memory from ancestor. When a virtio * device is created by remoteproc, the DMA memory is * associated with the grandparent device: * vdev => rproc => platform-dev. */ - if (!vq->vdev->dev.parent || !vq->vdev->dev.parent->parent) + if (!vdev->dev.parent || !vdev->dev.parent->parent) goto free_buf; - buf->dev = vq->vdev->dev.parent->parent; + buf->dev = vdev->dev.parent->parent; /* Increase device refcnt to avoid freeing it */ get_device(buf->dev); @@ -838,7 +838,7 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, count = min((size_t)(32 * 1024), count); - buf = alloc_buf(port->out_vq, count, 0); + buf = alloc_buf(port->portdev->vdev, count, 0); if (!buf) return -ENOMEM; @@ -957,7 +957,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, if (ret < 0) goto error_out; - buf = alloc_buf(port->out_vq, 0, pipe->nrbufs); + buf = alloc_buf(port->portdev->vdev, 0, pipe->nrbufs); if (!buf) { ret = -ENOMEM; goto error_out; @@ -1374,7 +1374,7 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) nr_added_bufs = 0; do { - buf = alloc_buf(vq, PAGE_SIZE, 0); + buf = alloc_buf(vq->vdev, PAGE_SIZE, 0); if (!buf) break; From 24a7e4d20783c0514850f24a5c41ede46ab058f0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 Apr 2018 20:22:40 +0300 Subject: [PATCH 612/660] virtio: add ability to iterate over vqs For cleanup it's helpful to be able to simply scan all vqs and discard all data. Add an iterator to do that. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 988c7355bc22..fa1b5da2804e 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -157,6 +157,9 @@ int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); #endif +#define virtio_device_for_each_vq(vdev, vq) \ + list_for_each_entry(vq, &vdev->vqs, list) + /** * virtio_driver - operations for a virtio I/O driver * @driver: underlying device driver (populate name and owner). From a7a69ec0d8e4a58be7db88d33cbfa2912807bb2b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 Apr 2018 20:24:23 +0300 Subject: [PATCH 613/660] virtio_console: free buffers after reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Console driver is out of spec. The spec says: A driver MUST NOT decrement the available idx on a live virtqueue (ie. there is no way to “unexpose” buffers). and it does exactly that by trying to detach unused buffers without doing a device reset first. Defer detaching the buffers until device unplug. Of course this means we might get an interrupt for a vq without an attached port now. Handle that by discarding the consumed buffer. Reported-by: Tiwei Bie Fixes: b3258ff1d6 ("virtio: Decrement avail idx on buffer detach") Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 49 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 3e56f328b4cb..26a66ffd943e 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1402,7 +1402,6 @@ static int add_port(struct ports_device *portdev, u32 id) { char debugfs_name[16]; struct port *port; - struct port_buffer *buf; dev_t devt; unsigned int nr_added_bufs; int err; @@ -1513,8 +1512,6 @@ static int add_port(struct ports_device *portdev, u32 id) return 0; free_inbufs: - while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf, true); free_device: device_destroy(pdrvdata.class, port->dev->devt); free_cdev: @@ -1539,34 +1536,14 @@ static void remove_port(struct kref *kref) static void remove_port_data(struct port *port) { - struct port_buffer *buf; - spin_lock_irq(&port->inbuf_lock); /* Remove unused data this port might have received. */ discard_port_data(port); spin_unlock_irq(&port->inbuf_lock); - /* Remove buffers we queued up for the Host to send us data in. */ - do { - spin_lock_irq(&port->inbuf_lock); - buf = virtqueue_detach_unused_buf(port->in_vq); - spin_unlock_irq(&port->inbuf_lock); - if (buf) - free_buf(buf, true); - } while (buf); - spin_lock_irq(&port->outvq_lock); reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); - - /* Free pending buffers from the out-queue. */ - do { - spin_lock_irq(&port->outvq_lock); - buf = virtqueue_detach_unused_buf(port->out_vq); - spin_unlock_irq(&port->outvq_lock); - if (buf) - free_buf(buf, true); - } while (buf); } /* @@ -1791,13 +1768,24 @@ static void control_work_handler(struct work_struct *work) spin_unlock(&portdev->c_ivq_lock); } +static void flush_bufs(struct virtqueue *vq, bool can_sleep) +{ + struct port_buffer *buf; + unsigned int len; + + while ((buf = virtqueue_get_buf(vq, &len))) + free_buf(buf, can_sleep); +} + static void out_intr(struct virtqueue *vq) { struct port *port; port = find_port_by_vq(vq->vdev->priv, vq); - if (!port) + if (!port) { + flush_bufs(vq, false); return; + } wake_up_interruptible(&port->waitqueue); } @@ -1808,8 +1796,10 @@ static void in_intr(struct virtqueue *vq) unsigned long flags; port = find_port_by_vq(vq->vdev->priv, vq); - if (!port) + if (!port) { + flush_bufs(vq, false); return; + } spin_lock_irqsave(&port->inbuf_lock, flags); port->inbuf = get_inbuf(port); @@ -1984,6 +1974,15 @@ static const struct file_operations portdev_fops = { static void remove_vqs(struct ports_device *portdev) { + struct virtqueue *vq; + + virtio_device_for_each_vq(portdev->vdev, vq) { + struct port_buffer *buf; + + flush_bufs(vq, true); + while ((buf = virtqueue_detach_unused_buf(vq))) + free_buf(buf, true); + } portdev->vdev->config->del_vqs(portdev->vdev); kfree(portdev->in_vqs); kfree(portdev->out_vqs); From 61a8950c5c5708cf2068b29ffde94e454e528208 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 Apr 2018 20:49:04 +0300 Subject: [PATCH 614/660] virtio_console: drop custom control queue cleanup We now cleanup all VQs on device removal - no need to handle the control VQ specially. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 26a66ffd943e..2d87ce555140 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1988,21 +1988,6 @@ static void remove_vqs(struct ports_device *portdev) kfree(portdev->out_vqs); } -static void remove_controlq_data(struct ports_device *portdev) -{ - struct port_buffer *buf; - unsigned int len; - - if (!use_multiport(portdev)) - return; - - while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) - free_buf(buf, true); - - while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) - free_buf(buf, true); -} - /* * Once we're further in boot, we get probed like any other virtio * device. @@ -2163,7 +2148,6 @@ static void virtcons_remove(struct virtio_device *vdev) * have to just stop using the port, as the vqs are going * away. */ - remove_controlq_data(portdev); remove_vqs(portdev); kfree(portdev); } @@ -2208,7 +2192,6 @@ static int virtcons_freeze(struct virtio_device *vdev) */ if (use_multiport(portdev)) virtqueue_disable_cb(portdev->c_ivq); - remove_controlq_data(portdev); list_for_each_entry(port, &portdev->ports, list) { virtqueue_disable_cb(port->in_vq); From aa44ec867030a72e8aa127977e37dec551d8df19 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 Apr 2018 20:51:18 +0300 Subject: [PATCH 615/660] virtio_console: move removal code Will make it reusable for error handling. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 72 +++++++++++++++++------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 2d87ce555140..e8480fe2e1d8 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1988,6 +1988,42 @@ static void remove_vqs(struct ports_device *portdev) kfree(portdev->out_vqs); } +static void virtcons_remove(struct virtio_device *vdev) +{ + struct ports_device *portdev; + struct port *port, *port2; + + portdev = vdev->priv; + + spin_lock_irq(&pdrvdata_lock); + list_del(&portdev->list); + spin_unlock_irq(&pdrvdata_lock); + + /* Disable interrupts for vqs */ + vdev->config->reset(vdev); + /* Finish up work that's lined up */ + if (use_multiport(portdev)) + cancel_work_sync(&portdev->control_work); + else + cancel_work_sync(&portdev->config_work); + + list_for_each_entry_safe(port, port2, &portdev->ports, list) + unplug_port(port); + + unregister_chrdev(portdev->chr_major, "virtio-portsdev"); + + /* + * When yanking out a device, we immediately lose the + * (device-side) queues. So there's no point in keeping the + * guest side around till we drop our final reference. This + * also means that any ports which are in an open state will + * have to just stop using the port, as the vqs are going + * away. + */ + remove_vqs(portdev); + kfree(portdev); +} + /* * Once we're further in boot, we get probed like any other virtio * device. @@ -2116,42 +2152,6 @@ fail: return err; } -static void virtcons_remove(struct virtio_device *vdev) -{ - struct ports_device *portdev; - struct port *port, *port2; - - portdev = vdev->priv; - - spin_lock_irq(&pdrvdata_lock); - list_del(&portdev->list); - spin_unlock_irq(&pdrvdata_lock); - - /* Disable interrupts for vqs */ - vdev->config->reset(vdev); - /* Finish up work that's lined up */ - if (use_multiport(portdev)) - cancel_work_sync(&portdev->control_work); - else - cancel_work_sync(&portdev->config_work); - - list_for_each_entry_safe(port, port2, &portdev->ports, list) - unplug_port(port); - - unregister_chrdev(portdev->chr_major, "virtio-portsdev"); - - /* - * When yanking out a device, we immediately lose the - * (device-side) queues. So there's no point in keeping the - * guest side around till we drop our final reference. This - * also means that any ports which are in an open state will - * have to just stop using the port, as the vqs are going - * away. - */ - remove_vqs(portdev); - kfree(portdev); -} - static struct virtio_device_id id_table[] = { { VIRTIO_ID_CONSOLE, VIRTIO_DEV_ANY_ID }, { 0 }, From 5c60300d68da32ca77f7f978039dc72bfc78b06b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 Apr 2018 21:00:13 +0300 Subject: [PATCH 616/660] virtio_console: reset on out of memory When out of memory and we can't add ctrl vq buffers, probe fails. Unfortunately the error handling is out of spec: it calls del_vqs without bothering to reset the device first. To fix, call the full cleanup function in this case. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index e8480fe2e1d8..21085515814f 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -2090,6 +2090,7 @@ static int virtcons_probe(struct virtio_device *vdev) spin_lock_init(&portdev->ports_lock); INIT_LIST_HEAD(&portdev->ports); + INIT_LIST_HEAD(&portdev->list); virtio_device_ready(portdev->vdev); @@ -2107,8 +2108,15 @@ static int virtcons_probe(struct virtio_device *vdev) if (!nr_added_bufs) { dev_err(&vdev->dev, "Error allocating buffers for control queue\n"); - err = -ENOMEM; - goto free_vqs; + /* + * The host might want to notify mgmt sw about device + * add failure. + */ + __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, + VIRTIO_CONSOLE_DEVICE_READY, 0); + /* Device was functional: we need full cleanup. */ + virtcons_remove(vdev); + return -ENOMEM; } } else { /* @@ -2139,11 +2147,6 @@ static int virtcons_probe(struct virtio_device *vdev) return 0; -free_vqs: - /* The host might want to notify mgmt sw about device add failure */ - __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, - VIRTIO_CONSOLE_DEVICE_READY, 0); - remove_vqs(portdev); free_chrdev: unregister_chrdev(portdev->chr_major, "virtio-portsdev"); free: From ab170c27361d1578b4769276ce2bbdb14394743d Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 21 Apr 2018 06:51:55 +0200 Subject: [PATCH 617/660] Revert "drm/sun4i: add lvds mode_valid function" The reverted commit broke LVDS output on TBS A711 Tablet. That tablet has simple-panel node that has fixed pixel clock-frequency that A83T SoC used in the tablet can't generate exactly. Requested rate is 52000000 and rounded_rate is calculated as 51857142. It's close enough for it to work in practice, but with strict check in the reverted commit, the mode is rejected needlessly in this case. DT allows to specify a range of values for simple-panel/clock-frequency, but driver doesn't respect that ATM. Given that TBS A711 is the single user of sun4i-lvds driver, let's revert that commit for now, until a better solution for the problem is found. Also see: https://patchwork.kernel.org/patch/9446385/ for relevant discussion (or search for "[RFC] drm/sun4i: rgb: Add 5% tolerance to dot clock frequency check"). Fixes: e4e4b7ad50cf ("drm/sun4i: add lvds mode_valid function") Reported-by: Ondrej Jirman Signed-off-by: Ondrej Jirman Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180421045155.15332-1-megous@megous.com Signed-off-by: Sean Paul --- drivers/gpu/drm/sun4i/sun4i_lvds.c | 55 ------------------------------ 1 file changed, 55 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c b/drivers/gpu/drm/sun4i/sun4i_lvds.c index bffff4c9fbf5..be3f14d7746d 100644 --- a/drivers/gpu/drm/sun4i/sun4i_lvds.c +++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c @@ -94,64 +94,9 @@ static void sun4i_lvds_encoder_disable(struct drm_encoder *encoder) } } -static enum drm_mode_status sun4i_lvds_encoder_mode_valid(struct drm_encoder *crtc, - const struct drm_display_mode *mode) -{ - struct sun4i_lvds *lvds = drm_encoder_to_sun4i_lvds(crtc); - struct sun4i_tcon *tcon = lvds->tcon; - u32 hsync = mode->hsync_end - mode->hsync_start; - u32 vsync = mode->vsync_end - mode->vsync_start; - unsigned long rate = mode->clock * 1000; - long rounded_rate; - - DRM_DEBUG_DRIVER("Validating modes...\n"); - - if (hsync < 1) - return MODE_HSYNC_NARROW; - - if (hsync > 0x3ff) - return MODE_HSYNC_WIDE; - - if ((mode->hdisplay < 1) || (mode->htotal < 1)) - return MODE_H_ILLEGAL; - - if ((mode->hdisplay > 0x7ff) || (mode->htotal > 0xfff)) - return MODE_BAD_HVALUE; - - DRM_DEBUG_DRIVER("Horizontal parameters OK\n"); - - if (vsync < 1) - return MODE_VSYNC_NARROW; - - if (vsync > 0x3ff) - return MODE_VSYNC_WIDE; - - if ((mode->vdisplay < 1) || (mode->vtotal < 1)) - return MODE_V_ILLEGAL; - - if ((mode->vdisplay > 0x7ff) || (mode->vtotal > 0xfff)) - return MODE_BAD_VVALUE; - - DRM_DEBUG_DRIVER("Vertical parameters OK\n"); - - tcon->dclk_min_div = 7; - tcon->dclk_max_div = 7; - rounded_rate = clk_round_rate(tcon->dclk, rate); - if (rounded_rate < rate) - return MODE_CLOCK_LOW; - - if (rounded_rate > rate) - return MODE_CLOCK_HIGH; - - DRM_DEBUG_DRIVER("Clock rate OK\n"); - - return MODE_OK; -} - static const struct drm_encoder_helper_funcs sun4i_lvds_enc_helper_funcs = { .disable = sun4i_lvds_encoder_disable, .enable = sun4i_lvds_encoder_enable, - .mode_valid = sun4i_lvds_encoder_mode_valid, }; static const struct drm_encoder_funcs sun4i_lvds_enc_funcs = { From 66c0255cf58718517e4cdff3612b92b82c2bf71b Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 18 Apr 2018 07:42:56 +0200 Subject: [PATCH 618/660] qxl: fix qxl_release_{map,unmap} s/PAGE_SIZE/PAGE_MASK/ Luckily release_offset is never larger than PAGE_SIZE, so the bug has no bad side effects and managed to stay unnoticed for years that way ... Signed-off-by: Gerd Hoffmann Reviewed-by: Dave Airlie Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20180418054257.15388-2-kraxel@redhat.com Signed-off-by: Sean Paul --- drivers/gpu/drm/qxl/qxl_ioctl.c | 4 ++-- drivers/gpu/drm/qxl/qxl_release.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index e238a1a2eca1..6cc9f3367fa0 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -182,9 +182,9 @@ static int qxl_process_single_command(struct qxl_device *qdev, goto out_free_reloc; /* TODO copy slow path code from i915 */ - fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE)); + fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_MASK)); unwritten = __copy_from_user_inatomic_nocache - (fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), + (fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_MASK), u64_to_user_ptr(cmd->command), cmd->command_size); { diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 5d84a66fed36..a0b4244d283d 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -411,10 +411,10 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev, struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); - ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_SIZE); + ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_MASK); if (!ptr) return NULL; - info = ptr + (release->release_offset & ~PAGE_SIZE); + info = ptr + (release->release_offset & ~PAGE_MASK); return info; } @@ -426,7 +426,7 @@ void qxl_release_unmap(struct qxl_device *qdev, struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); void *ptr; - ptr = ((void *)info) - (release->release_offset & ~PAGE_SIZE); + ptr = ((void *)info) - (release->release_offset & ~PAGE_MASK); qxl_bo_kunmap_atomic_page(qdev, bo, ptr); } From 7a31805ba28c93f80cf874492e2f21da80851a9f Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 18 Apr 2018 07:42:57 +0200 Subject: [PATCH 619/660] qxl: keep separate release_bo pointer qxl expects that list_first_entry(release->bos) returns the first element qxl added to the list. ttm_eu_reserve_buffers() may reorder the list though. Add a release_bo field to struct qxl_release and use that instead. Signed-off-by: Gerd Hoffmann Reviewed-by: Dave Airlie Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20180418054257.15388-3-kraxel@redhat.com Signed-off-by: Sean Paul --- drivers/gpu/drm/qxl/qxl_cmd.c | 6 ++---- drivers/gpu/drm/qxl/qxl_drv.h | 1 + drivers/gpu/drm/qxl/qxl_release.c | 12 ++++++------ 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index c0fb52c6d4ca..01665b98c57e 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -179,10 +179,9 @@ qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *relea uint32_t type, bool interruptible) { struct qxl_command cmd; - struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); cmd.type = type; - cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset); + cmd.data = qxl_bo_physical_address(qdev, release->release_bo, release->release_offset); return qxl_ring_push(qdev->command_ring, &cmd, interruptible); } @@ -192,10 +191,9 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas uint32_t type, bool interruptible) { struct qxl_command cmd; - struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); cmd.type = type; - cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset); + cmd.data = qxl_bo_physical_address(qdev, release->release_bo, release->release_offset); return qxl_ring_push(qdev->cursor_ring, &cmd, interruptible); } diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 00a1a66b052a..864b456080c4 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -167,6 +167,7 @@ struct qxl_release { int id; int type; + struct qxl_bo *release_bo; uint32_t release_offset; uint32_t surface_release_id; struct ww_acquire_ctx ticket; diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index a0b4244d283d..7cb214577275 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -173,6 +173,7 @@ qxl_release_free_list(struct qxl_release *release) list_del(&entry->tv.head); kfree(entry); } + release->release_bo = NULL; } void @@ -296,7 +297,6 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, { if (surface_cmd_type == QXL_SURFACE_CMD_DESTROY && create_rel) { int idr_ret; - struct qxl_bo_list *entry = list_first_entry(&create_rel->bos, struct qxl_bo_list, tv.head); struct qxl_bo *bo; union qxl_release_info *info; @@ -304,8 +304,9 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release); if (idr_ret < 0) return idr_ret; - bo = to_qxl_bo(entry->tv.bo); + bo = create_rel->release_bo; + (*release)->release_bo = bo; (*release)->release_offset = create_rel->release_offset + 64; qxl_release_list_add(*release, bo); @@ -365,6 +366,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, bo = qxl_bo_ref(qdev->current_release_bo[cur_idx]); + (*release)->release_bo = bo; (*release)->release_offset = qdev->current_release_bo_offset[cur_idx] * release_size_per_bo[cur_idx]; qdev->current_release_bo_offset[cur_idx]++; @@ -408,8 +410,7 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev, { void *ptr; union qxl_release_info *info; - struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); - struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); + struct qxl_bo *bo = release->release_bo; ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_MASK); if (!ptr) @@ -422,8 +423,7 @@ void qxl_release_unmap(struct qxl_device *qdev, struct qxl_release *release, union qxl_release_info *info) { - struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); - struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); + struct qxl_bo *bo = release->release_bo; void *ptr; ptr = ((void *)info) - (release->release_offset & ~PAGE_MASK); From d02d270014f70dcab0117776b81a37b6fca745ae Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 3 Apr 2018 11:59:04 +0200 Subject: [PATCH 620/660] drm/virtio: fix vq wait_event condition Wait until we have enough space in the virt queue to actually queue up our request. Avoids the guest spinning in case we have a non-zero amount of free entries but not enough for the request. Cc: stable@vger.kernel.org Reported-by: Alain Magloire Signed-off-by: Gerd Hoffmann Reviewed-by: Dave Airlie Link: http://patchwork.freedesktop.org/patch/msgid/20180403095904.11152-1-kraxel@redhat.com Signed-off-by: Sean Paul --- drivers/gpu/drm/virtio/virtgpu_vq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 48e4f1df6e5d..020070d483d3 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -293,7 +293,7 @@ retry: ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC); if (ret == -ENOSPC) { spin_unlock(&vgdev->ctrlq.qlock); - wait_event(vgdev->ctrlq.ack_queue, vq->num_free); + wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= outcnt + incnt); spin_lock(&vgdev->ctrlq.qlock); goto retry; } else { @@ -368,7 +368,7 @@ retry: ret = virtqueue_add_sgs(vq, sgs, outcnt, 0, vbuf, GFP_ATOMIC); if (ret == -ENOSPC) { spin_unlock(&vgdev->cursorq.qlock); - wait_event(vgdev->cursorq.ack_queue, vq->num_free); + wait_event(vgdev->cursorq.ack_queue, vq->num_free >= outcnt); spin_lock(&vgdev->cursorq.qlock); goto retry; } else { From 1f6b8eef11c3d097bc8a6b2bbb868eb47ec6f7d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Apr 2018 16:02:50 +0300 Subject: [PATCH 621/660] drm/edid: Reset more of the display info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're currently failing to reset everything in display_info.hdmi which will potentially cause us to use stale information when swapping monitors. Eg. if the user replaces a HDMI 2.0 monitor with a HDMI 1.x monitor we will continue to think that the monitor supports scrambling. That will lead to a black screen since the HDMI 1.x monitor won't understand the scrambled signal. Fix the problem by clearing display_info.hdmi fully. And while at eliminate some duplicated code by calling drm_reset_display_info() in drm_add_display_info(). Cc: stable@vger.kernel.org Cc: Antony Chen Cc: Shashank Sharma Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105655 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180424130250.7028-1-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter Tested-by: Antony Chen Signed-off-by: Sean Paul --- drivers/gpu/drm/drm_edid.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 134069f36482..39f1db4acda4 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4451,6 +4451,7 @@ drm_reset_display_info(struct drm_connector *connector) info->max_tmds_clock = 0; info->dvi_dual = false; info->has_hdmi_infoframe = false; + memset(&info->hdmi, 0, sizeof(info->hdmi)); info->non_desktop = 0; } @@ -4462,17 +4463,11 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi u32 quirks = edid_get_quirks(edid); + drm_reset_display_info(connector); + info->width_mm = edid->width_cm * 10; info->height_mm = edid->height_cm * 10; - /* driver figures it out in this case */ - info->bpc = 0; - info->color_formats = 0; - info->cea_rev = 0; - info->max_tmds_clock = 0; - info->dvi_dual = false; - info->has_hdmi_infoframe = false; - info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP); DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop); From 4e949e9b9d1e3edcdab3b54656c5851bd9e49c67 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Apr 2018 14:57:17 -0400 Subject: [PATCH 622/660] perf/x86/intel: Don't enable freeze-on-smi for PerfMon V1 The SMM freeze feature was introduced since PerfMon V2. But the current code unconditionally enables the feature for all platforms. It can generate #GP exception, if the related FREEZE_WHILE_SMM bit is set for the machine with PerfMon V1. To disable the feature for PerfMon V1, perf needs to - Remove the freeze_on_smi sysfs entry by moving intel_pmu_attrs to intel_pmu, which is only applied to PerfMon V2 and later. - Check the PerfMon version before flipping the SMM bit when starting CPU Fixes: 6089327f5424 ("perf/x86: Add sysfs entry to freeze counters on SMI") Signed-off-by: Kan Liang Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: ak@linux.intel.com Cc: eranian@google.com Cc: acme@redhat.com Link: https://lkml.kernel.org/r/1524682637-63219-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 607bf565a90c..707b2a96e516 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3339,7 +3339,8 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->lbr_sel = NULL; - flip_smm_bit(&x86_pmu.attr_freeze_on_smi); + if (x86_pmu.version > 1) + flip_smm_bit(&x86_pmu.attr_freeze_on_smi); if (!cpuc->shared_regs) return; @@ -3502,6 +3503,8 @@ static __initconst const struct x86_pmu core_pmu = { .cpu_dying = intel_pmu_cpu_dying, }; +static struct attribute *intel_pmu_attrs[]; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -3533,6 +3536,8 @@ static __initconst const struct x86_pmu intel_pmu = { .format_attrs = intel_arch3_formats_attr, .events_sysfs_show = intel_event_sysfs_show, + .attrs = intel_pmu_attrs, + .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, @@ -3911,8 +3916,6 @@ __init int intel_pmu_init(void) x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); - - x86_pmu.attrs = intel_pmu_attrs; /* * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events, when not running in a hypervisor: From 7ef79ad52136712172eb0525bf0b462516bf2f93 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 26 Apr 2018 00:44:46 -0400 Subject: [PATCH 623/660] ext4: add MODULE_SOFTDEP to ensure crc32c is included in the initramfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: a45403b51582 ("ext4: always initialize the crc32c checksum driver") Reported-by: François Valenduc Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 185f7e61f4cf..eb104e8476f0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5886,5 +5886,6 @@ static void __exit ext4_exit_fs(void) MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); MODULE_DESCRIPTION("Fourth Extended Filesystem"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crc32c"); module_init(ext4_init_fs) module_exit(ext4_exit_fs) From 9124130573950dcfc06b6a59306edfda2fc33ec7 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 23 Apr 2018 11:29:22 -0700 Subject: [PATCH 624/660] x86/cpufeatures: Enumerate cldemote instruction cldemote is a new instruction in future x86 processors. It hints to hardware that a specified cache line should be moved ("demoted") from the cache(s) closest to the processor core to a level more distant from the processor core. This instruction is faster than snooping to make the cache line available for other cores. cldemote instruction is indicated by the presence of the CPUID feature flag CLDEMOTE (CPUID.(EAX=0x7, ECX=0):ECX[bit25]). More details on cldemote instruction can be found in the latest Intel Architecture Instruction Set Extensions and Future Features Programming Reference. Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "H. Peter Anvin" Cc: "Ashok Raj" Link: https://lkml.kernel.org/r/1524508162-192587-1-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d554c11e01ff..578793e97431 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -320,6 +320,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ From e3072805c61167b85a30ceeef606620704db31f7 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 25 Apr 2018 10:05:53 +0800 Subject: [PATCH 625/660] x86/vector: Remove the macro VECTOR_OFFSET_START Now, Linux uses matrix allocator for vector assignment, the original assignment code which used VECTOR_OFFSET_START has been removed. So remove the stale macro as well. Fixes: commit 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment") Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Acked-by: David Rientjes Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/20180425020553.17210-1-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 404c5fdff859..57003074bd7a 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -34,11 +34,6 @@ * (0x80 is the syscall vector, 0x30-0x3f are for ISA) */ #define FIRST_EXTERNAL_VECTOR 0x20 -/* - * We start allocating at 0x21 to spread out vectors evenly between - * priority levels. (0x80 is the syscall vector) - */ -#define VECTOR_OFFSET_START 1 /* * Reserve the lowest usable vector (and hence lowest priority) 0x20 for From 2891d4feae7c2cf0a56d84bf38519aae6c5060b5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 26 Apr 2018 10:29:57 +0200 Subject: [PATCH 626/660] Revert "ARM: amba: Fix race condition with driver_override" This reverts commit 6b614a87f3f477571e319281e84dba11e0ea0a76. My backport was incorrect, as Geert pointed out :( Reported-by: Geert Uytterhoeven Cc: Todd Kjos Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/amba/bus.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 8e6ac3031662..fac8e36de11e 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -69,15 +69,11 @@ static ssize_t driver_override_show(struct device *_dev, struct device_attribute *attr, char *buf) { struct amba_device *dev = to_amba_device(_dev); - ssize_t len; if (!dev->driver_override) return 0; - device_lock(_dev); - len = sprintf(buf, "%s\n", dev->driver_override); - device_unlock(_dev); - return len; + return sprintf(buf, "%s\n", dev->driver_override); } static ssize_t driver_override_store(struct device *_dev, @@ -85,7 +81,7 @@ static ssize_t driver_override_store(struct device *_dev, const char *buf, size_t count) { struct amba_device *dev = to_amba_device(_dev); - char *driver_override, *old, *cp; + char *driver_override, *old = dev->driver_override, *cp; /* We need to keep extra room for a newline */ if (count >= (PAGE_SIZE - 1)) @@ -99,15 +95,12 @@ static ssize_t driver_override_store(struct device *_dev, if (cp) *cp = '\0'; - device_lock(_dev); - old = dev->driver_override; if (strlen(driver_override)) { dev->driver_override = driver_override; } else { kfree(driver_override); dev->driver_override = NULL; } - device_unlock(_dev); kfree(old); From 5f53624662eaac89598641cee6cd54fc192572d9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Apr 2018 15:21:43 +0200 Subject: [PATCH 627/660] ARM: amba: Make driver_override output consistent with other buses For AMBA devices with unconfigured driver override, the "driver_override" sysfs virtual file is empty, while it contains "(null)" for platform and PCI devices. Make AMBA consistent with other buses by dropping the test for a NULL pointer. Note that contrary to popular belief, sprintf() handles NULL pointers fine; they are printed as "(null)". Signed-off-by: Geert Uytterhoeven Cc: stable Reviewed-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/amba/bus.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index fac8e36de11e..f8c01fbef64d 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -70,9 +70,6 @@ static ssize_t driver_override_show(struct device *_dev, { struct amba_device *dev = to_amba_device(_dev); - if (!dev->driver_override) - return 0; - return sprintf(buf, "%s\n", dev->driver_override); } From 6a7228d90d42bcacfe38786756ba62762b91c20a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Apr 2018 15:21:44 +0200 Subject: [PATCH 628/660] ARM: amba: Fix race condition with driver_override The driver_override implementation is susceptible to a race condition when different threads are reading vs storing a different driver override. Add locking to avoid this race condition. Cfr. commits 6265539776a0810b ("driver core: platform: fix race condition with driver_override") and 9561475db680f714 ("PCI: Fix race condition with driver_override"). Fixes: 3cf385713460eb2b ("ARM: 8256/1: driver coamba: add device binding path 'driver_override'") Signed-off-by: Geert Uytterhoeven Reviewed-by: Todd Kjos Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/amba/bus.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index f8c01fbef64d..4a3ac31c07d0 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -69,8 +69,12 @@ static ssize_t driver_override_show(struct device *_dev, struct device_attribute *attr, char *buf) { struct amba_device *dev = to_amba_device(_dev); + ssize_t len; - return sprintf(buf, "%s\n", dev->driver_override); + device_lock(_dev); + len = sprintf(buf, "%s\n", dev->driver_override); + device_unlock(_dev); + return len; } static ssize_t driver_override_store(struct device *_dev, @@ -78,7 +82,7 @@ static ssize_t driver_override_store(struct device *_dev, const char *buf, size_t count) { struct amba_device *dev = to_amba_device(_dev); - char *driver_override, *old = dev->driver_override, *cp; + char *driver_override, *old, *cp; /* We need to keep extra room for a newline */ if (count >= (PAGE_SIZE - 1)) @@ -92,12 +96,15 @@ static ssize_t driver_override_store(struct device *_dev, if (cp) *cp = '\0'; + device_lock(_dev); + old = dev->driver_override; if (strlen(driver_override)) { dev->driver_override = driver_override; } else { kfree(driver_override); dev->driver_override = NULL; } + device_unlock(_dev); kfree(old); From 7d878817db22f64c2b2c241335ec03e4c3fd5476 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 26 Apr 2018 14:08:32 +0800 Subject: [PATCH 629/660] x86/vector: Remove the unused macro FPU_IRQ The macro FPU_IRQ has never been used since v3.10, So remove it. Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/20180426060832.27312-1-douly.fnst@cn.fujitsu.com --- arch/x86/include/asm/irq_vectors.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 57003074bd7a..548d90bbf919 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -114,8 +114,6 @@ #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif -#define FPU_IRQ 13 - /* * Size the maximum number of interrupts. * From 1f71addd34f4c442bec7d7c749acc1beb58126f2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 24 Apr 2018 21:22:18 +0200 Subject: [PATCH 630/660] tick/sched: Do not mess with an enqueued hrtimer Kaike reported that in tests rdma hrtimers occasionaly stopped working. He did great debugging, which provided enough context to decode the problem. CPU 3 CPU 2 idle start sched_timer expires = 712171000000 queue->next = sched_timer start rdmavt timer. expires = 712172915662 lock(baseof(CPU3)) tick_nohz_stop_tick() tick = 716767000000 timerqueue_add(tmr) hrtimer_set_expires(sched_timer, tick); sched_timer->expires = 716767000000 <---- FAIL if (tmr->expires < queue->next->expires) hrtimer_start(sched_timer) queue->next = tmr; lock(baseof(CPU3)) unlock(baseof(CPU3)) timerqueue_remove() timerqueue_add() ts->sched_timer is queued and queue->next is pointing to it, but then ts->sched_timer.expires is modified. This not only corrupts the ordering of the timerqueue RB tree, it also makes CPU2 see the new expiry time of timerqueue->next->expires when checking whether timerqueue->next needs to be updated. So CPU2 sees that the rdma timer is earlier than timerqueue->next and sets the rdma timer as new next. Depending on whether it had also seen the new time at RB tree enqueue, it might have queued the rdma timer at the wrong place and then after removing the sched_timer the RB tree is completely hosed. The problem was introduced with a commit which tried to solve inconsistency between the hrtimer in the tick_sched data and the underlying hardware clockevent. It split out hrtimer_set_expires() to store the new tick time in both the NOHZ and the NOHZ + HIGHRES case, but missed the fact that in the NOHZ + HIGHRES case the hrtimer might still be queued. Use hrtimer_start(timer, tick...) for the NOHZ + HIGHRES case which sets timer->expires after canceling the timer and move the hrtimer_set_expires() invocation into the NOHZ only code path which is not affected as it merily uses the hrtimer as next event storage so code pathes can be shared with the NOHZ + HIGHRES case. Fixes: d4af6d933ccf ("nohz: Fix spurious warning when hrtimer and clockevent get out of sync") Reported-by: "Wan Kaike" Signed-off-by: Thomas Gleixner Acked-by: Frederic Weisbecker Cc: "Marciniszyn Mike" Cc: Anna-Maria Gleixner Cc: linux-rdma@vger.kernel.org Cc: "Dalessandro Dennis" Cc: "Fleck John" Cc: stable@vger.kernel.org Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: "Weiny Ira" Cc: "linux-rdma@vger.kernel.org" Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1804241637390.1679@nanos.tec.linutronix.de Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1804242119210.1597@nanos.tec.linutronix.de --- kernel/time/tick-sched.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 646645e981f9..d31bec177fa5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -804,12 +804,12 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) return; } - hrtimer_set_expires(&ts->sched_timer, tick); - - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) - hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); - else + if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { + hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED); + } else { + hrtimer_set_expires(&ts->sched_timer, tick); tick_program_event(tick, 1); + } } static void tick_nohz_retain_tick(struct tick_sched *ts) From a3ed0e4393d6885b4af7ce84b437dc696490a530 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Apr 2018 15:33:38 +0200 Subject: [PATCH 631/660] Revert: Unify CLOCK_MONOTONIC and CLOCK_BOOTTIME Revert commits 92af4dcb4e1c ("tracing: Unify the "boot" and "mono" tracing clocks") 127bfa5f4342 ("hrtimer: Unify MONOTONIC and BOOTTIME clock behavior") 7250a4047aa6 ("posix-timers: Unify MONOTONIC and BOOTTIME clock behavior") d6c7270e913d ("timekeeping: Remove boot time specific code") f2d6fdbfd238 ("Input: Evdev - unify MONOTONIC and BOOTTIME clock behavior") d6ed449afdb3 ("timekeeping: Make the MONOTONIC clock behave like the BOOTTIME clock") 72199320d49d ("timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock") As stated in the pull request for the unification of CLOCK_MONOTONIC and CLOCK_BOOTTIME, it was clear that we might have to revert the change. As reported by several folks systemd and other applications rely on the documented behaviour of CLOCK_MONOTONIC on Linux and break with the above changes. After resume daemons time out and other timeout related issues are observed. Rafael compiled this list: * systemd kills daemons on resume, after >WatchdogSec seconds of suspending (Genki Sky). [Verified that that's because systemd uses CLOCK_MONOTONIC and expects it to not include the suspend time.] * systemd-journald misbehaves after resume: systemd-journald[7266]: File /var/log/journal/016627c3c4784cd4812d4b7e96a34226/system.journal corrupted or uncleanly shut down, renaming and replacing. (Mike Galbraith). * NetworkManager reports "networking disabled" and networking is broken after resume 50% of the time (Pavel). [May be because of systemd.] * MATE desktop dims the display and starts the screensaver right after system resume (Pavel). * Full system hang during resume (me). [May be due to systemd or NM or both.] That happens on debian and open suse systems. It's sad, that these problems were neither catched in -next nor by those folks who expressed interest in this change. Reported-by: Rafael J. Wysocki Reported-by: Genki Sky , Reported-by: Pavel Machek Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt --- Documentation/trace/ftrace.rst | 14 ++++-- drivers/input/evdev.c | 7 ++- include/linux/hrtimer.h | 2 + include/linux/timekeeper_internal.h | 2 - include/linux/timekeeping.h | 37 +++++++++----- include/uapi/linux/time.h | 1 - kernel/time/hrtimer.c | 16 +++++- kernel/time/posix-stubs.c | 2 - kernel/time/posix-timers.c | 26 ++++++---- kernel/time/tick-common.c | 15 ------ kernel/time/tick-internal.h | 6 --- kernel/time/tick-sched.c | 9 ---- kernel/time/timekeeping.c | 78 ++++++++++++++--------------- kernel/time/timekeeping.h | 1 + kernel/trace/trace.c | 2 +- 15 files changed, 114 insertions(+), 104 deletions(-) diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index e45f0786f3f9..67d9c38e95eb 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -461,9 +461,17 @@ of ftrace. Here is a list of some of the key files: and ticks at the same rate as the hardware clocksource. boot: - Same as mono. Used to be a separate clock which accounted - for the time spent in suspend while CLOCK_MONOTONIC did - not. + This is the boot clock (CLOCK_BOOTTIME) and is based on the + fast monotonic clock, but also accounts for time spent in + suspend. Since the clock access is designed for use in + tracing in the suspend path, some side effects are possible + if clock is accessed after the suspend time is accounted before + the fast mono clock is updated. In this case, the clock update + appears to happen slightly sooner than it normally would have. + Also on 32-bit systems, it's possible that the 64-bit boot offset + sees a partial update. These effects are rare and post + processing should be able to handle them. See comments in the + ktime_get_boot_fast_ns() function for more information. To set a clock, simply echo the clock name into this file:: diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 46115a392098..c81c79d01d93 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -31,6 +31,7 @@ enum evdev_clock_type { EV_CLK_REAL = 0, EV_CLK_MONO, + EV_CLK_BOOT, EV_CLK_MAX }; @@ -197,10 +198,12 @@ static int evdev_set_clk_type(struct evdev_client *client, unsigned int clkid) case CLOCK_REALTIME: clk_type = EV_CLK_REAL; break; - case CLOCK_BOOTTIME: case CLOCK_MONOTONIC: clk_type = EV_CLK_MONO; break; + case CLOCK_BOOTTIME: + clk_type = EV_CLK_BOOT; + break; default: return -EINVAL; } @@ -311,6 +314,8 @@ static void evdev_events(struct input_handle *handle, ev_time[EV_CLK_MONO] = ktime_get(); ev_time[EV_CLK_REAL] = ktime_mono_to_real(ev_time[EV_CLK_MONO]); + ev_time[EV_CLK_BOOT] = ktime_mono_to_any(ev_time[EV_CLK_MONO], + TK_OFFS_BOOT); rcu_read_lock(); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index a2656c3ebe81..3892e9c8b2de 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -161,9 +161,11 @@ struct hrtimer_clock_base { enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_REALTIME, + HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, HRTIMER_BASE_MONOTONIC_SOFT, HRTIMER_BASE_REALTIME_SOFT, + HRTIMER_BASE_BOOTTIME_SOFT, HRTIMER_BASE_TAI_SOFT, HRTIMER_MAX_CLOCK_BASES, }; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 4b3dca173e89..7acb953298a7 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -52,7 +52,6 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai - * @time_suspended: Accumulated suspend time * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events @@ -95,7 +94,6 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; - ktime_t time_suspended; s32 tai_offset; unsigned int clock_was_set_seq; u8 cs_was_changed_seq; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 9737fbec7019..588a0e4b1ab9 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -33,25 +33,20 @@ extern void ktime_get_ts64(struct timespec64 *ts); extern time64_t ktime_get_seconds(void); extern time64_t __ktime_get_real_seconds(void); extern time64_t ktime_get_real_seconds(void); -extern void ktime_get_active_ts64(struct timespec64 *ts); extern int __getnstimeofday64(struct timespec64 *tv); extern void getnstimeofday64(struct timespec64 *tv); extern void getboottime64(struct timespec64 *ts); -#define ktime_get_real_ts64(ts) getnstimeofday64(ts) - -/* Clock BOOTTIME compatibility wrappers */ -static inline void get_monotonic_boottime64(struct timespec64 *ts) -{ - ktime_get_ts64(ts); -} +#define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* * ktime_t based interfaces */ + enum tk_offsets { TK_OFFS_REAL, + TK_OFFS_BOOT, TK_OFFS_TAI, TK_OFFS_MAX, }; @@ -62,10 +57,6 @@ extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); extern ktime_t ktime_get_raw(void); extern u32 ktime_get_resolution_ns(void); -/* Clock BOOTTIME compatibility wrappers */ -static inline ktime_t ktime_get_boottime(void) { return ktime_get(); } -static inline u64 ktime_get_boot_ns(void) { return ktime_get(); } - /** * ktime_get_real - get the real (wall-) time in ktime_t format */ @@ -74,6 +65,17 @@ static inline ktime_t ktime_get_real(void) return ktime_get_with_offset(TK_OFFS_REAL); } +/** + * ktime_get_boottime - Returns monotonic time since boot in ktime_t format + * + * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the + * time spent in suspend. + */ +static inline ktime_t ktime_get_boottime(void) +{ + return ktime_get_with_offset(TK_OFFS_BOOT); +} + /** * ktime_get_clocktai - Returns the TAI time of day in ktime_t format */ @@ -100,6 +102,11 @@ static inline u64 ktime_get_real_ns(void) return ktime_to_ns(ktime_get_real()); } +static inline u64 ktime_get_boot_ns(void) +{ + return ktime_to_ns(ktime_get_boottime()); +} + static inline u64 ktime_get_tai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); @@ -112,11 +119,17 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); +extern u64 ktime_get_boot_fast_ns(void); extern u64 ktime_get_real_fast_ns(void); /* * timespec64 interfaces utilizing the ktime based ones */ +static inline void get_monotonic_boottime64(struct timespec64 *ts) +{ + *ts = ktime_to_timespec64(ktime_get_boottime()); +} + static inline void timekeeping_clocktai64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_clocktai()); diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 16a296612ba4..4c0338ea308a 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -73,7 +73,6 @@ struct __kernel_old_timeval { */ #define CLOCK_SGI_CYCLE 10 #define CLOCK_TAI 11 -#define CLOCK_MONOTONIC_ACTIVE 12 #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index eda1210ce50f..14e858753d76 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -90,6 +90,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, }, + { + .index = HRTIMER_BASE_BOOTTIME, + .clockid = CLOCK_BOOTTIME, + .get_time = &ktime_get_boottime, + }, { .index = HRTIMER_BASE_TAI, .clockid = CLOCK_TAI, @@ -105,6 +110,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, }, + { + .index = HRTIMER_BASE_BOOTTIME_SOFT, + .clockid = CLOCK_BOOTTIME, + .get_time = &ktime_get_boottime, + }, { .index = HRTIMER_BASE_TAI_SOFT, .clockid = CLOCK_TAI, @@ -119,7 +129,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, - [CLOCK_BOOTTIME] = HRTIMER_BASE_MONOTONIC, + [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, [CLOCK_TAI] = HRTIMER_BASE_TAI, }; @@ -571,12 +581,14 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; + ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, - offs_real, offs_tai); + offs_real, offs_boot, offs_tai); base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; + base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; return now; diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index e0dbae98db9d..69a937c3cd81 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -83,8 +83,6 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp) case CLOCK_BOOTTIME: get_monotonic_boottime64(tp); break; - case CLOCK_MONOTONIC_ACTIVE: - ktime_get_active_ts64(tp); default: return -EINVAL; } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index b6899b5060bd..10b7186d0638 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -252,16 +252,15 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 * return 0; } -static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) +static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp) { - timekeeping_clocktai64(tp); + get_monotonic_boottime64(tp); return 0; } -static int posix_get_monotonic_active(clockid_t which_clock, - struct timespec64 *tp) +static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) { - ktime_get_active_ts64(tp); + timekeeping_clocktai64(tp); return 0; } @@ -1317,9 +1316,19 @@ static const struct k_clock clock_tai = { .timer_arm = common_hrtimer_arm, }; -static const struct k_clock clock_monotonic_active = { +static const struct k_clock clock_boottime = { .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_get_monotonic_active, + .clock_get = posix_get_boottime, + .nsleep = common_nsleep, + .timer_create = common_timer_create, + .timer_set = common_timer_set, + .timer_get = common_timer_get, + .timer_del = common_timer_del, + .timer_rearm = common_hrtimer_rearm, + .timer_forward = common_hrtimer_forward, + .timer_remaining = common_hrtimer_remaining, + .timer_try_to_cancel = common_hrtimer_try_to_cancel, + .timer_arm = common_hrtimer_arm, }; static const struct k_clock * const posix_clocks[] = { @@ -1330,11 +1339,10 @@ static const struct k_clock * const posix_clocks[] = { [CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw, [CLOCK_REALTIME_COARSE] = &clock_realtime_coarse, [CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse, - [CLOCK_BOOTTIME] = &clock_monotonic, + [CLOCK_BOOTTIME] = &clock_boottime, [CLOCK_REALTIME_ALARM] = &alarm_clock, [CLOCK_BOOTTIME_ALARM] = &alarm_clock, [CLOCK_TAI] = &clock_tai, - [CLOCK_MONOTONIC_ACTIVE] = &clock_monotonic_active, }; static const struct k_clock *clockid_to_kclock(const clockid_t id) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 099572ca4a8f..49edc1c4f3e6 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -419,19 +419,6 @@ void tick_suspend_local(void) clockevents_shutdown(td->evtdev); } -static void tick_forward_next_period(void) -{ - ktime_t delta, now = ktime_get(); - u64 n; - - delta = ktime_sub(now, tick_next_period); - n = ktime_divns(delta, tick_period); - tick_next_period += n * tick_period; - if (tick_next_period < now) - tick_next_period += tick_period; - tick_sched_forward_next_period(); -} - /** * tick_resume_local - Resume the local tick device * @@ -444,8 +431,6 @@ void tick_resume_local(void) struct tick_device *td = this_cpu_ptr(&tick_cpu_device); bool broadcast = tick_resume_check_broadcast(); - tick_forward_next_period(); - clockevents_tick_resume(td->evtdev); if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 21efab7485ca..e277284c2831 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -141,12 +141,6 @@ static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } #endif /* !(BROADCAST && ONESHOT) */ -#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) -extern void tick_sched_forward_next_period(void); -#else -static inline void tick_sched_forward_next_period(void) { } -#endif - /* NO_HZ_FULL internal */ #ifdef CONFIG_NO_HZ_FULL extern void tick_nohz_init(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d31bec177fa5..da9455a6b42b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -51,15 +51,6 @@ struct tick_sched *tick_get_tick_sched(int cpu) */ static ktime_t last_jiffies_update; -/* - * Called after resume. Make sure that jiffies are not fast forwarded due to - * clock monotonic being forwarded by the suspended time. - */ -void tick_sched_forward_next_period(void) -{ - last_jiffies_update = tick_next_period; -} - /* * Must be called with interrupts disabled ! */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dcf7f20fcd12..49cbceef5deb 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -138,12 +138,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { - /* Update both bases so mono and raw stay coupled. */ - tk->tkr_mono.base += delta; - tk->tkr_raw.base += delta; - - /* Accumulate time spent in suspend */ - tk->time_suspended += delta; + tk->offs_boot = ktime_add(tk->offs_boot, delta); } /* @@ -473,6 +468,36 @@ u64 ktime_get_raw_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); +/** + * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. + * + * To keep it NMI safe since we're accessing from tracing, we're not using a + * separate timekeeper with updates to monotonic clock and boot offset + * protected with seqlocks. This has the following minor side effects: + * + * (1) Its possible that a timestamp be taken after the boot offset is updated + * but before the timekeeper is updated. If this happens, the new boot offset + * is added to the old timekeeping making the clock appear to update slightly + * earlier: + * CPU 0 CPU 1 + * timekeeping_inject_sleeptime64() + * __timekeeping_inject_sleeptime(tk, delta); + * timestamp(); + * timekeeping_update(tk, TK_CLEAR_NTP...); + * + * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be + * partially updated. Since the tk->offs_boot update is a rare event, this + * should be a rare occurrence which postprocessing should be able to handle. + */ +u64 notrace ktime_get_boot_fast_ns(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); +} +EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); + + /* * See comment for __ktime_get_fast_ns() vs. timestamp ordering */ @@ -764,6 +789,7 @@ EXPORT_SYMBOL_GPL(ktime_get_resolution_ns); static ktime_t *offsets[TK_OFFS_MAX] = { [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, + [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai, }; @@ -860,39 +886,6 @@ void ktime_get_ts64(struct timespec64 *ts) } EXPORT_SYMBOL_GPL(ktime_get_ts64); -/** - * ktime_get_active_ts64 - Get the active non-suspended monotonic clock - * @ts: pointer to timespec variable - * - * The function calculates the monotonic clock from the realtime clock and - * the wall_to_monotonic offset, subtracts the accumulated suspend time and - * stores the result in normalized timespec64 format in the variable - * pointed to by @ts. - */ -void ktime_get_active_ts64(struct timespec64 *ts) -{ - struct timekeeper *tk = &tk_core.timekeeper; - struct timespec64 tomono, tsusp; - u64 nsec, nssusp; - unsigned int seq; - - WARN_ON(timekeeping_suspended); - - do { - seq = read_seqcount_begin(&tk_core.seq); - ts->tv_sec = tk->xtime_sec; - nsec = timekeeping_get_ns(&tk->tkr_mono); - tomono = tk->wall_to_monotonic; - nssusp = tk->time_suspended; - } while (read_seqcount_retry(&tk_core.seq, seq)); - - ts->tv_sec += tomono.tv_sec; - ts->tv_nsec = 0; - timespec64_add_ns(ts, nsec + tomono.tv_nsec); - tsusp = ns_to_timespec64(nssusp); - *ts = timespec64_sub(*ts, tsusp); -} - /** * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC * @@ -1593,6 +1586,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, return; } tk_xtime_add(tk, delta); + tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); tk_debug_account_sleep_time(delta); } @@ -2125,7 +2119,7 @@ out: void getboottime64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; - ktime_t t = ktime_sub(tk->offs_real, tk->time_suspended); + ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); *ts = ktime_to_timespec64(t); } @@ -2188,6 +2182,7 @@ void do_timer(unsigned long ticks) * ktime_get_update_offsets_now - hrtimer helper * @cwsseq: pointer to check and store the clock was set sequence number * @offs_real: pointer to storage for monotonic -> realtime offset + * @offs_boot: pointer to storage for monotonic -> boottime offset * @offs_tai: pointer to storage for monotonic -> clock tai offset * * Returns current monotonic time and updates the offsets if the @@ -2197,7 +2192,7 @@ void do_timer(unsigned long ticks) * Called from hrtimer_interrupt() or retrigger_next_event() */ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, - ktime_t *offs_tai) + ktime_t *offs_boot, ktime_t *offs_tai) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; @@ -2214,6 +2209,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, if (*cwsseq != tk->clock_was_set_seq) { *cwsseq = tk->clock_was_set_seq; *offs_real = tk->offs_real; + *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; } diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 79b67f5e0343..7a9b4eb7a1d5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -6,6 +6,7 @@ */ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, + ktime_t *offs_boot, ktime_t *offs_tai); extern int timekeeping_valid_for_hres(void); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dfbcf9ee1447..414d7210b2ec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1165,7 +1165,7 @@ static struct { { trace_clock, "perf", 1 }, { ktime_get_mono_fast_ns, "mono", 1 }, { ktime_get_raw_fast_ns, "mono_raw", 1 }, - { ktime_get_mono_fast_ns, "boot", 1 }, + { ktime_get_boot_fast_ns, "boot", 1 }, ARCH_TRACE_CLOCKS }; From 14d12bb8582e158006c35cce0f8ae1706094f9a4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 12 Apr 2018 14:39:10 +0200 Subject: [PATCH 632/660] x86/mm: Make vmemmap and vmalloc base address constants unsigned long Commits 9b46a051e4 ("x86/mm: Initialize vmemmap_base at boot-time") and a7412546d8 ("x86/mm: Adjust vmalloc base and size at boot-time") lost the type information for __VMALLOC_BASE_L4, __VMALLOC_BASE_L5, __VMEMMAP_BASE_L4 and __VMEMMAP_BASE_L5 constants. Declare them explicitly unsigned long again. Fixes: 9b46a051e4 ("x86/mm: Initialize vmemmap_base at boot-time") Fixes: a7412546d8 ("x86/mm: Adjust vmalloc base and size at boot-time") Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Acked-by: "Kirill A. Shutemov" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1804121437350.28129@cbobk.fhfr.pm --- arch/x86/include/asm/pgtable_64_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index d5c21a382475..adb47552e6bb 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -105,14 +105,14 @@ extern unsigned int ptrs_per_p4d; #define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) -#define __VMALLOC_BASE_L4 0xffffc90000000000 -#define __VMALLOC_BASE_L5 0xffa0000000000000 +#define __VMALLOC_BASE_L4 0xffffc90000000000UL +#define __VMALLOC_BASE_L5 0xffa0000000000000UL #define VMALLOC_SIZE_TB_L4 32UL #define VMALLOC_SIZE_TB_L5 12800UL -#define __VMEMMAP_BASE_L4 0xffffea0000000000 -#define __VMEMMAP_BASE_L5 0xffd4000000000000 +#define __VMEMMAP_BASE_L4 0xffffea0000000000UL +#define __VMEMMAP_BASE_L5 0xffd4000000000000UL #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base From 0f925660a7bc49b269c163249a5d06da3a0c7b0a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 26 Apr 2018 22:00:29 +0900 Subject: [PATCH 633/660] ALSA: dice: fix error path to destroy initialized stream data In error path of snd_dice_stream_init_duplex(), stream data for incoming packet can be left to be initialized. This commit fixes it. Fixes: 436b5abe2224 ('ALSA: dice: handle whole available isochronous streams') Cc: # v4.6+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/dice/dice-stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index 8573289c381e..928a255bfc35 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -435,7 +435,7 @@ int snd_dice_stream_init_duplex(struct snd_dice *dice) err = init_stream(dice, AMDTP_IN_STREAM, i); if (err < 0) { for (; i >= 0; i--) - destroy_stream(dice, AMDTP_OUT_STREAM, i); + destroy_stream(dice, AMDTP_IN_STREAM, i); goto end; } } From da6fa7ef67f07108a1b0cb9fd9e7fcaabd39c051 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 3 Apr 2018 09:02:28 -0500 Subject: [PATCH 634/660] x86/smpboot: Don't use mwait_play_dead() on AMD systems Recent AMD systems support using MWAIT for C1 state. However, MWAIT will not allow deeper cstates than C1 on current systems. play_dead() expects to use the deepest state available. The deepest state available on AMD systems is reached through SystemIO or HALT. If MWAIT is available, it is preferred over the other methods, so the CPU never reaches the deepest possible state. Don't try to use MWAIT to play_dead() on AMD systems. Instead, use CPUIDLE to enter the deepest state advertised by firmware. If CPUIDLE is not available then fallback to HALT. Signed-off-by: Yazen Ghannam Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: stable@vger.kernel.org Cc: Yazen Ghannam Link: https://lkml.kernel.org/r/20180403140228.58540-1-Yazen.Ghannam@amd.com --- arch/x86/kernel/smpboot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 45175b81dd5b..0f1cbb042f49 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1571,6 +1571,8 @@ static inline void mwait_play_dead(void) void *mwait_ptr; int i; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return; if (!this_cpu_has(X86_FEATURE_MWAIT)) return; if (!this_cpu_has(X86_FEATURE_CLFLUSH)) From e13db2d3371ee3fb77cd1991756c74e6d02ad41e Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 17 Apr 2018 14:53:08 +0200 Subject: [PATCH 635/660] MAINTAINERS: Remove myself as maintainer I am leaving Axis, so this address will bounce in the not too distant future. Fortunately, I will still be working with the community. Signed-off-by: Niklas Cassel Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 92be777d060a..d29487e42ec4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1208,7 +1208,6 @@ F: drivers/*/*alpine* ARM/ARTPEC MACHINE SUPPORT M: Jesper Nilsson M: Lars Persson -M: Niklas Cassel S: Maintained L: linux-arm-kernel@axis.com F: arch/arm/mach-artpec @@ -10909,7 +10908,6 @@ F: drivers/pci/host/ F: drivers/pci/dwc/ PCIE DRIVER FOR AXIS ARTPEC -M: Niklas Cassel M: Jesper Nilsson L: linux-arm-kernel@axis.com L: linux-pci@vger.kernel.org From 1c3bc8fb10c1803f8651911722ed584db3dfb0f2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 17 Apr 2018 10:53:11 +0200 Subject: [PATCH 636/660] ARM: dts: Fix NAS4220B pin config The DTS file for the NAS4220B had the pin config for the ethernet interface set to the pins in the SL3512 SoC while this system is using SL3516. Fix it by referencing the right SL3516 pins instead of the SL3512 pins. Cc: stable@vger.kernel.org Cc: Hans Ulli Kroll Reported-by: Andreas Fiedler Reported-by: Roman Yeryomin Tested-by: Roman Yeryomin Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/gemini-nas4220b.dts | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/boot/dts/gemini-nas4220b.dts b/arch/arm/boot/dts/gemini-nas4220b.dts index 8bbb6f85d161..4785fbcc41ed 100644 --- a/arch/arm/boot/dts/gemini-nas4220b.dts +++ b/arch/arm/boot/dts/gemini-nas4220b.dts @@ -134,37 +134,37 @@ function = "gmii"; groups = "gmii_gmac0_grp"; }; - /* Settings come from OpenWRT */ + /* Settings come from OpenWRT, pins on SL3516 */ conf0 { - pins = "R8 GMAC0 RXDV", "U11 GMAC1 RXDV"; + pins = "V8 GMAC0 RXDV", "T10 GMAC1 RXDV"; skew-delay = <0>; }; conf1 { - pins = "T8 GMAC0 RXC", "T11 GMAC1 RXC"; + pins = "Y7 GMAC0 RXC", "Y11 GMAC1 RXC"; skew-delay = <15>; }; conf2 { - pins = "P8 GMAC0 TXEN", "V11 GMAC1 TXEN"; + pins = "T8 GMAC0 TXEN", "W11 GMAC1 TXEN"; skew-delay = <7>; }; conf3 { - pins = "V7 GMAC0 TXC"; + pins = "U8 GMAC0 TXC"; skew-delay = <11>; }; conf4 { - pins = "P10 GMAC1 TXC"; + pins = "V11 GMAC1 TXC"; skew-delay = <10>; }; conf5 { /* The data lines all have default skew */ - pins = "U8 GMAC0 RXD0", "V8 GMAC0 RXD1", - "P9 GMAC0 RXD2", "R9 GMAC0 RXD3", - "U7 GMAC0 TXD0", "T7 GMAC0 TXD1", - "R7 GMAC0 TXD2", "P7 GMAC0 TXD3", - "R11 GMAC1 RXD0", "P11 GMAC1 RXD1", - "V12 GMAC1 RXD2", "U12 GMAC1 RXD3", - "R10 GMAC1 TXD0", "T10 GMAC1 TXD1", - "U10 GMAC1 TXD2", "V10 GMAC1 TXD3"; + pins = "W8 GMAC0 RXD0", "V9 GMAC0 RXD1", + "Y8 GMAC0 RXD2", "U9 GMAC0 RXD3", + "T7 GMAC0 TXD0", "U6 GMAC0 TXD1", + "V7 GMAC0 TXD2", "U7 GMAC0 TXD3", + "Y12 GMAC1 RXD0", "V12 GMAC1 RXD1", + "T11 GMAC1 RXD2", "W12 GMAC1 RXD3", + "U10 GMAC1 TXD0", "Y10 GMAC1 TXD1", + "W10 GMAC1 TXD2", "T9 GMAC1 TXD3"; skew-delay = <7>; }; /* Set up drive strength on GMAC0 to 16 mA */ From 4b313ca7b661ab8782f3dcb4a8996632a470b4da Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 19 Apr 2018 22:14:02 +0800 Subject: [PATCH 637/660] HISI LPC: Add Kconfig MFD_CORE dependency For ACPI support of the HiSilicon LPC driver we depend on MFD_CORE config. Currently the HiSi LPC Kconfig entry does not define this dependency, so add it. The reason for depending on MFD_CORE in the driver is that we model the LPC host as an MFD, in that a platform device will be created for each device on the bus. We do this as we need to modify the resources of these derived platform devices, something which we should not do to the original devices created in the ACPI scan. Details in e0aa1563f894 ("HISI LPC: Add ACPI support"). Fixes: e0aa1563f894 ("HISI LPC: Add ACPI support") Reported-and-tested-by: Tan Xiaojun Signed-off-by: John Garry Signed-off-by: Arnd Bergmann --- drivers/bus/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index d1c0b60e9326..6dc177bf4c42 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -33,6 +33,7 @@ config HISILICON_LPC bool "Support for ISA I/O space on HiSilicon Hip06/7" depends on ARM64 && (ARCH_HISI || COMPILE_TEST) select INDIRECT_PIO + select MFD_CORE if ACPI help Driver to enable I/O access to devices attached to the Low Pin Count bus on the HiSilicon Hip06/7 SoC. From ef740508a2e2f26afa25b3a616bef779f9df3b45 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 23 Apr 2018 13:57:00 +0200 Subject: [PATCH 638/660] ARM: s3c24xx: jive: Fix some GPIO names One of the bitbanged SPI hosts had wrongly named GPIO lines due to sloppiness by yours truly. Cc: arm@kernel.org Cc: Mark Brown Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/mach-s3c24xx/mach-jive.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c index 59589a4a0d4b..885e8f12e4b9 100644 --- a/arch/arm/mach-s3c24xx/mach-jive.c +++ b/arch/arm/mach-s3c24xx/mach-jive.c @@ -427,9 +427,9 @@ static struct gpiod_lookup_table jive_wm8750_gpiod_table = { .dev_id = "spi_gpio", .table = { GPIO_LOOKUP("GPIOB", 4, - "gpio-sck", GPIO_ACTIVE_HIGH), + "sck", GPIO_ACTIVE_HIGH), GPIO_LOOKUP("GPIOB", 9, - "gpio-mosi", GPIO_ACTIVE_HIGH), + "mosi", GPIO_ACTIVE_HIGH), GPIO_LOOKUP("GPIOH", 10, "cs", GPIO_ACTIVE_HIGH), { }, From c12d7e9fe9af142bce24f723191c4806da9fecc6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 20 Apr 2018 11:00:44 +0200 Subject: [PATCH 639/660] ARM: defconfig: Update Gemini defconfig This updates the Gemini defconfig with a config that will bring up most of the recently merged and updated devices to some functional level: - We enable high resolution timers (the right thing to do) - Enable CMA for the framebuffer, and the new TVE200 framebuffer driver and the Ilitek ILI9322 driver for graphics on the D-Link DIR-685. HIGHMEM support comes in as part of this. - Enable networking and the new Cortina Gemini ethernet driver. - Enable MDIO over GPIO and the Realtek PHY devices used on several of these systems. - Enable I2C over GPIO and SPI over GPIO which is used on several of these devices. - Enable the Thermal framework, GPIO fan control and LM75 sensor adding cooling on the D-Link DNS-313 with no userspace involved even if only the kernel is working, rock solid thermal for this platform. - Enable JEDEC flash probing to support the Eon flash chip in D-Link DNS-313. - Enable LED disk triggers for the NAS type devices. Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/configs/gemini_defconfig | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/gemini_defconfig b/arch/arm/configs/gemini_defconfig index 2a63fa10c813..553777ac2814 100644 --- a/arch/arm/configs/gemini_defconfig +++ b/arch/arm/configs/gemini_defconfig @@ -1,6 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_USER_NS=y CONFIG_RELAY=y @@ -12,15 +13,21 @@ CONFIG_ARCH_GEMINI=y CONFIG_PCI=y CONFIG_PREEMPT=y CONFIG_AEABI=y +CONFIG_HIGHMEM=y +CONFIG_CMA=y CONFIG_CMDLINE="console=ttyS0,115200n8" CONFIG_KEXEC=y CONFIG_BINFMT_MISC=y CONFIG_PM=y +CONFIG_NET=y +CONFIG_UNIX=y +CONFIG_INET=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_MTD=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y +CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_CFI_STAA=y @@ -33,6 +40,11 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_ATA=y CONFIG_PATA_FTIDE010=y +CONFIG_NETDEVICES=y +CONFIG_GEMINI_ETHERNET=y +CONFIG_MDIO_BITBANG=y +CONFIG_MDIO_GPIO=y +CONFIG_REALTEK_PHY=y CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_GPIO=y # CONFIG_INPUT_MOUSE is not set @@ -43,9 +55,19 @@ CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set -# CONFIG_HWMON is not set +CONFIG_I2C_GPIO=y +CONFIG_SPI=y +CONFIG_SPI_GPIO=y +CONFIG_SENSORS_GPIO_FAN=y +CONFIG_SENSORS_LM75=y +CONFIG_THERMAL=y CONFIG_WATCHDOG=y -CONFIG_GEMINI_WATCHDOG=y +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_DRM=y +CONFIG_DRM_PANEL_ILITEK_IL9322=y +CONFIG_DRM_TVE200=y +CONFIG_LOGO=y CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_FOTG210_HCD=y @@ -54,6 +76,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_RTC_CLASS=y CONFIG_DMADEVICES=y From 9c55ad1c214d9f8c4594ac2c3fa392c1c32431a7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Apr 2018 19:10:55 +0200 Subject: [PATCH 640/660] libceph: validate con->state at the top of try_write() ceph_con_workfn() validates con->state before calling try_read() and then try_write(). However, try_read() temporarily releases con->mutex, notably in process_message() and ceph_con_in_msg_alloc(), opening the window for ceph_con_close() to sneak in, close the connection and release con->sock. When try_write() is called on the assumption that con->state is still valid (i.e. not STANDBY or CLOSED), a NULL sock gets passed to the networking stack: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: selinux_socket_sendmsg+0x5/0x20 Make sure con->state is valid at the top of try_write() and add an explicit BUG_ON for this, similar to try_read(). Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/23706 Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- net/ceph/messenger.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index fcb40c12b1f8..3b3d33ea9ed8 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2569,6 +2569,11 @@ static int try_write(struct ceph_connection *con) int ret = 1; dout("try_write start %p state %lu\n", con, con->state); + if (con->state != CON_STATE_PREOPEN && + con->state != CON_STATE_CONNECTING && + con->state != CON_STATE_NEGOTIATING && + con->state != CON_STATE_OPEN) + return 0; more: dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); @@ -2594,6 +2599,8 @@ more: } more_kvec: + BUG_ON(!con->sock); + /* kvec data queued? */ if (con->out_kvec_left) { ret = write_partial_kvec(con); From 19791a7ca674fb3009bb068260e852a2f05b605c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 25 Apr 2018 17:13:40 +0100 Subject: [PATCH 641/660] arm64: fix possible spectre-v1 in ptrace_hbp_get_event() It's possible for userspace to control idx. Sanitize idx when using it as an array index. Found by smatch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 7a832ce6552f..7ff81fed46e1 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -249,15 +250,20 @@ static struct perf_event *ptrace_hbp_get_event(unsigned int note_type, switch (note_type) { case NT_ARM_HW_BREAK: - if (idx < ARM_MAX_BRP) - bp = tsk->thread.debug.hbp_break[idx]; + if (idx >= ARM_MAX_BRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_BRP); + bp = tsk->thread.debug.hbp_break[idx]; break; case NT_ARM_HW_WATCH: - if (idx < ARM_MAX_WRP) - bp = tsk->thread.debug.hbp_watch[idx]; + if (idx >= ARM_MAX_WRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_WRP); + bp = tsk->thread.debug.hbp_watch[idx]; break; } +out: return bp; } From 41b87599c74300027f305d7b34368ec558978ff2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 25 Apr 2018 17:13:41 +0100 Subject: [PATCH 642/660] KVM: arm/arm64: vgic: fix possible spectre-v1 in vgic_get_irq() It's possible for userspace to control intid. Sanitize intid when using it as an array index. At the same time, sort the includes when adding . Found by smatch. Signed-off-by: Mark Rutland Acked-by: Christoffer Dall Acked-by: Marc Zyngier Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Will Deacon --- virt/kvm/arm/vgic/vgic.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e74baec76361..bd125563b15b 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -14,11 +14,13 @@ * along with this program. If not, see . */ +#include +#include #include #include #include -#include -#include +#include + #include #include "vgic.h" @@ -101,12 +103,16 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid) { /* SGIs and PPIs */ - if (intid <= VGIC_MAX_PRIVATE) + if (intid <= VGIC_MAX_PRIVATE) { + intid = array_index_nospec(intid, VGIC_MAX_PRIVATE); return &vcpu->arch.vgic_cpu.private_irqs[intid]; + } /* SPIs */ - if (intid <= VGIC_MAX_SPI) + if (intid <= VGIC_MAX_SPI) { + intid = array_index_nospec(intid, VGIC_MAX_SPI); return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; + } /* LPIs */ if (intid >= VGIC_MIN_LPI) From 5e1ca5e23b167987d5b6d8b08f2d5b7dd2d13f49 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 25 Apr 2018 17:13:42 +0100 Subject: [PATCH 643/660] KVM: arm/arm64: vgic: fix possible spectre-v1 in vgic_mmio_read_apr() It's possible for userspace to control n. Sanitize n when using it as an array index. Note that while it appears that n must be bound to the interval [0,3] due to the way it is extracted from addr, we cannot guarantee that compiler transformations (and/or future refactoring) will ensure this is the case, and given this is a slow path it's better to always perform the masking. Found by smatch. Signed-off-by: Mark Rutland Acked-by: Christoffer Dall Acked-by: Marc Zyngier Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Will Deacon --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index e21e2f49b005..ffc587bf4742 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include @@ -324,6 +326,9 @@ static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, if (n > vgic_v3_max_apr_idx(vcpu)) return 0; + + n = array_index_nospec(n, 4); + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ return vgicv3->vgic_ap1r[n]; } From f6997bec6af43396ff530caee79e178d32774a49 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Apr 2018 16:16:32 +0200 Subject: [PATCH 644/660] mtd: rawnand: marvell: fix the chip-select DT parsing logic The block responsible of parsing the DT for the number of chip-select lines uses an 'if/else if/else if' block. The content of the second and third 'else if' conditions are: 1/ the actual condition to enter the sub-block and 2/ the operation to do in this sub-block. [...] else if (condition1_to_enter && action1() == failed) raise_error(); else if (condition2_to_enter && action2() == failed) raise_error(); [...] In case of failure, the sub-block is entered and an error raised. Otherwise, in case of success, the code would continue erroneously in the next 'else if' statement because it did not failed (and did not enter the first 'else if' sub-block). The first 'else if' refers to legacy bindings while the second 'else if' refers to new bindings. The second 'else if', which is entered erroneously, checks for the 'reg' property, which, for old bindings, does not mean anything because it would not be the number of CS available, but the regular register map of almost any DT node. This being said, the content of the 'reg' property being the register map offset and length, it has '2' values, so the number of CS in this situation is assumed to be '2'. When running nand_scan_ident() with 2 CS, the core will check for an array of chips. It will first issue a RESET and then a READ_ID. Of course this will trigger two timeouts because there is no chip in front of the second CS: [ 1.367460] marvell-nfc f2720000.nand: Timeout on CMDD (NDSR: 0x00000080) [ 1.474292] marvell-nfc f2720000.nand: Timeout on CMDD (NDSR: 0x00000280) Indeed, this is harmless and the core will then assume there is only one valid CS. Fix the logic in the whole block by entering each sub-block just on the 'is legacy' condition, doing the action inside the sub-block. This way, when the action succeeds, the whole block is left. Furthermore, for both the old bindings and the new bindings the same logic was applied to retrieve the number of CS lines: using of_get_property() to get a size in bytes, converted in the actual number of lines by dividing it per sizeof(u32) (4 bytes). This is fine for the 'reg' property which is a list of the CS IDs but not for the 'num-cs' property which is directly the value of the number of CS. Anyway, no existing DT uses another value than 'num-cs = <1>' and no other value has ever been supported by the old driver (pxa3xx_nand.c). Remove this condition and apply a number of 1 CS anyway, as already described in the bindings. Finally, the 'reg' property of a 'nand' node (with the new bindings) gives the IDs of each CS line in use. marvell_nand.c driver first look at the number of CS lines that are present in this property. Better use of_property_count_elems_of_size() than dividing by 4 the size of the number of bytes returned by of_get_property(). Fixes: 02f26ecf8c772 ("mtd: nand: add reworked Marvell NAND controller driver") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Tested-by: Chris Packham Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/marvell_nand.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 10e953218948..1d779a35ac8e 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -2299,29 +2299,20 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc, /* * The legacy "num-cs" property indicates the number of CS on the only * chip connected to the controller (legacy bindings does not support - * more than one chip). CS are only incremented one by one while the RB - * pin is always the #0. + * more than one chip). The CS and RB pins are always the #0. * * When not using legacy bindings, a couple of "reg" and "nand-rb" * properties must be filled. For each chip, expressed as a subnode, * "reg" points to the CS lines and "nand-rb" to the RB line. */ - if (pdata) { + if (pdata || nfc->caps->legacy_of_bindings) { nsels = 1; - } else if (nfc->caps->legacy_of_bindings && - !of_get_property(np, "num-cs", &nsels)) { - dev_err(dev, "missing num-cs property\n"); - return -EINVAL; - } else if (!of_get_property(np, "reg", &nsels)) { - dev_err(dev, "missing reg property\n"); - return -EINVAL; - } - - if (!pdata) - nsels /= sizeof(u32); - if (!nsels) { - dev_err(dev, "invalid reg property size\n"); - return -EINVAL; + } else { + nsels = of_property_count_elems_of_size(np, "reg", sizeof(u32)); + if (nsels <= 0) { + dev_err(dev, "missing/invalid reg property\n"); + return -EINVAL; + } } /* Alloc the nand chip structure */ From b837913fc2d9061bf9b8c0dd6bf2d24e2f98b84a Mon Sep 17 00:00:00 2001 From: "jacek.tomaka@poczta.fm" Date: Tue, 24 Apr 2018 00:14:25 +0800 Subject: [PATCH 645/660] x86/cpu/intel: Add missing TLB cpuid values Make kernel print the correct number of TLB entries on Intel Xeon Phi 7210 (and others) Before: [ 0.320005] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0 After: [ 0.320005] Last level dTLB entries: 4KB 256, 2MB 128, 4MB 128, 1GB 16 The entries do exist in the official Intel SMD but the type column there is incorrect (states "Cache" where it should read "TLB"), but the entries for the values 0x6B, 0x6C and 0x6D are correctly described as 'Data TLB'. Signed-off-by: Jacek Tomaka Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180423161425.24366-1-jacekt@dugeo.com --- arch/x86/kernel/cpu/intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b9693b80fc21..60d1897041da 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -835,6 +835,9 @@ static const struct _tlb_table intel_tlb_table[] = { { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, + { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, + { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, + { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, From c0f7f5b6c69107ca92909512533e70258ee19188 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Wed, 25 Apr 2018 16:29:31 +0530 Subject: [PATCH 646/660] cpufreq: powernv: Fix hardlockup due to synchronous smp_call in timer interrupt gpstate_timer_handler() uses synchronous smp_call to set the pstate on the requested core. This causes the below hard lockup: smp_call_function_single+0x110/0x180 (unreliable) smp_call_function_any+0x180/0x250 gpstate_timer_handler+0x1e8/0x580 call_timer_fn+0x50/0x1c0 expire_timers+0x138/0x1f0 run_timer_softirq+0x1e8/0x270 __do_softirq+0x158/0x3e4 irq_exit+0xe8/0x120 timer_interrupt+0x9c/0xe0 decrementer_common+0x114/0x120 -- interrupt: 901 at doorbell_global_ipi+0x34/0x50 LR = arch_send_call_function_ipi_mask+0x120/0x130 arch_send_call_function_ipi_mask+0x4c/0x130 smp_call_function_many+0x340/0x450 pmdp_invalidate+0x98/0xe0 change_huge_pmd+0xe0/0x270 change_protection_range+0xb88/0xe40 mprotect_fixup+0x140/0x340 SyS_mprotect+0x1b4/0x350 system_call+0x58/0x6c One way to avoid this is removing the smp-call. We can ensure that the timer always runs on one of the policy-cpus. If the timer gets migrated to a cpu outside the policy then re-queue it back on the policy->cpus. This way we can get rid of the smp-call which was being used to set the pstate on the policy->cpus. Fixes: 7bc54b652f13 ("timers, cpufreq/powernv: Initialize the gpstate timer as pinned") Cc: stable@vger.kernel.org # v4.8+ Reported-by: Nicholas Piggin Reported-by: Pridhiviraj Paidipeddi Signed-off-by: Shilpasri G Bhat Acked-by: Nicholas Piggin Acked-by: Viresh Kumar Acked-by: Vaidyanathan Srinivasan Signed-off-by: Michael Ellerman --- drivers/cpufreq/powernv-cpufreq.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 0591874856d3..54edaec1e608 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -679,6 +679,16 @@ void gpstate_timer_handler(struct timer_list *t) if (!spin_trylock(&gpstates->gpstate_lock)) return; + /* + * If the timer has migrated to the different cpu then bring + * it back to one of the policy->cpus + */ + if (!cpumask_test_cpu(raw_smp_processor_id(), policy->cpus)) { + gpstates->timer.expires = jiffies + msecs_to_jiffies(1); + add_timer_on(&gpstates->timer, cpumask_first(policy->cpus)); + spin_unlock(&gpstates->gpstate_lock); + return; + } /* * If PMCR was last updated was using fast_swtich then @@ -718,10 +728,8 @@ void gpstate_timer_handler(struct timer_list *t) if (gpstate_idx != gpstates->last_lpstate_idx) queue_gpstate_timer(gpstates); + set_pstate(&freq_data); spin_unlock(&gpstates->gpstate_lock); - - /* Timer may get migrated to a different cpu on cpu hot unplug */ - smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1); } /* From 6029755eed95e5c90f763188c87ae3ff41e48e5c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 27 Apr 2018 11:51:59 +1000 Subject: [PATCH 647/660] powerpc: Fix deadlock with multiple calls to smp_send_stop smp_send_stop can lock up the IPI path for any subsequent calls, because the receiving CPUs spin in their handler function. This started becoming a problem with the addition of an smp_send_stop call in the reboot path, because panics can reboot after doing their own smp_send_stop. The NMI IPI variant was fixed with ac61c11566 ("powerpc: Fix smp_send_stop NMI IPI handling"), which leaves the smp_call_function variant. This is fixed by having smp_send_stop only ever do the smp_call_function once. This is a bit less robust than the NMI IPI fix, because any other call to smp_call_function after smp_send_stop could deadlock, but that has always been the case, and it was not been a problem before. Fixes: f2748bdfe1573 ("powerpc/powernv: Always stop secondaries before reboot/shutdown") Reported-by: Abdul Haleem Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 63 ++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3582f30b60b7..9ca7148b5881 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -565,6 +565,35 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif +#ifdef CONFIG_NMI_IPI +static void nmi_stop_this_cpu(struct pt_regs *regs) +{ + /* + * This is a special case because it never returns, so the NMI IPI + * handling would never mark it as done, which makes any later + * smp_send_nmi_ipi() call spin forever. Mark it done now. + * + * IRQs are already hard disabled by the smp_handle_nmi_ipi. + */ + nmi_ipi_lock(); + nmi_ipi_busy_count--; + nmi_ipi_unlock(); + + /* Remove this CPU */ + set_cpu_online(smp_processor_id(), false); + + spin_begin(); + while (1) + spin_cpu_relax(); +} + +void smp_send_stop(void) +{ + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000); +} + +#else /* CONFIG_NMI_IPI */ + static void stop_this_cpu(void *dummy) { /* Remove this CPU */ @@ -576,30 +605,24 @@ static void stop_this_cpu(void *dummy) spin_cpu_relax(); } -#ifdef CONFIG_NMI_IPI -static void nmi_stop_this_cpu(struct pt_regs *regs) -{ - /* - * This is a special case because it never returns, so the NMI IPI - * handling would never mark it as done, which makes any later - * smp_send_nmi_ipi() call spin forever. Mark it done now. - */ - nmi_ipi_lock(); - nmi_ipi_busy_count--; - nmi_ipi_unlock(); - - stop_this_cpu(NULL); -} -#endif - void smp_send_stop(void) { -#ifdef CONFIG_NMI_IPI - smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000); -#else + static bool stopped = false; + + /* + * Prevent waiting on csd lock from a previous smp_send_stop. + * This is racy, but in general callers try to do the right + * thing and only fire off one smp_send_stop (e.g., see + * kernel/panic.c) + */ + if (stopped) + return; + + stopped = true; + smp_call_function(stop_this_cpu, NULL, 0); -#endif } +#endif /* CONFIG_NMI_IPI */ struct thread_info *current_set[NR_CPUS]; From b2d7ecbe355698010a6b7a15eb179e09eb3d6a34 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 26 Apr 2018 15:33:19 +0300 Subject: [PATCH 648/660] powerpc/kvm/booke: Fix altivec related build break Add missing "altivec unavailable" interrupt injection helper thus fixing the linker error below: arch/powerpc/kvm/emulate_loadstore.o: In function `kvmppc_check_altivec_disabled': arch/powerpc/kvm/emulate_loadstore.c: undefined reference to `.kvmppc_core_queue_vec_unavail' Fixes: 09f984961c137c4b ("KVM: PPC: Book3S: Add MMIO emulation for VMX instructions") Signed-off-by: Laurentiu Tudor Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/booke.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6038e2e7aee0..876d4f294fdd 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -305,6 +305,13 @@ void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu) kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); } +#ifdef CONFIG_ALTIVEC +void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL); +} +#endif + void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) { kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); From 3789c122d0a016b947ce5c05d3f1fbafa5db8f26 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 27 Apr 2018 11:50:36 +0100 Subject: [PATCH 649/660] arm64: avoid instrumenting atomic_ll_sc.o Our out-of-line atomics are built with a special calling convention, preventing pointless stack spilling, and allowing us to patch call sites with ARMv8.1 atomic instructions. Instrumentation inserted by the compiler may result in calls to functions not following this special calling convention, resulting in registers being unexpectedly clobbered, and various problems resulting from this. For example, if a kernel is built with KCOV and ARM64_LSE_ATOMICS, the compiler inserts calls to __sanitizer_cov_trace_pc in the prologues of the atomic functions. This has been observed to result in spurious cmpxchg failures, leading to a hang early on in the boot process. This patch avoids such issues by preventing instrumentation of our out-of-line atomics. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/lib/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 0ead8a1d1679..137710f4dac3 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -19,5 +19,9 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ -fcall-saved-x18 -fomit-frame-pointer CFLAGS_REMOVE_atomic_ll_sc.o := -pg +GCOV_PROFILE_atomic_ll_sc.o := n +KASAN_SANITIZE_atomic_ll_sc.o := n +KCOV_INSTRUMENT_atomic_ll_sc.o := n +UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o From 23a27722b5292ef0b27403c87a109feea8296a5c Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Thu, 19 Apr 2018 15:29:22 +0300 Subject: [PATCH 650/660] i2c: dev: prevent ZERO_SIZE_PTR deref in i2cdev_ioctl_rdwr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i2cdev_ioctl_rdwr() allocates i2c_msg.buf using memdup_user(), which returns ZERO_SIZE_PTR if i2c_msg.len is zero. Currently i2cdev_ioctl_rdwr() always dereferences the buf pointer in case of I2C_M_RD | I2C_M_RECV_LEN transfer. That causes a kernel oops in case of zero len. Let's check the len against zero before dereferencing buf pointer. This issue was triggered by syzkaller. Signed-off-by: Alexander Popov Reviewed-by: Uwe Kleine-König [wsa: use '< 1' instead of '!' for easier readability] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 036a03f0d0a6..1667b6e7674f 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -280,7 +280,7 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, */ if (msgs[i].flags & I2C_M_RECV_LEN) { if (!(msgs[i].flags & I2C_M_RD) || - msgs[i].buf[0] < 1 || + msgs[i].len < 1 || msgs[i].buf[0] < 1 || msgs[i].len < msgs[i].buf[0] + I2C_SMBUS_BLOCK_MAX) { res = -EINVAL; From da33aa03fa34c918faf2c371ebda0dd961d7ccb2 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 9 Apr 2018 14:40:54 +0800 Subject: [PATCH 651/660] i2c: sprd: Prevent i2c accesses after suspend is called Add one flag to indicate if the i2c controller has been in suspend state, which can prevent i2c accesses after i2c controller is suspended following system suspend. Signed-off-by: Baolin Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sprd.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 25fcc3c1e32b..2fdad63a23a1 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -86,6 +86,7 @@ struct sprd_i2c { u32 count; int irq; int err; + bool is_suspended; }; static void sprd_i2c_set_count(struct sprd_i2c *i2c_dev, u32 count) @@ -283,6 +284,9 @@ static int sprd_i2c_master_xfer(struct i2c_adapter *i2c_adap, struct sprd_i2c *i2c_dev = i2c_adap->algo_data; int im, ret; + if (i2c_dev->is_suspended) + return -EBUSY; + ret = pm_runtime_get_sync(i2c_dev->dev); if (ret < 0) return ret; @@ -586,11 +590,23 @@ static int sprd_i2c_remove(struct platform_device *pdev) static int __maybe_unused sprd_i2c_suspend_noirq(struct device *pdev) { + struct sprd_i2c *i2c_dev = dev_get_drvdata(pdev); + + i2c_lock_adapter(&i2c_dev->adap); + i2c_dev->is_suspended = true; + i2c_unlock_adapter(&i2c_dev->adap); + return pm_runtime_force_suspend(pdev); } static int __maybe_unused sprd_i2c_resume_noirq(struct device *pdev) { + struct sprd_i2c *i2c_dev = dev_get_drvdata(pdev); + + i2c_lock_adapter(&i2c_dev->adap); + i2c_dev->is_suspended = false; + i2c_unlock_adapter(&i2c_dev->adap); + return pm_runtime_force_resume(pdev); } From 2a010461207cc96bee5ab81748325dec1972976f Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 9 Apr 2018 14:40:55 +0800 Subject: [PATCH 652/660] i2c: sprd: Fix the i2c count issue We found the I2C controller count register is unreliable sometimes, that will cause I2C to lose data. Thus we can read the data count from 'i2c_dev->count' instead of the I2C controller count register. Signed-off-by: Baolin Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sprd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 2fdad63a23a1..4053259bccb8 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -368,13 +368,12 @@ static irqreturn_t sprd_i2c_isr_thread(int irq, void *dev_id) struct sprd_i2c *i2c_dev = dev_id; struct i2c_msg *msg = i2c_dev->msg; bool ack = !(readl(i2c_dev->base + I2C_STATUS) & I2C_RX_ACK); - u32 i2c_count = readl(i2c_dev->base + I2C_COUNT); u32 i2c_tran; if (msg->flags & I2C_M_RD) i2c_tran = i2c_dev->count >= I2C_FIFO_FULL_THLD; else - i2c_tran = i2c_count; + i2c_tran = i2c_dev->count; /* * If we got one ACK from slave when writing data, and we did not @@ -412,14 +411,13 @@ static irqreturn_t sprd_i2c_isr(int irq, void *dev_id) { struct sprd_i2c *i2c_dev = dev_id; struct i2c_msg *msg = i2c_dev->msg; - u32 i2c_count = readl(i2c_dev->base + I2C_COUNT); bool ack = !(readl(i2c_dev->base + I2C_STATUS) & I2C_RX_ACK); u32 i2c_tran; if (msg->flags & I2C_M_RD) i2c_tran = i2c_dev->count >= I2C_FIFO_FULL_THLD; else - i2c_tran = i2c_count; + i2c_tran = i2c_dev->count; /* * If we did not get one ACK from slave when writing data, then we From 3db3eb285259ac129f7aec6b814b3e9f6c1b372b Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Wed, 25 Apr 2018 12:08:35 +0200 Subject: [PATCH 653/660] x86/setup: Do not reserve a crash kernel region if booted on Xen PV Xen PV domains cannot shut down and start a crash kernel. Instead, the crashing kernel makes a SCHEDOP_shutdown hypercall with the reason code SHUTDOWN_crash, cf. xen_crash_shutdown() machine op in arch/x86/xen/enlighten_pv.c. A crash kernel reservation is merely a waste of RAM in this case. It may also confuse users of kexec_load(2) and/or kexec_file_load(2). When flags include KEXEC_ON_CRASH or KEXEC_FILE_ON_CRASH, respectively, these syscalls return success, which is technically correct, but the crash kexec image will never be actually used. Signed-off-by: Petr Tesarik Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Tom Lendacky Cc: Dou Liyang Cc: Mikulas Patocka Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: xen-devel@lists.xenproject.org Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Jean Delvare Link: https://lkml.kernel.org/r/20180425120835.23cef60c@ezekiel.suse.cz --- arch/x86/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6285697b6e56..5c623dfe39d1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -534,6 +535,11 @@ static void __init reserve_crashkernel(void) high = true; } + if (xen_pv_domain()) { + pr_info("Ignoring crashkernel for a Xen PV domain\n"); + return; + } + /* 0 means: find the address automatically */ if (crash_base <= 0) { /* From 1a512c0882bd311c5b5561840fcfbe4c25b8f319 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 24 Apr 2018 23:19:51 +0200 Subject: [PATCH 654/660] x86/ipc: Fix x32 version of shmid64_ds and msqid64_ds A bugfix broke the x32 shmid64_ds and msqid64_ds data structure layout (as seen from user space) a few years ago: Originally, __BITS_PER_LONG was defined as 64 on x32, so we did not have padding after the 64-bit __kernel_time_t fields, After __BITS_PER_LONG got changed to 32, applications would observe extra padding. In other parts of the uapi headers we seem to have a mix of those expecting either 32 or 64 on x32 applications, so we can't easily revert the path that broke these two structures. Instead, this patch decouples x32 from the other architectures and moves it back into arch specific headers, partially reverting the even older commit 73a2d096fdf2 ("x86: remove all now-duplicate header files"). It's not clear whether this ever made any difference, since at least glibc carries its own (correct) copy of both of these header files, so possibly no application has ever observed the definitions here. Based on a suggestion from H.J. Lu, I tried out the tool from https://github.com/hjl-tools/linux-header to find other such bugs, which pointed out the same bug in statfs(), which also has a separate (correct) copy in glibc. Fixes: f4b4aae18288 ("x86/headers/uapi: Fix __BITS_PER_LONG value for x32 builds") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: "H . J . Lu" Cc: Jeffrey Walton Cc: stable@vger.kernel.org Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180424212013.3967461-1-arnd@arndb.de --- arch/x86/include/uapi/asm/msgbuf.h | 31 ++++++++++++++++++++++ arch/x86/include/uapi/asm/shmbuf.h | 42 ++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h index 809134c644a6..90ab9a795b49 100644 --- a/arch/x86/include/uapi/asm/msgbuf.h +++ b/arch/x86/include/uapi/asm/msgbuf.h @@ -1 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_X64_MSGBUF_H +#define __ASM_X64_MSGBUF_H + +#if !defined(__x86_64__) || !defined(__ILP32__) #include +#else +/* + * The msqid64_ds structure for x86 architecture with x32 ABI. + * + * On x86-32 and x86-64 we can just use the generic definition, but + * x32 uses the same binary layout as x86_64, which is differnet + * from other 32-bit architectures. + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ + __kernel_time_t msg_rtime; /* last msgrcv time */ + __kernel_time_t msg_ctime; /* last change time */ + __kernel_ulong_t msg_cbytes; /* current number of bytes on queue */ + __kernel_ulong_t msg_qnum; /* number of messages in queue */ + __kernel_ulong_t msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + __kernel_ulong_t __unused4; + __kernel_ulong_t __unused5; +}; + +#endif + +#endif /* __ASM_GENERIC_MSGBUF_H */ diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h index 83c05fc2de38..644421f3823b 100644 --- a/arch/x86/include/uapi/asm/shmbuf.h +++ b/arch/x86/include/uapi/asm/shmbuf.h @@ -1 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_X86_SHMBUF_H +#define __ASM_X86_SHMBUF_H + +#if !defined(__x86_64__) || !defined(__ILP32__) #include +#else +/* + * The shmid64_ds structure for x86 architecture with x32 ABI. + * + * On x86-32 and x86-64 we can just use the generic definition, but + * x32 uses the same binary layout as x86_64, which is differnet + * from other 32-bit architectures. + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ + __kernel_time_t shm_dtime; /* last detach time */ + __kernel_time_t shm_ctime; /* last change time */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + __kernel_ulong_t shm_nattch; /* no. of current attaches */ + __kernel_ulong_t __unused4; + __kernel_ulong_t __unused5; +}; + +struct shminfo64 { + __kernel_ulong_t shmmax; + __kernel_ulong_t shmmin; + __kernel_ulong_t shmmni; + __kernel_ulong_t shmseg; + __kernel_ulong_t shmall; + __kernel_ulong_t __unused1; + __kernel_ulong_t __unused2; + __kernel_ulong_t __unused3; + __kernel_ulong_t __unused4; +}; + +#endif + +#endif /* __ASM_X86_SHMBUF_H */ From 8bb2610bc4967f19672444a7b0407367f1540028 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 17 Apr 2018 07:36:36 -0700 Subject: [PATCH 655/660] x86/entry/64/compat: Preserve r8-r11 in int $0x80 32-bit user code that uses int $80 doesn't care about r8-r11. There is, however, some 64-bit user code that intentionally uses int $0x80 to invoke 32-bit system calls. From what I've seen, basically all such code assumes that r8-r15 are all preserved, but the kernel clobbers r8-r11. Since I doubt that there's any code that depends on int $0x80 zeroing r8-r11, change the kernel to preserve them. I suspect that very little user code is broken by the old clobber, since r8-r11 are only rarely allocated by gcc, and they're clobbered by function calls, so they only way we'd see a problem is if the same function that invokes int $0x80 also spills something important to one of these registers. The current behavior seems to date back to the historical commit "[PATCH] x86-64 merge for 2.6.4". Before that, all regs were preserved. I can't find any explanation of why this change was made. Update the test_syscall_vdso_32 testcase as well to verify the new behavior, and it strengthens the test to make sure that the kernel doesn't accidentally permute r8..r15. Suggested-by: Denys Vlasenko Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Dominik Brodowski Link: https://lkml.kernel.org/r/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org --- arch/x86/entry/entry_64_compat.S | 8 ++--- .../testing/selftests/x86/test_syscall_vdso.c | 35 +++++++++++-------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 9af927e59d49..9de7f1e1dede 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat) pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ + pushq %r8 /* pt_regs->r8 */ xorl %r8d, %r8d /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ + pushq %r9 /* pt_regs->r9 */ xorl %r9d, %r9d /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ + pushq %r10 /* pt_regs->r10 */ xorl %r10d, %r10d /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ + pushq %r11 /* pt_regs->r11 */ xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c index 40370354d4c1..c9c3281077bc 100644 --- a/tools/testing/selftests/x86/test_syscall_vdso.c +++ b/tools/testing/selftests/x86/test_syscall_vdso.c @@ -100,12 +100,19 @@ asm ( " shl $32, %r8\n" " orq $0x7f7f7f7f, %r8\n" " movq %r8, %r9\n" - " movq %r8, %r10\n" - " movq %r8, %r11\n" - " movq %r8, %r12\n" - " movq %r8, %r13\n" - " movq %r8, %r14\n" - " movq %r8, %r15\n" + " incq %r9\n" + " movq %r9, %r10\n" + " incq %r10\n" + " movq %r10, %r11\n" + " incq %r11\n" + " movq %r11, %r12\n" + " incq %r12\n" + " movq %r12, %r13\n" + " incq %r13\n" + " movq %r13, %r14\n" + " incq %r14\n" + " movq %r14, %r15\n" + " incq %r15\n" " ret\n" " .code32\n" " .popsection\n" @@ -128,12 +135,13 @@ int check_regs64(void) int err = 0; int num = 8; uint64_t *r64 = ®s64.r8; + uint64_t expected = 0x7f7f7f7f7f7f7f7fULL; if (!kernel_is_64bit) return 0; do { - if (*r64 == 0x7f7f7f7f7f7f7f7fULL) + if (*r64 == expected++) continue; /* register did not change */ if (syscall_addr != (long)&int80) { /* @@ -147,18 +155,17 @@ int check_regs64(void) continue; } } else { - /* INT80 syscall entrypoint can be used by + /* + * INT80 syscall entrypoint can be used by * 64-bit programs too, unlike SYSCALL/SYSENTER. * Therefore it must preserve R12+ * (they are callee-saved registers in 64-bit C ABI). * - * This was probably historically not intended, - * but R8..11 are clobbered (cleared to 0). - * IOW: they are the only registers which aren't - * preserved across INT80 syscall. + * Starting in Linux 4.17 (and any kernel that + * backports the change), R8..11 are preserved. + * Historically (and probably unintentionally), they + * were clobbered or zeroed. */ - if (*r64 == 0 && num <= 11) - continue; } printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64); err++; From a468f2dbf921d02f5107378501693137a812999b Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 26 Apr 2018 13:09:50 -0700 Subject: [PATCH 656/660] kvm: apic: Flush TLB after APIC mode/address change if VPIDs are in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, KVM flushes the TLB after a change to the APIC access page address or the APIC mode when EPT mode is enabled. However, even in shadow paging mode, a TLB flush is needed if VPIDs are being used, as specified in the Intel SDM Section 29.4.5. So replace vmx_flush_tlb_ept_only() with vmx_flush_tlb(), which will flush if either EPT or VPIDs are in use. Signed-off-by: Junaid Shahid Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa66ccd6ed6c..c7668806163f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4544,12 +4544,6 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); } -static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) -{ - if (enable_ept) - vmx_flush_tlb(vcpu, true); -} - static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -9278,7 +9272,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } else { sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -9306,7 +9300,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) !nested_cpu_has2(get_vmcs12(&vmx->vcpu), SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } } @@ -11220,7 +11214,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } } else if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } /* @@ -12073,7 +12067,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, } else if (!nested_cpu_has_ept(vmcs12) && nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ From 5e62493f1a70e7f13059544daaee05e40e8548e2 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Tue, 17 Apr 2018 06:43:58 +0200 Subject: [PATCH 657/660] x86/headers/UAPI: Move DISABLE_EXITS KVM capability bits to the UAPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move DISABLE_EXITS KVM capability bits to the UAPI just like the rest of capabilities. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: KarimAllah Ahmed Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.h | 7 ------- include/uapi/linux/kvm.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7d35ce672989..c9492f764902 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -302,13 +302,6 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) __rem; \ }) -#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) -#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) -#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HTL | \ - KVM_X86_DISABLE_EXITS_PAUSE) - static inline bool kvm_mwait_in_guest(struct kvm *kvm) { return kvm->arch.mwait_in_guest; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1065006c9bf5..b02c41e53d56 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -676,6 +676,13 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ + KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_PAUSE) + /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { /* in */ From bf8f5de17442bba5f811e7e724980730e079ee11 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sat, 28 Apr 2018 19:05:04 +0200 Subject: [PATCH 658/660] MAINTAINERS: add myself as maintainer of AFFS The AFFS filesystem is still in use by m68k community (Link #2), but as there was no code activity and no maintainer, the filesystem appeared on the list of candidates for staging/removal (Link #1). I volunteer to act as a maintainer of AFFS to collect any fixes that might show up and to guard fs/affs/ against another spring cleaning. Link: https://lkml.kernel.org/r/20180425154602.GA8546@bombadil.infradead.org Link: https://lkml.kernel.org/r/1613268.lKBQxPXt8J@merkaba CC: Martin Steigerwald CC: John Paul Adrian Glaubitz Signed-off-by: David Sterba Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index db2bc3fe0ba4..79bb02ff812f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -564,8 +564,9 @@ S: Maintained F: drivers/media/dvb-frontends/af9033* AFFS FILE SYSTEM +M: David Sterba L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Odd Fixes F: Documentation/filesystems/affs.txt F: fs/affs/ From 19b9ad67310ed2f685062a00aec602bec33835f0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 5 Feb 2018 19:32:18 +0200 Subject: [PATCH 659/660] : fix end_name_hash() for 64bit long The comment claims that this helper will try not to loose bits, but for 64bit long it looses the high bits before hashing 64bit long into 32bit int. Use the helper hash_long() to do the right thing for 64bit long. For 32bit long, there is no change. All the callers of end_name_hash() either assign the result to qstr->hash, which is u32 or return the result as an int value (e.g. full_name_hash()). Change the helper return type to int to conform to its users. [ It took me a while to apply this, because my initial reaction to it was - incorrectly - that it could make for slower code. After having looked more at it, I take back all my complaints about the patch, Amir was right and I was mis-reading things or just being stupid. I also don't worry too much about the possible performance impact of this on 64-bit, since most architectures that actually care about performance end up not using this very much (the dcache code is the most performance-critical, but the word-at-a-time case uses its own hashing anyway). So this ends up being mostly used for filesystems that do their own degraded hashing (usually because they want a case-insensitive comparison function). A _tiny_ worry remains, in that not everybody uses DCACHE_WORD_ACCESS, and then this potentially makes things more expensive on 64-bit architectures with slow or lacking multipliers even for the normal case. That said, realistically the only such architecture I can think of is PA-RISC. Nobody really cares about performance on that, it's more of a "look ma, I've got warts^W an odd machine" platform. So the patch is fine, and all my initial worries were just misplaced from not looking at this properly. - Linus ] Signed-off-by: Amir Goldstein Signed-off-by: Linus Torvalds --- include/linux/stringhash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h index e8f0f852968f..c0c5c5b73dc0 100644 --- a/include/linux/stringhash.h +++ b/include/linux/stringhash.h @@ -50,9 +50,9 @@ partial_name_hash(unsigned long c, unsigned long prevhash) * losing bits). This also has the property (wanted by the dcache) * that the msbits make a good hash table index. */ -static inline unsigned long end_name_hash(unsigned long hash) +static inline unsigned int end_name_hash(unsigned long hash) { - return __hash_32((unsigned int)hash); + return hash_long(hash, 32); } /* From 6da6c0db5316275015e8cc2959f12a17584aeb64 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Apr 2018 14:17:42 -0700 Subject: [PATCH 660/660] Linux v4.17-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 83b6c541565a..619a85ad716b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Fearless Coyote # *DOCUMENTATION*