From 40a1937317e26d05c815096e1151df6600b7b525 Mon Sep 17 00:00:00 2001 From: Adrian Salido Date: Thu, 27 Apr 2017 10:32:55 -0700 Subject: [PATCH 001/465] dm ioctl: prevent stack leak in dm ioctl call commit 4617f564c06117c7d1b611be49521a4430042287 upstream. When calling a dm ioctl that doesn't process any data (IOCTL_FLAGS_NO_PARAMS), the contents of the data field in struct dm_ioctl are left initialized. Current code is incorrectly extending the size of data copied back to user, causing the contents of kernel stack to be leaked to user. Fix by only copying contents before data and allow the functions processing the ioctl to override. Signed-off-by: Adrian Salido Reviewed-by: Alasdair G Kergon Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 4da6fc6b1ffd..3d040f52539c 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1848,7 +1848,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) if (r) goto out; - param->data_size = sizeof(*param); + param->data_size = offsetof(struct dm_ioctl, data); r = fn(param, input_param_size); if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) && From 648ada88cba2e78171d8ee81f4fceffc0386f6aa Mon Sep 17 00:00:00 2001 From: Vincent Abriou Date: Thu, 23 Mar 2017 15:44:52 +0100 Subject: [PATCH 002/465] drm/sti: fix GDP size to support up to UHD resolution commit 2f410f88c0a1c7e19762918d2614f506a7b63a82 upstream. On stih407-410 chip family the GDP layers are able to support up to UHD resolution (3840 x 2160). Signed-off-by: Vincent Abriou Acked-by: Lee Jones Tested-by: Lee Jones Link: http://patchwork.freedesktop.org/patch/msgid/1490280292-30466-1-git-send-email-vincent.abriou@st.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/sti/sti_gdp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c index 86279f5022c2..88f16cdf6a4b 100644 --- a/drivers/gpu/drm/sti/sti_gdp.c +++ b/drivers/gpu/drm/sti/sti_gdp.c @@ -66,7 +66,9 @@ static struct gdp_format_to_str { #define GAM_GDP_ALPHARANGE_255 BIT(5) #define GAM_GDP_AGC_FULL_RANGE 0x00808080 #define GAM_GDP_PPT_IGNORE (BIT(1) | BIT(0)) -#define GAM_GDP_SIZE_MAX 0x7FF + +#define GAM_GDP_SIZE_MAX_WIDTH 3840 +#define GAM_GDP_SIZE_MAX_HEIGHT 2160 #define GDP_NODE_NB_BANK 2 #define GDP_NODE_PER_FIELD 2 @@ -632,8 +634,8 @@ static int sti_gdp_atomic_check(struct drm_plane *drm_plane, /* src_x are in 16.16 format */ src_x = state->src_x >> 16; src_y = state->src_y >> 16; - src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX); - src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX); + src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX_WIDTH); + src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX_HEIGHT); format = sti_gdp_fourcc2format(fb->format->format); if (format == -1) { @@ -741,8 +743,8 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane, /* src_x are in 16.16 format */ src_x = state->src_x >> 16; src_y = state->src_y >> 16; - src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX); - src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX); + src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX_WIDTH); + src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX_HEIGHT); list = sti_gdp_get_free_nodes(gdp); top_field = list->top_field; From b998524b6c6082805dded8b03c826233f66241bd Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Sat, 25 Mar 2017 18:00:49 +0200 Subject: [PATCH 003/465] power: supply: lp8788: prevent out of bounds array access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bdd9968d35f7fcdb76089347d1529bf079534214 upstream. val might become 7 in which case stime[7] (array of length 7) would be accessed during the scnprintf call later and that will cause issues. Obviously, string concatenation is not intended here so just a comma needs to be added to fix the issue. Fixes: 98a276649358 ("power_supply: Add new lp8788 charger driver") Signed-off-by: Giedrius Statkevičius Acked-by: Milo Kim Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/lp8788-charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/lp8788-charger.c b/drivers/power/supply/lp8788-charger.c index 509e2b341bd6..677f7c40b25a 100644 --- a/drivers/power/supply/lp8788-charger.c +++ b/drivers/power/supply/lp8788-charger.c @@ -651,7 +651,7 @@ static ssize_t lp8788_show_eoc_time(struct device *dev, { struct lp8788_charger *pchg = dev_get_drvdata(dev); char *stime[] = { "400ms", "5min", "10min", "15min", - "20min", "25min", "30min" "No timeout" }; + "20min", "25min", "30min", "No timeout" }; u8 val; lp8788_read_byte(pchg->lp, LP8788_CHG_EOC, &val); From 7f073b7e4d63afdcf3abc91f8d9c11ab9317ed0b Mon Sep 17 00:00:00 2001 From: James Hughes Date: Mon, 24 Apr 2017 12:40:50 +0100 Subject: [PATCH 004/465] brcmfmac: Ensure pointer correctly set if skb data location changes commit 455a1eb4654c24560eb9dfc634f29cba3d87601e upstream. The incoming skb header may be resized if header space is insufficient, which might change the data adddress in the skb. Ensure that a cached pointer to that data is correctly set by moving assignment to after any possible changes. Signed-off-by: James Hughes Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 60da86a8d95b..b56440ab520b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -198,7 +198,7 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb, int ret; struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_pub *drvr = ifp->drvr; - struct ethhdr *eh = (struct ethhdr *)(skb->data); + struct ethhdr *eh; brcmf_dbg(DATA, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx); @@ -236,6 +236,8 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb, goto done; } + eh = (struct ethhdr *)(skb->data); + if (eh->h_proto == htons(ETH_P_PAE)) atomic_inc(&ifp->pend_8021x_cnt); From c98f47c1f645774e02e04e94111c98b97462ee98 Mon Sep 17 00:00:00 2001 From: James Hughes Date: Tue, 25 Apr 2017 10:15:06 +0100 Subject: [PATCH 005/465] brcmfmac: Make skb header writable before use commit 9cc4b7cb86cbcc6330a3faa8cd65268cd2d3c227 upstream. The driver was making changes to the skb_header without ensuring it was writable (i.e. uncloned). This patch also removes some boiler plate header size checking/adjustment code as that is also handled by the skb_cow_header function used to make header writable. 
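The lp8788-charger fix earlier in this series turns on a subtle C rule: adjacent string literals are concatenated at compile time, so the missing comma silently merged "30min" and "No timeout" into one entry and shrank the array from 8 elements to 7, making stime[7] an out-of-bounds read. A minimal stand-alone sketch of the effect (plain userspace C that mirrors the driver's table; it is an illustration, not driver code):

#include <stdio.h>

int main(void)
{
	const char *buggy[] = { "400ms", "5min", "10min", "15min",
				"20min", "25min", "30min" "No timeout" };
	const char *fixed[] = { "400ms", "5min", "10min", "15min",
				"20min", "25min", "30min", "No timeout" };

	/* The missing comma concatenates the last two literals. */
	printf("buggy has %zu entries, fixed has %zu\n",
	       sizeof(buggy) / sizeof(buggy[0]),
	       sizeof(fixed) / sizeof(fixed[0]));

	/* buggy[6] is "30minNo timeout"; buggy[7] would be the out-of-bounds
	 * access that lp8788_show_eoc_time could perform when val == 7. */
	printf("buggy[6] = \"%s\", fixed[7] = \"%s\"\n", buggy[6], fixed[7]);
	return 0;
}

Compiled with any C compiler this prints 7 versus 8 entries, which is why the one-character change above is a real bounds fix rather than a cosmetic one.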
Signed-off-by: James Hughes Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- .../broadcom/brcm80211/brcmfmac/core.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index b56440ab520b..f6b17fb58877 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -211,22 +211,13 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb, goto done; } - /* Make sure there's enough room for any header */ - if (skb_headroom(skb) < drvr->hdrlen) { - struct sk_buff *skb2; - - brcmf_dbg(INFO, "%s: insufficient headroom\n", + /* Make sure there's enough writable headroom*/ + ret = skb_cow_head(skb, drvr->hdrlen); + if (ret < 0) { + brcmf_err("%s: skb_cow_head failed\n", brcmf_ifname(ifp)); - drvr->bus_if->tx_realloc++; - skb2 = skb_realloc_headroom(skb, drvr->hdrlen); dev_kfree_skb(skb); - skb = skb2; - if (skb == NULL) { - brcmf_err("%s: skb_realloc_headroom failed\n", - brcmf_ifname(ifp)); - ret = -ENOMEM; - goto done; - } + goto done; } /* validate length for ether packet */ From e2b8851f1fdfe1d038fa9a03497d257f7a8ffb63 Mon Sep 17 00:00:00 2001 From: Dave Aldridge Date: Tue, 9 May 2017 02:57:35 -0600 Subject: [PATCH 006/465] sparc64: fix fault handling in NGbzero.S and GENbzero.S commit 3c7f62212018b904ae17f5636ead18a4dca3a88f upstream. When any of the functions contained in NGbzero.S and GENbzero.S vector through *bzero_from_clear_user, we may end up taking a fault when executing one of the store alternate address space instructions. If this happens, the exception handler does not restore the %asi register. This commit fixes the issue by introducing a new exception handler that ensures the %asi register is restored when a fault is handled. Orabug: 25577560 Signed-off-by: Dave Aldridge Reviewed-by: Rob Gardner Reviewed-by: Babu Moger Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/head_64.S | 6 ++++++ arch/sparc/lib/GENbzero.S | 2 +- arch/sparc/lib/NGbzero.S | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 44101196d02b..41a407328667 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -939,3 +939,9 @@ ENTRY(__retl_o1) retl mov %o1, %o0 ENDPROC(__retl_o1) + +ENTRY(__retl_o1_asi) + wr %o5, 0x0, %asi + retl + mov %o1, %o0 +ENDPROC(__retl_o1_asi) diff --git a/arch/sparc/lib/GENbzero.S b/arch/sparc/lib/GENbzero.S index 8e7a843ddd88..2fbf6297d57c 100644 --- a/arch/sparc/lib/GENbzero.S +++ b/arch/sparc/lib/GENbzero.S @@ -8,7 +8,7 @@ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_o1; \ + .word 98b, __retl_o1_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NGbzero.S b/arch/sparc/lib/NGbzero.S index beab29bf419b..33053bdf3766 100644 --- a/arch/sparc/lib/NGbzero.S +++ b/arch/sparc/lib/NGbzero.S @@ -8,7 +8,7 @@ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_o1; \ + .word 98b, __retl_o1_asi;\ .text; \ .align 4; From acc7c954b035b53c58714ce951354becaddfd5b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 May 2017 15:51:03 -0700 Subject: [PATCH 007/465] refcount: change EXPORT_SYMBOL markings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d557d1b58b3546bab2c5bc2d624c5709840e6b10 upstream. Now that kref is using the refcount apis, the _GPL markings are getting exported to places that it previously wasn't. Now kref.h is GPLv2 licensed, so any non-GPL code using it better be talking to some lawyers, but changing api markings isn't considered "nice", so let's fix this up. Cc: Philip Müller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- lib/refcount.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/refcount.c b/lib/refcount.c index aa09ad3c30b0..26dffb7e4c04 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -62,13 +62,13 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) return true; } -EXPORT_SYMBOL_GPL(refcount_add_not_zero); +EXPORT_SYMBOL(refcount_add_not_zero); void refcount_add(unsigned int i, refcount_t *r) { WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); } -EXPORT_SYMBOL_GPL(refcount_add); +EXPORT_SYMBOL(refcount_add); /* * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. @@ -101,7 +101,7 @@ bool refcount_inc_not_zero(refcount_t *r) return true; } -EXPORT_SYMBOL_GPL(refcount_inc_not_zero); +EXPORT_SYMBOL(refcount_inc_not_zero); /* * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. 
@@ -113,7 +113,7 @@ void refcount_inc(refcount_t *r) { WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); } -EXPORT_SYMBOL_GPL(refcount_inc); +EXPORT_SYMBOL(refcount_inc); bool refcount_sub_and_test(unsigned int i, refcount_t *r) { @@ -138,7 +138,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) return !new; } -EXPORT_SYMBOL_GPL(refcount_sub_and_test); +EXPORT_SYMBOL(refcount_sub_and_test); /* * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to @@ -152,7 +152,7 @@ bool refcount_dec_and_test(refcount_t *r) { return refcount_sub_and_test(1, r); } -EXPORT_SYMBOL_GPL(refcount_dec_and_test); +EXPORT_SYMBOL(refcount_dec_and_test); /* * Similar to atomic_dec(), it will WARN on underflow and fail to decrement @@ -166,7 +166,7 @@ void refcount_dec(refcount_t *r) { WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); } -EXPORT_SYMBOL_GPL(refcount_dec); +EXPORT_SYMBOL(refcount_dec); /* * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the @@ -183,7 +183,7 @@ bool refcount_dec_if_one(refcount_t *r) { return atomic_cmpxchg_release(&r->refs, 1, 0) == 1; } -EXPORT_SYMBOL_GPL(refcount_dec_if_one); +EXPORT_SYMBOL(refcount_dec_if_one); /* * No atomic_t counterpart, it decrements unless the value is 1, in which case @@ -217,7 +217,7 @@ bool refcount_dec_not_one(refcount_t *r) return true; } -EXPORT_SYMBOL_GPL(refcount_dec_not_one); +EXPORT_SYMBOL(refcount_dec_not_one); /* * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail @@ -240,7 +240,7 @@ bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) return true; } -EXPORT_SYMBOL_GPL(refcount_dec_and_mutex_lock); +EXPORT_SYMBOL(refcount_dec_and_mutex_lock); /* * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to @@ -263,5 +263,5 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) return true; } -EXPORT_SYMBOL_GPL(refcount_dec_and_lock); +EXPORT_SYMBOL(refcount_dec_and_lock); From c0e5b847c77e2b8265a5abe7f7642526f08cd51a Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 26 Apr 2017 12:06:28 +0200 Subject: [PATCH 008/465] net: macb: fix phy interrupt parsing [ Upstream commit ae3696c167cc04d32634c4af82f43b446c5176b0 ] Since 83a77e9ec415, the phydev irq is explicitly set to PHY_POLL when there is no pdata. It doesn't work on DT enabled platforms because the phydev irq is already set by libphy before. Fixes: 83a77e9ec415 ("net: macb: Added PCI wrapper for Platform Driver.") Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 30606b11b128..377fb0f22a5b 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -432,15 +432,17 @@ static int macb_mii_probe(struct net_device *dev) } pdata = dev_get_platdata(&bp->pdev->dev); - if (pdata && gpio_is_valid(pdata->phy_irq_pin)) { - ret = devm_gpio_request(&bp->pdev->dev, pdata->phy_irq_pin, - "phy int"); - if (!ret) { - phy_irq = gpio_to_irq(pdata->phy_irq_pin); - phydev->irq = (phy_irq < 0) ? 
PHY_POLL : phy_irq; + if (pdata) { + if (gpio_is_valid(pdata->phy_irq_pin)) { + ret = devm_gpio_request(&bp->pdev->dev, + pdata->phy_irq_pin, "phy int"); + if (!ret) { + phy_irq = gpio_to_irq(pdata->phy_irq_pin); + phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq; + } + } else { + phydev->irq = PHY_POLL; } - } else { - phydev->irq = PHY_POLL; } /* attach the mac to the phy */ From 011fbfb296e4cfcd5769fb3f84ae2f263b1faac1 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 26 Apr 2017 19:07:35 +0200 Subject: [PATCH 009/465] tcp: fix access to sk->sk_state in tcp_poll() [ Upstream commit d68be71ea14d609a5f31534003319be5db422595 ] avoid direct access to sk->sk_state when tcp_poll() is called on a socket using active TCP fastopen with deferred connect. Use local variable 'state', which stores the result of sk_state_load(), like it was done in commit 00fd38d938db ("tcp: ensure proper barriers in lockless contexts"). Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support") Signed-off-by: Davide Caratti Acked-by: Wei Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 40ba4249a586..2dc7fcf60bf3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -533,7 +533,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; - } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect * Return POLLOUT so application can call write() * in order for kernel to generate SYN+data From 5a5b34164f93d5cb002a9eb6a9a8024e24e5e299 Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Thu, 27 Apr 2017 14:11:53 -0700 Subject: [PATCH 010/465] geneve: fix incorrect setting of UDP checksum flag [ Upstream commit 5e0740c445e6ae4026f5e52456ff8d0be9725183 ] Creating a geneve link with 'udpcsum' set results in a creation of link for which UDP checksum will NOT be computed on outbound packets, as can be seen below. 11: gen0: mtu 1500 qdisc noop state DOWN link/ether c2:85:27:b6:b4:15 brd ff:ff:ff:ff:ff:ff promiscuity 0 geneve id 200 remote 192.168.13.1 dstport 6081 noudpcsum Similarly, creating a link with 'noudpcsum' set results in a creation of link for which UDP checksum will be computed on outbound packets. Fixes: 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") Signed-off-by: Girish Moodalbail Acked-by: Pravin B Shelar Acked-by: Lance Richardson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 7074b40ebd7f..dec5d563ab19 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1244,7 +1244,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev, metadata = true; if (data[IFLA_GENEVE_UDP_CSUM] && - !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) + nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) info.key.tun_flags |= TUNNEL_CSUM; if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && From 34acfbf08cce238b79cfdc6c59a5fea020056c08 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 29 Apr 2017 22:52:42 -0700 Subject: [PATCH 011/465] bpf: enhance verifier to understand stack pointer arithmetic [ Upstream commit 332270fdc8b6fba07d059a9ad44df9e1a2ad4529 ] llvm 4.0 and above generates the code like below: .... 
440: (b7) r1 = 15 441: (05) goto pc+73 515: (79) r6 = *(u64 *)(r10 -152) 516: (bf) r7 = r10 517: (07) r7 += -112 518: (bf) r2 = r7 519: (0f) r2 += r1 520: (71) r1 = *(u8 *)(r8 +0) 521: (73) *(u8 *)(r2 +45) = r1 .... and the verifier complains "R2 invalid mem access 'inv'" for insn #521. This is because verifier marks register r2 as unknown value after #519 where r2 is a stack pointer and r1 holds a constant value. Teach verifier to recognize "stack_ptr + imm" and "stack_ptr + reg with const val" as valid stack_ptr with new offset. Signed-off-by: Yonghong Song Acked-by: Martin KaFai Lau Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 11 +++++++++++ tools/testing/selftests/bpf/test_verifier.c | 18 ++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a834068a400e..f27849e580c6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1909,6 +1909,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) dst_reg->type = PTR_TO_STACK; dst_reg->imm = insn->imm; return 0; + } else if (opcode == BPF_ADD && + BPF_CLASS(insn->code) == BPF_ALU64 && + dst_reg->type == PTR_TO_STACK && + ((BPF_SRC(insn->code) == BPF_X && + regs[insn->src_reg].type == CONST_IMM) || + BPF_SRC(insn->code) == BPF_K)) { + if (BPF_SRC(insn->code) == BPF_X) + dst_reg->imm += regs[insn->src_reg].imm; + else + dst_reg->imm += insn->imm; + return 0; } else if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && (dst_reg->type == PTR_TO_PACKET || diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c848e90b6421..8b433bf3fdd7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1809,16 +1809,22 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "unpriv: obfuscate stack pointer", + "stack pointer arithmetic", .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R2 pointer arithmetic", - .result_unpriv = REJECT, .result = ACCEPT, }, { From f94cc32ccbbab581ffd116795ac76c8ae69b8f22 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 May 2017 20:34:54 +0200 Subject: [PATCH 012/465] bpf, arm64: fix jit branch offset related to ldimm64 [ Upstream commit ddc665a4bb4b728b4e6ecec8db1b64efa9184b9c ] When the instruction right before the branch destination is a 64 bit load immediate, we currently calculate the wrong jump offset in the ctx->offset[] array as we only account one instruction slot for the 64 bit load immediate although it uses two BPF instructions. Fix it up by setting the offset into the right slot after we incremented the index. Before (ldimm64 test 1): [...] 
00000020: 52800007 mov w7, #0x0 // #0 00000024: d2800060 mov x0, #0x3 // #3 00000028: d2800041 mov x1, #0x2 // #2 0000002c: eb01001f cmp x0, x1 00000030: 54ffff82 b.cs 0x00000020 00000034: d29fffe7 mov x7, #0xffff // #65535 00000038: f2bfffe7 movk x7, #0xffff, lsl #16 0000003c: f2dfffe7 movk x7, #0xffff, lsl #32 00000040: f2ffffe7 movk x7, #0xffff, lsl #48 00000044: d29dddc7 mov x7, #0xeeee // #61166 00000048: f2bdddc7 movk x7, #0xeeee, lsl #16 0000004c: f2ddddc7 movk x7, #0xeeee, lsl #32 00000050: f2fdddc7 movk x7, #0xeeee, lsl #48 [...] After (ldimm64 test 1): [...] 00000020: 52800007 mov w7, #0x0 // #0 00000024: d2800060 mov x0, #0x3 // #3 00000028: d2800041 mov x1, #0x2 // #2 0000002c: eb01001f cmp x0, x1 00000030: 540000a2 b.cs 0x00000044 00000034: d29fffe7 mov x7, #0xffff // #65535 00000038: f2bfffe7 movk x7, #0xffff, lsl #16 0000003c: f2dfffe7 movk x7, #0xffff, lsl #32 00000040: f2ffffe7 movk x7, #0xffff, lsl #48 00000044: d29dddc7 mov x7, #0xeeee // #61166 00000048: f2bdddc7 movk x7, #0xeeee, lsl #16 0000004c: f2ddddc7 movk x7, #0xeeee, lsl #32 00000050: f2fdddc7 movk x7, #0xeeee, lsl #48 [...] Also, add a couple of test cases to make sure JITs pass this test. Tested on Cavium ThunderX ARMv8. The added test cases all pass after the fix. Fixes: 8eee539ddea0 ("arm64: bpf: fix out-of-bounds read in bpf2a64_offset()") Reported-by: David S. Miller Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Xi Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/arm64/net/bpf_jit_comp.c | 8 +++---- lib/test_bpf.c | 45 +++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a785554916c0..ce8ab0409deb 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -779,14 +779,14 @@ static int build_body(struct jit_ctx *ctx) int ret; ret = build_insn(insn, ctx); - - if (ctx->image == NULL) - ctx->offset[i] = ctx->idx; - if (ret > 0) { i++; + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; continue; } + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; if (ret) return ret; } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 0362da0b66c3..2e385026915c 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4656,6 +4656,51 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + /* Mainly testing JIT + imm64 here. 
*/ + "JMP_JGE_X: ldimm64 test 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 2), + BPF_LD_IMM64(R0, 0xffffffffffffffffUL), + BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeUL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xeeeeeeeeU } }, + }, + { + "JMP_JGE_X: ldimm64 test 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 0), + BPF_LD_IMM64(R0, 0xffffffffffffffffUL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffffU } }, + }, + { + "JMP_JGE_X: ldimm64 test 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 4), + BPF_LD_IMM64(R0, 0xffffffffffffffffUL), + BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeUL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JNE | BPF_X */ { "JMP_JNE_X: if (3 != 2) return 1", From a1baca92afd5c420fb854ed79cb8cd3d8be29fbb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 May 2017 15:29:48 -0700 Subject: [PATCH 013/465] tcp: fix wraparound issue in tcp_lp [ Upstream commit a9f11f963a546fea9144f6a6d1a307e814a387e7 ] Be careful when comparing tcp_time_stamp to some u32 quantity, otherwise result can be surprising. Fixes: 7c106d7e782b ("[TCP]: TCP Low Priority congestion control") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_lp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 046fd3910873..d6fb6c067af4 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -264,13 +264,15 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); + u32 delta; if (sample->rtt_us > 0) tcp_lp_rtt_sample(sk, sample->rtt_us); /* calc inference */ - if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) - lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); + delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + if ((s32)delta > 0) + lp->inference = 3 * delta; /* test if within inference */ if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference)) From 23e761f37b4fa1c6420c5d72a56fd63d575dc4c0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 2 May 2017 14:43:44 -0700 Subject: [PATCH 014/465] net: ipv6: Do not duplicate DAD on link up [ Upstream commit 6d717134a1a6e1b34a7d0d70e953037bc2642046 ] Andrey reported a warning triggered by the rcu code: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 5911 at lib/debugobjects.c:289 debug_print_object+0x175/0x210 ODEBUG: activate active (active state 1) object type: rcu_head hint: (null) Modules linked in: CPU: 1 PID: 5911 Comm: a.out Not tainted 4.11.0-rc8+ #271 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x192/0x22d lib/dump_stack.c:52 __warn+0x19f/0x1e0 kernel/panic.c:549 warn_slowpath_fmt+0xe0/0x120 kernel/panic.c:564 debug_print_object+0x175/0x210 lib/debugobjects.c:286 debug_object_activate+0x574/0x7e0 lib/debugobjects.c:442 debug_rcu_head_queue kernel/rcu/rcu.h:75 __call_rcu.constprop.76+0xff/0x9c0 kernel/rcu/tree.c:3229 call_rcu_sched+0x12/0x20 kernel/rcu/tree.c:3288 rt6_rcu_free net/ipv6/ip6_fib.c:158 rt6_release+0x1ea/0x290 net/ipv6/ip6_fib.c:188 fib6_del_route net/ipv6/ip6_fib.c:1461 fib6_del+0xa42/0xdc0 
net/ipv6/ip6_fib.c:1500 __ip6_del_rt+0x100/0x160 net/ipv6/route.c:2174 ip6_del_rt+0x140/0x1b0 net/ipv6/route.c:2187 __ipv6_ifa_notify+0x269/0x780 net/ipv6/addrconf.c:5520 addrconf_ifdown+0xe60/0x1a20 net/ipv6/addrconf.c:3672 ... Andrey's reproducer program runs in a very tight loop, calling 'unshare -n' and then spawning 2 sets of 14 threads running random ioctl calls. The relevant networking sequence: 1. New network namespace created via unshare -n - ip6tnl0 device is created in down state 2. address added to ip6tnl0 - equivalent to ip -6 addr add dev ip6tnl0 fd00::bb/1 - DAD is started on the address and when it completes the host route is inserted into the FIB 3. ip6tnl0 is brought up - the new fixup_permanent_addr function restarts DAD on the address 4. exit namespace - teardown / cleanup sequence starts - once in a blue moon, lo teardown appears to happen BEFORE teardown of ip6tunl0 + down on 'lo' removes the host route from the FIB since the dst->dev for the route is loobback + host route added to rcu callback list * rcu callback has not run yet, so rt is NOT on the gc list so it has NOT been marked obsolete 5. in parallel to 4. worker_thread runs addrconf_dad_completed - DAD on the address on ip6tnl0 completes - calls ipv6_ifa_notify which inserts the host route All of that happens very quickly. The result is that a host route that has been deleted from the IPv6 FIB and added to the RCU list is re-inserted into the FIB. The exit namespace eventually gets to cleaning up ip6tnl0 which removes the host route from the FIB again, calls the rcu function for cleanup -- and triggers the double rcu trace. The root cause is duplicate DAD on the address -- steps 2 and 3. Arguably, DAD should not be started in step 2. The interface is in the down state, so it can not really send out requests for the address which makes starting DAD pointless. Since the second DAD was introduced by a recent change, seems appropriate to use it for the Fixes tag and have the fixup function only start DAD for addresses in the PREDAD state which occurs in addrconf_ifdown if the address is retained. Big thanks to Andrey for isolating a reliable reproducer for this problem. Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Reported-by: Andrey Konovalov Signed-off-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0ea96c4d334d..78b1b6ab3b2e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3296,7 +3296,8 @@ static int fixup_permanent_addr(struct inet6_dev *idev, idev->dev, 0, 0); } - addrconf_dad_start(ifp); + if (ifp->state == INET6_IFADDR_STATE_PREDAD) + addrconf_dad_start(ifp); return 0; } @@ -3651,7 +3652,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (keep) { /* set state to skip the notifier below */ state = INET6_IFADDR_STATE_DEAD; - ifa->state = 0; + ifa->state = INET6_IFADDR_STATE_PREDAD; if (!(ifa->flags & IFA_F_NODAD)) ifa->flags |= IFA_F_TENTATIVE; From 0b1d889cb0c9ebc706e459d459822f7a38cc88ae Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 3 May 2017 10:30:11 +0200 Subject: [PATCH 015/465] net: usb: qmi_wwan: add Telit ME910 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4c54dc0277d0d55a9248c43aebd31858f926a056 ] This patch adds support for Telit ME910 PID 0x1100. 
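The tcp_lp fix a few patches back ("tcp: fix wraparound issue in tcp_lp") relies on the standard idiom for comparing wrapping 32-bit timestamps: subtract in unsigned arithmetic and test the sign of the delta, instead of comparing the raw values. A small stand-alone sketch of why the naive comparison misfires across a wrap (plain userspace C; the two values are invented so they sit on either side of the wrap point):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t now = 5;		/* tcp_time_stamp just after a wrap */
	uint32_t echoed = 0xfffffff0u;	/* rcv_tsecr sampled before the wrap */

	/* Naive comparison: 5 > 0xfffffff0 is false, so "now" looks older. */
	printf("naive:       now is %s echoed\n",
	       now > echoed ? "after" : "before");

	/* Idiom used by the fix: compute the u32 delta, then test its sign. */
	uint32_t delta = now - echoed;
	printf("delta idiom: delta = %u, (s32)delta = %d, now is %s echoed\n",
	       delta, (int32_t)delta,
	       (int32_t)delta > 0 ? "after" : "before");
	return 0;
}

The delta form correctly reports "after" with a delta of 21 ticks; it is the same trick the kernel's time_after() macros use, and the same pattern the patch applies to tcp_time_stamp - tp->rx_opt.rcv_tsecr before scaling it into lp->inference.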
Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 2474618404f5..4e34568db64f 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -907,6 +907,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ + {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ From 86a9a0884d287a9417193179fcd10e7b63f136c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 3 May 2017 06:39:31 -0700 Subject: [PATCH 016/465] tcp: do not inherit fastopen_req from parent [ Upstream commit 8b485ce69876c65db12ed390e7f9c0d2a64eff2c ] Under fuzzer stress, it is possible that a child gets a non NULL fastopen_req pointer from its parent at accept() time, when/if parent morphs from listener to active session. We need to make sure this can not happen, by clearing the field after socket cloning. BUG: Double free or freeing an invalid pointer Unexpected shadow byte: 0xFB CPU: 3 PID: 20933 Comm: syz-executor3 Not tainted 4.11.0+ #306 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x292/0x395 lib/dump_stack.c:52 kasan_object_err+0x1c/0x70 mm/kasan/report.c:164 kasan_report_double_free+0x5c/0x70 mm/kasan/report.c:185 kasan_slab_free+0x9d/0xc0 mm/kasan/kasan.c:580 slab_free_hook mm/slub.c:1357 [inline] slab_free_freelist_hook mm/slub.c:1379 [inline] slab_free mm/slub.c:2961 [inline] kfree+0xe8/0x2b0 mm/slub.c:3882 tcp_free_fastopen_req net/ipv4/tcp.c:1077 [inline] tcp_disconnect+0xc15/0x13e0 net/ipv4/tcp.c:2328 inet_child_forget+0xb8/0x600 net/ipv4/inet_connection_sock.c:898 inet_csk_reqsk_queue_add+0x1e7/0x250 net/ipv4/inet_connection_sock.c:928 tcp_get_cookie_sock+0x21a/0x510 net/ipv4/syncookies.c:217 cookie_v4_check+0x1a19/0x28b0 net/ipv4/syncookies.c:384 tcp_v4_cookie_check net/ipv4/tcp_ipv4.c:1384 [inline] tcp_v4_do_rcv+0x731/0x940 net/ipv4/tcp_ipv4.c:1421 tcp_v4_rcv+0x2dc0/0x31c0 net/ipv4/tcp_ipv4.c:1715 ip_local_deliver_finish+0x4cc/0xc20 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:257 [inline] ip_local_deliver+0x1ce/0x700 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:492 [inline] ip_rcv_finish+0xb1d/0x20b0 net/ipv4/ip_input.c:396 NF_HOOK include/linux/netfilter.h:257 [inline] ip_rcv+0xd8c/0x19c0 net/ipv4/ip_input.c:487 __netif_receive_skb_core+0x1ad1/0x3400 net/core/dev.c:4210 __netif_receive_skb+0x2a/0x1a0 net/core/dev.c:4248 process_backlog+0xe5/0x6c0 net/core/dev.c:4868 napi_poll net/core/dev.c:5270 [inline] net_rx_action+0xe70/0x18e0 net/core/dev.c:5335 __do_softirq+0x2fb/0xb99 kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:899 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328 do_softirq kernel/softirq.c:176 [inline] __local_bh_enable_ip+0x1cf/0x1e0 kernel/softirq.c:181 local_bh_enable include/linux/bottom_half.h:31 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:931 [inline] ip_finish_output2+0x9ab/0x15e0 net/ipv4/ip_output.c:230 ip_finish_output+0xa35/0xdf0 
net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f6/0x7b0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x160 net/ipv4/ip_output.c:124 ip_queue_xmit+0x9a8/0x1a10 net/ipv4/ip_output.c:503 tcp_transmit_skb+0x1ade/0x3470 net/ipv4/tcp_output.c:1057 tcp_write_xmit+0x79e/0x55b0 net/ipv4/tcp_output.c:2265 __tcp_push_pending_frames+0xfa/0x3a0 net/ipv4/tcp_output.c:2450 tcp_push+0x4ee/0x780 net/ipv4/tcp.c:683 tcp_sendmsg+0x128d/0x39b0 net/ipv4/tcp.c:1342 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x660/0x810 net/socket.c:1696 SyS_sendto+0x40/0x50 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x446059 RSP: 002b:00007faa6761fb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 0000000000446059 RDX: 0000000000000001 RSI: 0000000020ba3fcd RDI: 0000000000000017 RBP: 00000000006e40a0 R08: 0000000020ba4ff0 R09: 0000000000000010 R10: 0000000020000000 R11: 0000000000000282 R12: 0000000000708150 R13: 0000000000000000 R14: 00007faa676209c0 R15: 00007faa67620700 Object at ffff88003b5bbcb8, in cache kmalloc-64 size: 64 Allocated: PID = 20909 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:513 set_track mm/kasan/kasan.c:525 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:616 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2745 kmalloc include/linux/slab.h:490 [inline] kzalloc include/linux/slab.h:663 [inline] tcp_sendmsg_fastopen net/ipv4/tcp.c:1094 [inline] tcp_sendmsg+0x221a/0x39b0 net/ipv4/tcp.c:1139 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x660/0x810 net/socket.c:1696 SyS_sendto+0x40/0x50 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed: PID = 20909 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:513 set_track mm/kasan/kasan.c:525 [inline] kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:589 slab_free_hook mm/slub.c:1357 [inline] slab_free_freelist_hook mm/slub.c:1379 [inline] slab_free mm/slub.c:2961 [inline] kfree+0xe8/0x2b0 mm/slub.c:3882 tcp_free_fastopen_req net/ipv4/tcp.c:1077 [inline] tcp_disconnect+0xc15/0x13e0 net/ipv4/tcp.c:2328 __inet_stream_connect+0x20c/0xf90 net/ipv4/af_inet.c:593 tcp_sendmsg_fastopen net/ipv4/tcp.c:1111 [inline] tcp_sendmsg+0x23a8/0x39b0 net/ipv4/tcp.c:1139 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x660/0x810 net/socket.c:1696 SyS_sendto+0x40/0x50 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 7db92362d2fe ("tcp: fix potential double free issue for fastopen_req") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Acked-by: Wei Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_minisocks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 65c0f3d13eca..c1259ccc422f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -536,6 +536,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; tcp_ecn_openreq_child(newtp, req); + newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; newtp->rack.mstamp.v64 = 0; From 165a007e51f0f8b93c6a83f80f261b020bb9dffe Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 3 May 2017 17:06:58 +0200 Subject: [PATCH 017/465] ipv4, ipv6: ensure raw socket message is big enough to hold an IP header [ Upstream commit 86f4c90a1c5c1493f07f2d12c1079f5bf01936f2 ] raw_send_hdrinc() and rawv6_send_hdrinc() expect that the buffer copied from the userspace contains the IPv4/IPv6 header, so if too few bytes are copied, parts of the header may remain uninitialized. This bug has been detected with KMSAN. For the record, the KMSAN report: ================================================================== BUG: KMSAN: use of unitialized memory in nf_ct_frag6_gather+0xf5a/0x44a0 inter: 0 CPU: 0 PID: 1036 Comm: probe Not tainted 4.11.0-rc5+ #2455 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x143/0x1b0 lib/dump_stack.c:52 kmsan_report+0x16b/0x1e0 mm/kmsan/kmsan.c:1078 __kmsan_warning_32+0x5c/0xa0 mm/kmsan/kmsan_instr.c:510 nf_ct_frag6_gather+0xf5a/0x44a0 net/ipv6/netfilter/nf_conntrack_reasm.c:577 ipv6_defrag+0x1d9/0x280 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn ./include/linux/netfilter.h:102 nf_hook_slow+0x13f/0x3c0 net/netfilter/core.c:310 nf_hook ./include/linux/netfilter.h:212 NF_HOOK ./include/linux/netfilter.h:255 rawv6_send_hdrinc net/ipv6/raw.c:673 rawv6_sendmsg+0x2fcb/0x41a0 net/ipv6/raw.c:919 inet_sendmsg+0x3f8/0x6d0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x6a5/0x7c0 net/socket.c:1696 SyS_sendto+0xbc/0xe0 net/socket.c:1664 do_syscall_64+0x72/0xa0 arch/x86/entry/common.c:285 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246 RIP: 0033:0x436e03 RSP: 002b:00007ffce48baf38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000436e03 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007ffce48baf90 R08: 00007ffce48baf50 R09: 000000000000001c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000401790 R14: 0000000000401820 R15: 0000000000000000 origin: 00000000d9400053 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:362 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:257 kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:270 slab_alloc_node mm/slub.c:2735 __kmalloc_node_track_caller+0x1f4/0x390 mm/slub.c:4341 __kmalloc_reserve net/core/skbuff.c:138 __alloc_skb+0x2cd/0x740 net/core/skbuff.c:231 alloc_skb ./include/linux/skbuff.h:933 alloc_skb_with_frags+0x209/0xbc0 net/core/skbuff.c:4678 sock_alloc_send_pskb+0x9ff/0xe00 net/core/sock.c:1903 sock_alloc_send_skb+0xe4/0x100 net/core/sock.c:1920 rawv6_send_hdrinc net/ipv6/raw.c:638 rawv6_sendmsg+0x2918/0x41a0 net/ipv6/raw.c:919 inet_sendmsg+0x3f8/0x6d0 net/ipv4/af_inet.c:762 
sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x6a5/0x7c0 net/socket.c:1696 SyS_sendto+0xbc/0xe0 net/socket.c:1664 do_syscall_64+0x72/0xa0 arch/x86/entry/common.c:285 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246 ================================================================== , triggered by the following syscalls: socket(PF_INET6, SOCK_RAW, IPPROTO_RAW) = 3 sendto(3, NULL, 0, 0, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "ff00::", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = -1 EPERM A similar report is triggered in net/ipv4/raw.c if we use a PF_INET socket instead of a PF_INET6 one. Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/raw.c | 3 +++ net/ipv6/raw.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9d943974de2b..bdffad875691 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -358,6 +358,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, rt->dst.dev->mtu); return -EMSGSIZE; } + if (length < sizeof(struct iphdr)) + return -EINVAL; + if (flags&MSG_PROBE) goto out; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0da6a12b5472..1f992d9e261d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -632,6 +632,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } + if (length < sizeof(struct ipv6hdr)) + return -EINVAL; if (flags&MSG_PROBE) goto out; From 0913e5733167f958114327975898ea356161febc Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 4 May 2017 16:48:58 +0200 Subject: [PATCH 018/465] rtnetlink: NUL-terminate IFLA_PHYS_PORT_NAME string [ Upstream commit 77ef033b687c3e030017c94a29bf6ea3aaaef678 ] IFLA_PHYS_PORT_NAME is a string attribute, so terminate it with \0. Otherwise libnl3 fails to validate netlink messages with this attribute. "ip -detail a" assumes too that the attribute is NUL-terminated when printing it. It often was, due to padding. I noticed this as libvirtd failing to start on a system with sfc driver after upgrading it to Linux 4.11, i.e. when sfc added support for phys_port_name. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c4e84c558240..69daf393cbe1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1056,7 +1056,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) return err; } - if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name)) return -EMSGSIZE; return 0; From 5ee83127ac04d1d36f79c714c8342fc5f316ae60 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 3 May 2017 22:07:31 -0700 Subject: [PATCH 019/465] ipv6: initialize route null entry in addrconf_init() [ Upstream commit 2f460933f58eee3393aba64f0f6d14acb08d1724 ] Andrey reported a crash on init_net.ipv6.ip6_null_entry->rt6i_idev since it is always NULL. This is clearly wrong, we have code to initialize it to loopback_dev, unfortunately the order is still not correct. loopback_dev is registered very early during boot, we lose a chance to re-initialize it in notifier. addrconf_init() is called after ip6_route_init(), which means we have no chance to correct it. 
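The rtnetlink IFLA_PHYS_PORT_NAME fix above hinges on the difference between nla_put() with strlen() and nla_put_string(): the former copies only the name bytes, so whether the attribute ends in '\0' depends on whatever padding happens to follow, while the latter copies the terminating '\0' as part of the payload. A stand-alone sketch of that difference (plain userspace C; the memcpy() calls merely stand in for the netlink helpers, which are not callable outside the kernel):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *name = "p0";	/* a phys_port_name reported by a driver */
	char payload[16];

	memset(payload, 'X', sizeof(payload));	/* stale bytes in the buffer */

	/* Old path: nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)
	 * copies strlen(name) bytes and no terminator. */
	memcpy(payload, name, strlen(name));
	printf("unterminated: %.8s\n", payload);	/* "p0XXXXXX" */

	/* Fixed path: nla_put_string() copies strlen(name) + 1 bytes. */
	memcpy(payload, name, strlen(name) + 1);
	printf("terminated:   %s\n", payload);		/* "p0" */
	return 0;
}

This is also why libnl3 and "ip -detail" only appeared to work before the fix: message padding often supplied the missing '\0' by accident.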
Fix it by moving this initialization explicitly after ipv6_add_dev(init_net.loopback_dev) in addrconf_init(). Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_route.h | 1 + net/ipv6/addrconf.c | 2 ++ net/ipv6/route.c | 26 +++++++++++++++----------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9dc2c182a263..f5e625f53367 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -84,6 +84,7 @@ struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int ifindex, struct flowi6 *fl6, int flags); +void ip6_route_init_special_entries(void); int ip6_route_init(void); void ip6_route_cleanup(void); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 78b1b6ab3b2e..5793b64f7743 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6409,6 +6409,8 @@ int __init addrconf_init(void) goto errlo; } + ip6_route_init_special_entries(); + for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_addr_lst[i]); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fb174b590fd3..4933184b663b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4022,6 +4022,21 @@ static struct notifier_block ip6_route_dev_notifier = { .priority = 0, }; +void __init ip6_route_init_special_entries(void) +{ + /* Registering of the loopback is done before this portion of code, + * the loopback reference in rt6_info will not be taken, do it + * manually for init_net */ + init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #endif +} + int __init ip6_route_init(void) { int ret; @@ -4048,17 +4063,6 @@ int __init ip6_route_init(void) ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; - /* Registering of the loopback is done before this portion of code, - * the loopback reference in rt6_info will not be taken, do it - * manually for init_net */ - init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - #ifdef CONFIG_IPV6_MULTIPLE_TABLES - init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - #endif ret = fib6_init(); if (ret) goto out_register_subsys; From 3960afa2e02f86463e893d0d5583b1e612f6f1ea Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 8 May 2017 10:12:13 -0700 Subject: [PATCH 020/465] ipv6: reorder ip6_route_dev_notifier after ipv6_dev_notf [ Upstream commit 242d3a49a2a1a71d8eb9f953db1bcaa9d698ce00 ] For each netns (except init_net), we initialize its null entry in 3 places: 1) The template itself, as we use kmemdup() 2) Code around dst_init_metrics() in ip6_route_net_init() 3) ip6_route_dev_notify(), which is supposed to initialize 
it after loopback registers Unfortunately the last one still happens in a wrong order because we expect to initialize net->ipv6.ip6_null_entry->rt6i_idev to net->loopback_dev's idev, thus we have to do that after we add idev to loopback. However, this notifier has priority == 0 same as ipv6_dev_notf, and ipv6_dev_notf is registered after ip6_route_dev_notifier so it is called actually after ip6_route_dev_notifier. This is similar to commit 2f460933f58e ("ipv6: initialize route null entry in addrconf_init()") which fixes init_net. Fix it by picking a smaller priority for ip6_route_dev_notifier. Also, we have to release the refcnt accordingly when unregistering loopback_dev because device exit functions are called before subsys exit functions. Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/addrconf.h | 2 ++ net/ipv6/addrconf.c | 1 + net/ipv6/route.c | 13 +++++++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 17c6fd84e287..4d93c5ec9b12 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -20,6 +20,8 @@ #define ADDRCONF_TIMER_FUZZ (HZ / 4) #define ADDRCONF_TIMER_FUZZ_MAX (HZ) +#define ADDRCONF_NOTIFY_PRIORITY 0 + #include #include diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5793b64f7743..311f45641673 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3516,6 +3516,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, */ static struct notifier_block ipv6_dev_notf = { .notifier_call = addrconf_notify, + .priority = ADDRCONF_NOTIFY_PRIORITY, }; static void addrconf_type_change(struct net_device *dev, unsigned long event) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4933184b663b..d316d00e11ab 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3704,7 +3704,10 @@ static int ip6_route_dev_notify(struct notifier_block *this, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { + if (!(dev->flags & IFF_LOOPBACK)) + return NOTIFY_OK; + + if (event == NETDEV_REGISTER) { net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -3712,6 +3715,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); +#endif + } else if (event == NETDEV_UNREGISTER) { + in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); + in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev); #endif } @@ -4019,7 +4028,7 @@ static struct pernet_operations ip6_route_net_late_ops = { static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, - .priority = 0, + .priority = ADDRCONF_NOTIFY_PRIORITY - 10, }; void __init ip6_route_init_special_entries(void) From f8e3892f9f237b96dcba5560d2050bffcbfdbb32 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 May 2017 06:56:54 -0700 Subject: [PATCH 021/465] tcp: randomize timestamps on syncookies [ Upstream commit 84b114b98452c431299d99c135f751659e517acb ] Whole point of randomization was to hide server uptime, but an attacker can simply start a syn flood and TCP 
generates 'old style' timestamps, directly revealing server jiffies value. Also, TSval sent by the server to a particular remote address vary depending on syncookies being sent or not, potentially triggering PAWS drops for innocent clients. Lets implement proper randomization, including for SYNcookies. Also we do not need to export sysctl_tcp_timestamps, since it is not used from a module. In v2, I added Florian feedback and contribution, adding tsoff to tcp_get_cookie_sock(). v3 removed one unused variable in tcp_v4_connect() as Florian spotted. Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection") Signed-off-by: Eric Dumazet Reviewed-by: Florian Westphal Tested-by: Florian Westphal Cc: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/secure_seq.h | 10 ++++++---- include/net/tcp.h | 5 +++-- net/core/secure_seq.c | 34 ++++++++++++++++++++-------------- net/ipv4/syncookies.c | 12 ++++++++++-- net/ipv4/tcp_input.c | 8 +++----- net/ipv4/tcp_ipv4.c | 32 +++++++++++++++++++------------- net/ipv6/syncookies.c | 10 +++++++++- net/ipv6/tcp_ipv6.c | 32 +++++++++++++++++++------------- 8 files changed, 89 insertions(+), 54 deletions(-) diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 0caee631a836..b94006f6fbdd 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -6,10 +6,12 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff); -u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff); +u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr); +u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport); +u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/tcp.h b/include/net/tcp.h index 6ec4ea652f3f..6423b4698880 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -471,7 +471,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst); + struct dst_entry *dst, u32 tsoff); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); @@ -1816,7 +1816,8 @@ struct tcp_request_sock_ops { struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, const struct request_sock *req, bool *strict); - __u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff); + u32 (*init_seq)(const struct sk_buff *skb); + u32 (*init_ts_off)(const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index d28da7d363f1..ae35cce3a40d 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -24,9 +24,13 @@ static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { - 
net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } + +static __always_inline void ts_secret_init(void) +{ + net_get_random_once(&ts_secret, sizeof(ts_secret)); +} #endif #ifdef CONFIG_INET @@ -47,7 +51,7 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; @@ -60,12 +64,14 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) if (sysctl_tcp_timestamps != 1) return 0; + ts_secret_init(); return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } +EXPORT_SYMBOL(secure_tcpv6_ts_off); -u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; @@ -78,14 +84,14 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, .sport = sport, .dport = dport }; - u64 hash; + u32 hash; + net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } -EXPORT_SYMBOL(secure_tcpv6_sequence_number); +EXPORT_SYMBOL(secure_tcpv6_seq); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) @@ -107,30 +113,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) { if (sysctl_tcp_timestamps != 1) return 0; + ts_secret_init(); return siphash_2u32((__force u32)saddr, (__force u32)daddr, &ts_secret); } -/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), +/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. 
*/ - -u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) { - u64 hash; + u32 hash; + net_secret_init(); hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 496b97e17aaf..0257d965f111 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -203,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, u32 tsoff) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -213,6 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, NULL, &own_req); if (child) { atomic_set(&req->rsk_refcnt, 1); + tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); inet_csk_reqsk_queue_add(sk, req, child); } else { @@ -292,6 +294,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; + u32 tsoff = 0; if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -311,6 +314,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); + if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { + tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + tcp_opt.rcv_tsecr -= tsoff; + } + if (!cookie_timestamp_decode(&tcp_opt)) goto out; @@ -381,7 +389,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); - ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); + ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 659d1baefb2b..3c6c8787b42e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,7 +85,6 @@ int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); -EXPORT_SYMBOL(sysctl_tcp_timestamps); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; @@ -6332,8 +6331,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - if (isn && tmp_opt.tstamp_ok) - af_ops->init_seq(skb, &tcp_rsk(req)->ts_off); + if (tmp_opt.tstamp_ok) + tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb); if (!want_cookie && !isn) { /* VJ's idea. 
We save last timestamp seen @@ -6375,7 +6374,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off); + isn = af_ops->init_seq(skb); } if (!dst) { dst = af_ops->route_req(sk, &fl, req, NULL); @@ -6387,7 +6386,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (want_cookie) { isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - tcp_rsk(req)->ts_off = 0; req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 575e19dcc017..1a5fa95c981f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -94,12 +94,18 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v4_init_seq(const struct sk_buff *skb) { - return secure_tcp_sequence_number(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcp_seq(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static u32 tcp_v4_init_ts_off(const struct sk_buff *skb) +{ + return secure_tcp_ts_off(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -145,7 +151,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct flowi4 *fl4; struct rtable *rt; int err; - u32 seq; struct ip_options_rcu *inet_opt; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -236,13 +241,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = NULL; if (likely(!tp->repair)) { - seq = secure_tcp_sequence_number(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port, - &tp->tsoffset); if (!tp->write_seq) - tp->write_seq = seq; + tp->write_seq = secure_tcp_seq(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port); + tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr, + inet->inet_daddr); } inet->inet_id = tp->write_seq ^ jiffies; @@ -1253,7 +1258,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq = tcp_v4_init_sequence, + .init_seq = tcp_v4_init_seq, + .init_ts_off = tcp_v4_init_ts_off, .send_synack = tcp_v4_send_synack, }; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 895ff650db43..5abc3692b901 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -143,6 +144,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; + u32 tsoff = 0; if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -162,6 +164,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); + if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { + tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32); + tcp_opt.rcv_tsecr -= tsoff; + } + if (!cookie_timestamp_decode(&tcp_opt)) goto out; @@ -242,7 +250,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = 
cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); - ret = tcp_get_cookie_sock(sk, skb, req, dst); + ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); out: return ret; out_free: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 49fa2e8c3fa9..4c4afdca41ff 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -101,12 +101,18 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v6_init_seq(const struct sk_buff *skb) { - return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static u32 tcp_v6_init_ts_off(const struct sk_buff *skb) +{ + return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -122,7 +128,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct flowi6 fl6; struct dst_entry *dst; int addr_type; - u32 seq; int err; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -287,13 +292,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk_set_txhash(sk); if (likely(!tp->repair)) { - seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport, - &tp->tsoffset); if (!tp->write_seq) - tp->write_seq = seq; + tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport); + tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32); } if (tcp_fastopen_defer_connect(sk, &err)) @@ -757,7 +762,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, - .init_seq = tcp_v6_init_sequence, + .init_seq = tcp_v6_init_seq, + .init_ts_off = tcp_v6_init_ts_off, .send_synack = tcp_v6_send_synack, }; From 7a0a483ec578b51570e22d488cb2bb6be62d4fdb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 May 2017 03:49:01 +0300 Subject: [PATCH 022/465] bnxt_en: allocate enough space for ->ntp_fltr_bmap [ Upstream commit ac45bd93a5035c2f39c9862b8b6ed692db0fdc87 ] We have the number of longs, but we need to calculate the number of bytes required. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Dan Carpenter Acked-by: Michael Chan Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1f1e54ba0ecb..2c02a4cebc24 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3000,7 +3000,8 @@ static int bnxt_alloc_ntp_fltrs(struct bnxt *bp) INIT_HLIST_HEAD(&bp->ntp_fltr_hash_tbl[i]); bp->ntp_fltr_count = 0; - bp->ntp_fltr_bmap = kzalloc(BITS_TO_LONGS(BNXT_NTP_FLTR_MAX_FLTR), + bp->ntp_fltr_bmap = kcalloc(BITS_TO_LONGS(BNXT_NTP_FLTR_MAX_FLTR), + sizeof(long), GFP_KERNEL); if (!bp->ntp_fltr_bmap) From ced12308e58cc95002404b584fdc756a233581ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 May 2017 00:04:09 +0200 Subject: [PATCH 023/465] bpf: don't let ldimm64 leak map addresses on unprivileged [ Upstream commit 0d0e57697f162da4aa218b5feafe614fb666db07 ] The patch fixes two things at once: 1) It checks the env->allow_ptr_leaks and only prints the map address to the log if we have the privileges to do so, otherwise it just dumps 0 as we would when kptr_restrict is enabled on %pK. Given the latter is off by default and not every distro sets it, I don't want to rely on this, hence the 0 by default for unprivileged. 2) Printing of ldimm64 in the verifier log is currently broken in that we don't print the full immediate, but only the 32 bit part of the first insn part for ldimm64. Thus, fix this up as well; it's okay to access, since we verified all ldimm64 earlier already (including just constants) through replace_map_fd_with_map_ptr(). Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") Fixes: cbd357008604 ("bpf: verifier (add ability to receive verification log)") Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f27849e580c6..6fd78d4c4164 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -296,7 +296,8 @@ static const char *const bpf_jmp_string[16] = { [BPF_EXIT >> 4] = "exit", }; -static void print_bpf_insn(struct bpf_insn *insn) +static void print_bpf_insn(const struct bpf_verifier_env *env, + const struct bpf_insn *insn) { u8 class = BPF_CLASS(insn->code); @@ -360,9 +361,19 @@ static void print_bpf_insn(struct bpf_insn *insn) insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->src_reg, insn->imm); - } else if (BPF_MODE(insn->code) == BPF_IMM) { - verbose("(%02x) r%d = 0x%x\n", - insn->code, insn->dst_reg, insn->imm); + } else if (BPF_MODE(insn->code) == BPF_IMM && + BPF_SIZE(insn->code) == BPF_DW) { + /* At this point, we already made sure that the second + * part of the ldimm64 insn is accessible. 
+ */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; + + if (map_ptr && !env->allow_ptr_leaks) + imm = 0; + + verbose("(%02x) r%d = 0x%llx\n", insn->code, + insn->dst_reg, (unsigned long long)imm); } else { verbose("BUG_ld_%02x\n", insn->code); return; @@ -2835,7 +2846,7 @@ static int do_check(struct bpf_verifier_env *env) if (log_level) { verbose("%d: ", insn_idx); - print_bpf_insn(insn); + print_bpf_insn(env, insn); } err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); From 275e2edbb8f1d13a58320cf205d710df70733fb5 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 8 May 2017 17:48:35 -0400 Subject: [PATCH 024/465] net: mdio-mux: bcm-iproc: call mdiobus_free() in error path [ Upstream commit 922c60e89d52730050c6ccca218bff40cc8bcd8e ] If an error is encountered in mdio_mux_init(), the error path will call mdiobus_free(). Since mdiobus_register() has been called prior to mdio_mux_init(), the bus->state will not be MDIOBUS_UNREGISTERED. This causes a BUG_ON() in mdiobus_free(). To correct this issue, add an error path for mdio_mux_init() which calls mdiobus_unregister() prior to mdiobus_free(). Signed-off-by: Jon Mason Fixes: 98bc865a1ec8 ("net: mdio-mux: Add MDIO mux driver for iProc SoCs") Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio-mux-bcm-iproc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c index 0a0412524cec..0a5f62e0efcc 100644 --- a/drivers/net/phy/mdio-mux-bcm-iproc.c +++ b/drivers/net/phy/mdio-mux-bcm-iproc.c @@ -203,11 +203,14 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev) &md->mux_handle, md, md->mii_bus); if (rc) { dev_info(md->dev, "mdiomux initialization failed\n"); - goto out; + goto out_register; } dev_info(md->dev, "iProc mdiomux registered\n"); return 0; + +out_register: + mdiobus_unregister(bus); out: mdiobus_free(bus); return rc; From c7f765b5d6bda480ae1aa2bf5734c9613d851f5f Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Tue, 25 Apr 2017 16:28:48 -0700 Subject: [PATCH 025/465] f2fs: sanity check segment count commit b9dd46188edc2f0d1f37328637860bb65a771124 upstream. F2FS uses 4 bytes to represent block address. As a result, supported size of disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. Signed-off-by: Jin Qian Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 7 +++++++ include/linux/f2fs_fs.h | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 96fe8ed73100..858aef564a58 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1483,6 +1483,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } + if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) { + f2fs_msg(sb, KERN_INFO, + "Invalid segment count (%u)", + le32_to_cpu(raw_super->segment_count)); + return 1; + } + /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ if (sanity_check_area_boundary(sbi, bh)) return 1; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index e2d239ed4c60..661200e6d281 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -301,6 +301,12 @@ struct f2fs_nat_block { #define SIT_VBLOCK_MAP_SIZE 64 #define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) +/* + * F2FS uses 4 bytes to represent block address. 
As a result, supported size of + * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. + */ +#define F2FS_MAX_SEGMENT ((16 * 1024 * 1024) / 2) + /* * Note that f2fs_sit_entry->vblocks has the following bit-field information. * [15:10] : allocation type such as CURSEG_XXXX_TYPE From 5e25479ea90cdcbbda6c9db789702bf258b411e9 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 13 Apr 2017 14:04:21 -0700 Subject: [PATCH 026/465] xen/arm,arm64: fix xen_dma_ops after 815dd18 "Consolidate get_dma_ops..." commit e058632670b709145730a134acc3f83f392f7aa7 upstream. The following commit: commit 815dd18788fe0d41899f51b91d0560279cf16b0d Author: Bart Van Assche Date: Fri Jan 20 13:04:04 2017 -0800 treewide: Consolidate get_dma_ops() implementations rearranges get_dma_ops in a way that xen_dma_ops are not returned when running on Xen anymore, dev->dma_ops is returned instead (see arch/arm/include/asm/dma-mapping.h:get_arch_dma_ops and include/linux/dma-mapping.h:get_dma_ops). Fix the problem by storing dev->dma_ops in dev_archdata, and setting dev->dma_ops to xen_dma_ops. This way, xen_dma_ops is returned naturally by get_dma_ops. The Xen code can retrieve the original dev->dma_ops from dev_archdata when needed. It also allows us to remove __generic_dma_ops from common headers. Signed-off-by: Stefano Stabellini Tested-by: Julien Grall Suggested-by: Catalin Marinas Reviewed-by: Catalin Marinas CC: linux@armlinux.org.uk CC: catalin.marinas@arm.com CC: will.deacon@arm.com CC: boris.ostrovsky@oracle.com CC: jgross@suse.com CC: Julien Grall Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/device.h | 3 +++ arch/arm/include/asm/dma-mapping.h | 12 +----------- arch/arm/mm/dma-mapping.c | 7 +++++++ arch/arm64/include/asm/device.h | 3 +++ arch/arm64/include/asm/dma-mapping.h | 13 +------------ arch/arm64/mm/dma-mapping.c | 7 +++++++ include/xen/arm/page-coherent.h | 8 ++++++++ 7 files changed, 30 insertions(+), 23 deletions(-) diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index 220ba207be91..36ec9c8f6e16 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -15,6 +15,9 @@ struct dev_archdata { #endif #ifdef CONFIG_ARM_DMA_USE_IOMMU struct dma_iommu_mapping *mapping; +#endif +#ifdef CONFIG_XEN + const struct dma_map_ops *dev_dma_ops; #endif bool dma_coherent; }; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 716656925975..680d3f3889e7 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -16,19 +16,9 @@ extern const struct dma_map_ops arm_dma_ops; extern const struct dma_map_ops arm_coherent_dma_ops; -static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) -{ - if (dev && dev->dma_ops) - return dev->dma_ops; - return &arm_dma_ops; -} - static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (xen_initial_domain()) - return xen_dma_ops; - else - return __generic_dma_ops(NULL); + return &arm_dma_ops; } #define HAVE_ARCH_DMA_SUPPORTED 1 diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 475811f5383a..0268584f1fa0 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -2414,6 +2414,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dma_ops = arm_get_dma_map_ops(coherent); set_dma_ops(dev, dma_ops); + +#ifdef CONFIG_XEN + if (xen_initial_domain()) { + dev->archdata.dev_dma_ops = dev->dma_ops; + dev->dma_ops = xen_dma_ops; + } +#endif } void 
arch_teardown_dma_ops(struct device *dev) diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 73d5bab015eb..5a5fa47a6b18 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -19,6 +19,9 @@ struct dev_archdata { #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ +#endif +#ifdef CONFIG_XEN + const struct dma_map_ops *dev_dma_ops; #endif bool dma_coherent; }; diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 505756cdc67a..5392dbeffa45 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -27,11 +27,8 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0) extern const struct dma_map_ops dummy_dma_ops; -static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (dev && dev->dma_ops) - return dev->dma_ops; - /* * We expect no ISA devices, and all other DMA masters are expected to * have someone call arch_setup_dma_ops at device creation time. @@ -39,14 +36,6 @@ static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) return &dummy_dma_ops; } -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - if (xen_initial_domain()) - return xen_dma_ops; - else - return __generic_dma_ops(NULL); -} - void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); #define arch_setup_dma_ops arch_setup_dma_ops diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 81cdb2e844ed..7f8b37e85a2b 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -977,4 +977,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->archdata.dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); + +#ifdef CONFIG_XEN + if (xen_initial_domain()) { + dev->archdata.dev_dma_ops = dev->dma_ops; + dev->dma_ops = xen_dma_ops; + } +#endif } diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h index 95ce6ac3a971..b0a2bfc8d647 100644 --- a/include/xen/arm/page-coherent.h +++ b/include/xen/arm/page-coherent.h @@ -2,8 +2,16 @@ #define _ASM_ARM_XEN_PAGE_COHERENT_H #include +#include #include +static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dev_dma_ops) + return dev->archdata.dev_dma_ops; + return get_arch_dma_ops(NULL); +} + void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); From 3ed024e27424c98237aae18b968c9a3bb69358e6 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 24 Apr 2017 15:04:53 -0400 Subject: [PATCH 027/465] xen: Revert commits da72ff5bfcb0 and 72a9b186292d commit 84d582d236dc1f9085e741affc72e9ba061a67c2 upstream. Recent discussion (http://marc.info/?l=xen-devel&m=149192184523741) established that commit 72a9b186292d ("xen: Remove event channel notification through Xen PCI platform device") (and thus commit da72ff5bfcb0 ("partially revert "xen: Remove event channel notification through Xen PCI platform device"")) are unnecessary and, in fact, prevent HVM guests from booting on Xen releases prior to 4.0 Therefore we revert both of those commits. 
The summary of that discussion is below: Here is the brief summary of the current situation: Before the offending commit (72a9b186292): 1) INTx does not work because of the reset_watches path. 2) The reset_watches path is only taken if you have Xen > 4.0. 3) The Linux kernel by default will use vector injection if the hypervisor supports it. So even though INTx does not work, nobody running the kernel with Xen > 4.0 would notice, unless they explicitly disabled this feature either in the kernel or in Xen (and this can only be disabled by modifying the code, which is not a user-supported way to do it). After the offending commit (+ partial revert): 1) INTx is no longer supported for HVM guests (only for PV guests). 2) Any HVM guest kernel will not boot on Xen < 4.0, which does not have vector injection support, since the only other mode supported is INTx. So based on this summary, I think before commit (72a9b186292) we were in a much better position from a user point of view. Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Cc: Boris Ostrovsky Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: Konrad Rzeszutek Wilk Cc: Bjorn Helgaas Cc: Stefano Stabellini Cc: Julien Grall Cc: Vitaly Kuznetsov Cc: Paul Gortmaker Cc: Ross Lagerwall Cc: xen-devel@lists.xenproject.org Cc: linux-kernel@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: Anthony Liguori Cc: KarimAllah Ahmed Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/xen/events.h | 11 +++++++++++ arch/x86/pci/xen.c | 2 +- arch/x86/xen/enlighten.c | 16 +++++++++++----- arch/x86/xen/smp.c | 2 ++ arch/x86/xen/time.c | 5 +++++ drivers/xen/events/events_base.c | 26 +++++++++++++++++--------- drivers/xen/platform-pci.c | 13 +++---------- 7 files changed, 50 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 608a79d5a466..e6911caf5bbf 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) /* No need for a barrier -- XCHG is a barrier on x86. */ #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) +extern int xen_have_vector_callback; + +/* + * Events delivered via platform PCI interrupts are always + * routed to vcpu 0 and hence cannot be rebound.
+ */ +static inline bool xen_support_evtchn_rebind(void) +{ + return (!xen_hvm_domain() || xen_have_vector_callback); +} + #endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 292ab0364a89..c4b3646bd04c 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -447,7 +447,7 @@ void __init xen_msi_init(void) int __init pci_xen_hvm_init(void) { - if (!xen_feature(XENFEAT_hvm_pirqs)) + if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs)) return 0; #ifdef CONFIG_ACPI diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ec1d5c46e58f..29b239025b57 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -138,6 +138,8 @@ struct shared_info xen_dummy_shared_info; void *xen_initial_gdt; RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); +__read_mostly int xen_have_vector_callback; +EXPORT_SYMBOL_GPL(xen_have_vector_callback); static int xen_cpu_up_prepare(unsigned int cpu); static int xen_cpu_up_online(unsigned int cpu); @@ -1861,7 +1863,9 @@ static int xen_cpu_up_prepare(unsigned int cpu) xen_vcpu_setup(cpu); } - if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) + if (xen_pv_domain() || + (xen_have_vector_callback && + xen_feature(XENFEAT_hvm_safe_pvclock))) xen_setup_timer(cpu); rc = xen_smp_intr_init(cpu); @@ -1877,7 +1881,9 @@ static int xen_cpu_dead(unsigned int cpu) { xen_smp_intr_free(cpu); - if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) + if (xen_pv_domain() || + (xen_have_vector_callback && + xen_feature(XENFEAT_hvm_safe_pvclock))) xen_teardown_timer(cpu); return 0; @@ -1916,8 +1922,8 @@ static void __init xen_hvm_guest_init(void) xen_panic_handler_init(); - BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector)); - + if (xen_feature(XENFEAT_hvm_callback_vector)) + xen_have_vector_callback = 1; xen_hvm_smp_init(); WARN_ON(xen_cpuhp_setup()); xen_unplug_emulated_devices(); @@ -1958,7 +1964,7 @@ bool xen_hvm_need_lapic(void) return false; if (!xen_hvm_domain()) return false; - if (xen_feature(XENFEAT_hvm_pirqs)) + if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback) return false; return true; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 7ff2f1bfb7ec..4e6b65baf8e2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -742,6 +742,8 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) void __init xen_hvm_smp_init(void) { + if (!xen_have_vector_callback) + return; smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = xen_smp_send_reschedule; smp_ops.cpu_die = xen_cpu_die; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 1e69956d7852..4535627cf532 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -432,6 +432,11 @@ static void xen_hvm_setup_cpu_clockevents(void) void __init xen_hvm_init_time_ops(void) { + /* vector callback is needed otherwise we cannot receive interrupts + * on cpu > 0 and at this point we don't know how many cpus are + * available */ + if (!xen_have_vector_callback) + return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { printk(KERN_INFO "Xen doesn't support pvclock on HVM," "disable pv timer\n"); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6a53577772c9..42807ce11c42 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1312,6 +1312,9 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) if (!VALID_EVTCHN(evtchn)) return -1; + if (!xen_support_evtchn_rebind()) + return -1; + /* Send future 
instances of this interrupt to other vcpu. */ bind_vcpu.port = evtchn; bind_vcpu.vcpu = xen_vcpu_nr(tcpu); @@ -1645,15 +1648,20 @@ void xen_callback_vector(void) { int rc; uint64_t callback_via; - - callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); - rc = xen_set_callback_via(callback_via); - BUG_ON(rc); - pr_info("Xen HVM callback vector for event delivery is enabled\n"); - /* in the restore case the vector has already been allocated */ - if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, - xen_hvm_callback_vector); + if (xen_have_vector_callback) { + callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); + rc = xen_set_callback_via(callback_via); + if (rc) { + pr_err("Request for Xen HVM callback vector failed\n"); + xen_have_vector_callback = 0; + return; + } + pr_info("Xen HVM callback vector for event delivery is enabled\n"); + /* in the restore case the vector has already been allocated */ + if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, + xen_hvm_callback_vector); + } } #else void xen_callback_vector(void) {} diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 2a165cc8a43c..1c4deac9b0f8 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -67,7 +67,7 @@ static uint64_t get_callback_via(struct pci_dev *pdev) pin = pdev->pin; /* We don't know the GSI. Specify the PCI INTx line instead. */ - return ((uint64_t)0x01 << HVM_CALLBACK_VIA_TYPE_SHIFT) | /* PCI INTx identifier */ + return ((uint64_t)0x01 << 56) | /* PCI INTx identifier */ ((uint64_t)pci_domain_nr(pdev->bus) << 32) | ((uint64_t)pdev->bus->number << 16) | ((uint64_t)(pdev->devfn & 0xff) << 8) | @@ -90,7 +90,7 @@ static int xen_allocate_irq(struct pci_dev *pdev) static int platform_pci_resume(struct pci_dev *pdev) { int err; - if (!xen_pv_domain()) + if (xen_have_vector_callback) return 0; err = xen_set_callback_via(callback_via); if (err) { @@ -138,14 +138,7 @@ static int platform_pci_probe(struct pci_dev *pdev, platform_mmio = mmio_addr; platform_mmiolen = mmio_len; - /* - * Xen HVM guests always use the vector callback mechanism. - * L1 Dom0 in a nested Xen environment is a PV guest inside in an - * HVM environment. It needs the platform-pci driver to get - * notifications from L0 Xen, but it cannot use the vector callback - * as it is not exported by L1 Xen. - */ - if (xen_pv_domain()) { + if (!xen_have_vector_callback) { ret = xen_allocate_irq(pdev); if (ret) { dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret); From 0873ab02247ec7453b0758e99f125587bfb4cd85 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 18 Apr 2017 18:43:20 +0200 Subject: [PATCH 028/465] block: get rid of blk_integrity_revalidate() commit 19b7ccf8651df09d274671b53039c672a52ad84d upstream. 
Commit 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") introduced blk_integrity_revalidate(), which seems to assume ownership of the stable pages flag and unilaterally clears it if no blk_integrity profile is registered: if (bi->profile) disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; else disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; It's called from revalidate_disk() and rescan_partitions(), making it impossible to enable stable pages for drivers that support partitions and don't use blk_integrity: while the call in revalidate_disk() can be trivially worked around (see zram, which doesn't support partitions and hence gets away with zram_revalidate_disk()), rescan_partitions() can be triggered from userspace at any time. This breaks rbd, where the ceph messenger is responsible for generating/verifying CRCs. Since blk_integrity_{un,}register() "must" be used for (un)registering the integrity profile with the block layer, move BDI_CAP_STABLE_WRITES setting there. This way drivers that call blk_integrity_register() and use integrity infrastructure won't interfere with drivers that don't but still want stable pages. Fixes: 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") Cc: "Martin K. Petersen" Cc: Christoph Hellwig Cc: Mike Snitzer Tested-by: Dan Williams Signed-off-by: Ilya Dryomov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-integrity.c | 19 ++----------------- block/partition-generic.c | 1 - fs/block_dev.c | 1 - include/linux/genhd.h | 2 -- 4 files changed, 2 insertions(+), 21 deletions(-) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 9f0ff5ba4f84..35c5af1ea068 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -417,7 +417,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; - blk_integrity_revalidate(disk); + disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; } EXPORT_SYMBOL(blk_integrity_register); @@ -430,26 +430,11 @@ EXPORT_SYMBOL(blk_integrity_register); */ void blk_integrity_unregister(struct gendisk *disk) { - blk_integrity_revalidate(disk); + disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity)); } EXPORT_SYMBOL(blk_integrity_unregister); -void blk_integrity_revalidate(struct gendisk *disk) -{ - struct blk_integrity *bi = &disk->queue->integrity; - - if (!(disk->flags & GENHD_FL_UP)) - return; - - if (bi->profile) - disk->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; - else - disk->queue->backing_dev_info->capabilities &= - ~BDI_CAP_STABLE_WRITES; -} - void blk_integrity_add(struct gendisk *disk) { if (kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype, diff --git a/block/partition-generic.c b/block/partition-generic.c index 7afb9907821f..0171a2faad68 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -497,7 +497,6 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); - blk_integrity_revalidate(disk); check_disk_size_change(disk, bdev); bdev->bd_invalidated = 0; if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) diff --git a/fs/block_dev.c b/fs/block_dev.c index 2eca00ec4370..56039dfbc674 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1451,7 +1451,6 @@ int revalidate_disk(struct 
gendisk *disk) if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); - blk_integrity_revalidate(disk); bdev = bdget_disk(disk, 0); if (!bdev) return ret; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 76f39754e7b0..76d6a1cd4153 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -722,11 +722,9 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) #if defined(CONFIG_BLK_DEV_INTEGRITY) extern void blk_integrity_add(struct gendisk *); extern void blk_integrity_del(struct gendisk *); -extern void blk_integrity_revalidate(struct gendisk *); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline void blk_integrity_add(struct gendisk *disk) { } static inline void blk_integrity_del(struct gendisk *disk) { } -static inline void blk_integrity_revalidate(struct gendisk *disk) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ #else /* CONFIG_BLOCK */ From 4c71e91a048f77e97205831a7dd1fe86c63d2738 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 May 2017 14:06:16 +0200 Subject: [PATCH 029/465] Linux 4.11.1 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4b074a904106..9dc2aec1c2e5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 11 -SUBLEVEL = 0 +SUBLEVEL = 1 EXTRAVERSION = NAME = Fearless Coyote From 1ec6f0815aa4cfc11f4be64cb532d0fccdd57fce Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 10 May 2017 06:08:44 +0200 Subject: [PATCH 030/465] xen: adjust early dom0 p2m handling to xen hypervisor behavior commit 69861e0a52f8733355ce246f0db15e1b240ad667 upstream. When booted as pv-guest the p2m list presented by the Xen is already mapped to virtual addresses. In dom0 case the hypervisor might make use of 2M- or 1G-pages for this mapping. Unfortunately while being properly aligned in virtual and machine address space, those pages might not be aligned properly in guest physical address space. So when trying to obtain the guest physical address of such a page pud_pfn() and pmd_pfn() must be avoided as those will mask away guest physical address bits not being zero in this special case. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/mmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 37cb5aad71de..07b13e26215e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2023,7 +2023,8 @@ static unsigned long __init xen_read_phys_ulong(phys_addr_t addr) /* * Translate a virtual address to a physical one without relying on mapped - * page tables. + * page tables. Don't rely on big pages being aligned in (guest) physical + * space! 
*/ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) { @@ -2044,7 +2045,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) sizeof(pud))); if (!pud_present(pud)) return 0; - pa = pud_pfn(pud) << PAGE_SHIFT; + pa = pud_val(pud) & PTE_PFN_MASK; if (pud_large(pud)) return pa + (vaddr & ~PUD_MASK); @@ -2052,7 +2053,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) sizeof(pmd))); if (!pmd_present(pmd)) return 0; - pa = pmd_pfn(pmd) << PAGE_SHIFT; + pa = pmd_val(pmd) & PTE_PFN_MASK; if (pmd_large(pmd)) return pa + (vaddr & ~PMD_MASK); From 25ed85889dcd29c19b02d12156c34fb065ba676c Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 11 Apr 2017 16:24:16 -0700 Subject: [PATCH 031/465] target: Fix compare_and_write_callback handling for non GOOD status commit a71a5dc7f833943998e97ca8fa6a4c708a0ed1a9 upstream. Following the bugfix for handling non SAM_STAT_GOOD COMPARE_AND_WRITE status during COMMIT phase in commit 9b2792c3da1, the same bug exists for the READ phase as well. This would manifest first as a lost SCSI response, and eventual hung task during fabric driver logout or re-login, as existing shutdown logic waited for the COMPARE_AND_WRITE se_cmd->cmd_kref to reach zero. To address this bug, compare_and_write_callback() has been changed to set post_ret = 1 and return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE as necessary to signal failure status. Reported-by: Bill Borsari Cc: Bill Borsari Tested-by: Gary Guo Cc: Gary Guo Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_sbc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index c194063f169b..6ec390bb178e 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -507,8 +507,11 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes * been failed with a non-zero SCSI status. */ if (cmd->scsi_status) { - pr_err("compare_and_write_callback: non zero scsi_status:" + pr_debug("compare_and_write_callback: non zero scsi_status:" " 0x%02x\n", cmd->scsi_status); + *post_ret = 1; + if (cmd->scsi_status == SAM_STAT_CHECK_CONDITION) + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto out; } From 14a890020f947089c7d4c0ee04833cece9de5a58 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 4 May 2017 15:50:47 -0700 Subject: [PATCH 032/465] target/fileio: Fix zero-length READ and WRITE handling commit 59ac9c078141b8fd0186c0b18660a1b2c24e724e upstream. This patch fixes zero-length READ and WRITE handling in target/FILEIO, which was broken a long time back by: Since: commit d81cb44726f050d7cf1be4afd9cb45d153b52066 Author: Paolo Bonzini Date: Mon Sep 17 16:36:11 2012 -0700 target: go through normal processing for all zero-length commands which moved zero-length READ and WRITE completion out of target-core, to doing submission into backend driver code. To address this, go ahead and invoke target_complete_cmd() for any non negative return value in fd_do_rw(). 
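For illustration, a minimal standalone sketch of the completion rule described above; the helper names are made up stand-ins, not the real target-core API. A zero-length transfer now takes the same completion path as a normal one.

#include <stdio.h>

static void example_complete_cmd(int sam_status)
{
        printf("completing command with status %d\n", sam_status);
}

static int example_do_rw(long bytes_done)
{
        if (bytes_done < 0)
                return -1;      /* maps to a LOGICAL UNIT COMMUNICATION FAILURE */

        /* Complete for any non-negative result, including zero-length I/O. */
        example_complete_cmd(0 /* SAM_STAT_GOOD */);
        return 0;
}

int main(void)
{
        example_do_rw(0);       /* zero-length READ/WRITE is still completed */
        example_do_rw(4096);
        return 0;
}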
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 87aa376a1a1a..e00050ccb61d 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -595,8 +595,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - if (ret) - target_complete_cmd(cmd, SAM_STAT_GOOD); + target_complete_cmd(cmd, SAM_STAT_GOOD); return 0; } From f9a45058a31257edf502638498c3a98602addd76 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 25 Apr 2017 10:55:12 -0700 Subject: [PATCH 033/465] iscsi-target: Set session_fall_back_to_erl0 when forcing reinstatement commit 197b806ae5db60c6f609d74da04ddb62ea5e1b00 upstream. While testing modification of per se_node_acl queue_depth forcing session reinstatement via lio_target_nacl_cmdsn_depth_store() -> core_tpg_set_initiator_node_queue_depth(), a hung task bug triggered when changing cmdsn_depth invoked session reinstatement while an iscsi login was already waiting for session reinstatement to complete. This can happen when an outstanding se_cmd descriptor is taking a long time to complete, and session reinstatement from iscsi login or cmdsn_depth change occurs concurrently. To address this bug, explicitly set session_fall_back_to_erl0 = 1 when forcing session reinstatement, so session reinstatement is not attempted if an active session is already being shutdown. This patch has been tested with two scenarios. The first when iscsi login is blocked waiting for iscsi session reinstatement to complete followed by queue_depth change via configfs, and second when queue_depth change via configfs us blocked followed by a iscsi login driven session reinstatement. Note this patch depends on commit d36ad77f702 to handle multiple sessions per se_node_acl when changing cmdsn_depth, and for pre v4.5 kernels will need to be included for stable as well. 
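As a rough sketch of the flag handling described above (illustrative types only, not the real iscsi_session structure), both flags are set together so a concurrent login sees the session as already shutting down and does not attempt another reinstatement:

#include <stdio.h>

struct example_session {
        int session_reinstatement;
        int session_fall_back_to_erl0;
};

static void example_force_reinstatement(struct example_session *sess)
{
        sess->session_reinstatement = 1;
        /* Mark the fallback too, so another reinstatement attempt is not
         * started while this session is already being shut down. */
        sess->session_fall_back_to_erl0 = 1;
}

int main(void)
{
        struct example_session sess = { 0, 0 };

        example_force_reinstatement(&sess);
        printf("reinstatement=%d fall_back_to_erl0=%d\n",
               sess.session_reinstatement, sess.session_fall_back_to_erl0);
        return 0;
}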
Reported-by: Gary Guo Tested-by: Gary Guo Cc: Gary Guo Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_configfs.c | 1 + drivers/target/iscsi/iscsi_target_login.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index e3f9ed3690b7..8beed3451346 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4683,6 +4683,7 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) continue; } atomic_set(&sess->session_reinstatement, 1); + atomic_set(&sess->session_fall_back_to_erl0, 1); spin_unlock(&sess->conn_lock); list_move_tail(&se_sess->sess_list, &free_list); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 344e8448869c..96d9c73af1ae 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1528,6 +1528,7 @@ static void lio_tpg_close_session(struct se_session *se_sess) return; } atomic_set(&sess->session_reinstatement, 1); + atomic_set(&sess->session_fall_back_to_erl0, 1); spin_unlock(&sess->conn_lock); iscsit_stop_time2retain_timer(sess); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index ad8f3011bdc2..66238477137b 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -208,6 +208,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) initiatorname_param->value) && (sess_p->sess_ops->SessionType == sessiontype))) { atomic_set(&sess_p->session_reinstatement, 1); + atomic_set(&sess_p->session_fall_back_to_erl0, 1); spin_unlock(&sess_p->conn_lock); iscsit_inc_session_usage_count(sess_p); iscsit_stop_time2retain_timer(sess_p); From c5379cf67f1f2de699166e4275e43df39d21c575 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 7 Apr 2017 17:57:12 +0300 Subject: [PATCH 034/465] usb: xhci: bInterval quirk for TI TUSB73x0 commit 69307ccb9ad7ccb653e332de68effdeaaab6907d upstream. As per [1] issue #4, "The periodic EP scheduler always tries to schedule the EPs that have large intervals (interval equal to or greater than 128 microframes) into different microframes. So it maintains an internal counter and increments for each large interval EP added. When the counter is greater than 128, the scheduler rejects the new EP. So when the hub re-enumerated 128 times, it triggers this condition." This results in Bandwidth error when devices with periodic endpoints (ISO/INT) having bInterval > 7 are plugged and unplugged several times on a TUSB73x0 XHCI host. Workaround this issue by limiting the bInterval to 7 (i.e. interval to 6) for High-speed or faster periodic endpoints. 
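A minimal standalone sketch of the clamping rule described above; this is an assumed simplification, since the real code also checks the endpoint type and the per-host quirk flag:

#include <stdio.h>

/* Clamp the decoded endpoint interval for periodic endpoints when the
 * quirk is active: interval >= 7 (128+ microframes) becomes 6. */
static unsigned int example_limit_interval(unsigned int interval, int quirk)
{
        if (quirk && interval >= 7)
                interval = 6;
        return interval;
}

int main(void)
{
        printf("interval 10 -> %u\n", example_limit_interval(10, 1));
        printf("interval 4  -> %u\n", example_limit_interval(4, 1));
        return 0;
}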
[1] - http://www.ti.com/lit/er/sllz076/sllz076.pdf Signed-off-by: Roger Quadros Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 11 +++++++++++ drivers/usb/host/xhci-pci.c | 3 +++ drivers/usb/host/xhci.h | 1 + 3 files changed, 15 insertions(+) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index ba1853f4e407..05fb3f65d7f7 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1502,6 +1502,17 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, */ max_esit_payload = xhci_get_max_esit_payload(udev, ep); interval = xhci_get_endpoint_interval(udev, ep); + + /* Periodic endpoint bInterval limit quirk */ + if (usb_endpoint_xfer_int(&ep->desc) || + usb_endpoint_xfer_isoc(&ep->desc)) { + if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) && + udev->speed >= USB_SPEED_HIGH && + interval >= 7) { + interval = 6; + } + } + mult = xhci_get_endpoint_mult(udev, ep); max_packet = usb_endpoint_maxp(&ep->desc); max_burst = xhci_get_endpoint_max_burst(udev, ep); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index fc99f51d12e1..7b86508ac8cf 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -199,6 +199,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x1042) xhci->quirks |= XHCI_BROKEN_STREAMS; + if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) + xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7; + if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "QUIRK: Resetting on resume"); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index da3eb695fe54..2496bd6304ca 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1818,6 +1818,7 @@ struct xhci_hcd { #define XHCI_MISSING_CAS (1 << 24) /* For controller with a broken Port Disable implementation */ #define XHCI_BROKEN_PORT_PED (1 << 25) +#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) unsigned int num_active_eps; unsigned int limit_active_eps; From f34b103cd5a48ece7b85917b87dac2a15a27de0c Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 19 Apr 2017 16:55:52 +0300 Subject: [PATCH 035/465] usb: host: xhci: print correct command ring address commit 6fc091fb0459ade939a795bfdcaf645385b951d4 upstream. Print correct command ring address using 'val_64'. Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 05fb3f65d7f7..3f8f28f6fa94 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2491,7 +2491,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) (xhci->cmd_ring->first_seg->dma & (u64) ~CMD_RING_RSVD_BITS) | xhci->cmd_ring->cycle_state; xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "// Setting command ring address to 0x%x", val); + "// Setting command ring address to 0x%016llx", val_64); xhci_write_64(xhci, val_64, &xhci->op_regs->cmd_ring); xhci_dbg_cmd_ptrs(xhci); From 2436236ee3e7a951db1bc665bf9bac3098d92f91 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 18 Apr 2017 20:07:56 +0200 Subject: [PATCH 036/465] USB: serial: ftdi_sio: add device ID for Microsemi/Arrow SF2PLUS Dev Kit commit 31c5d1922b90ddc1da6a6ddecef7cd31f17aa32b upstream. This development kit has an FT4232 on it with a custom USB VID/PID. The FT4232 provides four UARTs, but only two are used. 
The UART 0 is used by the FlashPro5 programmer and UART 2 is connected to the SmartFusion2 CortexM3 SoC UART port. Note that the USB VID is registered to Actel according to Linux USB VID database, but that was acquired by Microsemi. Signed-off-by: Marek Vasut Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index c540de15aad2..03e6319b6d1c 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -873,6 +873,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE_AND_INTERFACE_INFO(MICROCHIP_VID, MICROCHIP_USB_BOARD_PID, USB_CLASS_VENDOR_SPEC, USB_SUBCLASS_VENDOR_SPEC, 0x00) }, + { USB_DEVICE_INTERFACE_NUMBER(ACTEL_VID, MICROSEMI_ARROW_SF2PLUS_BOARD_PID, 2) }, { USB_DEVICE(JETI_VID, JETI_SPC1201_PID) }, { USB_DEVICE(MARVELL_VID, MARVELL_SHEEVAPLUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 48ee04c94a75..71fb9e59db71 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -873,6 +873,12 @@ #define FIC_VID 0x1457 #define FIC_NEO1973_DEBUG_PID 0x5118 +/* + * Actel / Microsemi + */ +#define ACTEL_VID 0x1514 +#define MICROSEMI_ARROW_SF2PLUS_BOARD_PID 0x2008 + /* Olimex */ #define OLIMEX_VID 0x15BA #define OLIMEX_ARM_USB_OCD_PID 0x0003 From 2c33341c96be3e35dc40b0a5324a214327f8ab8b Mon Sep 17 00:00:00 2001 From: Ajay Kaher Date: Tue, 28 Mar 2017 08:09:32 -0400 Subject: [PATCH 037/465] USB: Proper handling of Race Condition when two USB class drivers try to call init_usb_class simultaneously commit 2f86a96be0ccb1302b7eee7855dbee5ce4dc5dfb upstream. There is race condition when two USB class drivers try to call init_usb_class at the same time and leads to crash. code path: probe->usb_register_dev->init_usb_class To solve this, mutex locking has been added in init_usb_class() and destroy_usb_class(). As pointed by Alan, removed "if (usb_class)" test from destroy_usb_class() because usb_class can never be NULL there. 
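For illustration, a small userspace sketch of the serialization idea described above, using a pthread mutex in place of the kernel mutex and a plain counter standing in for the kref; all names here are made up:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static int class_refs;

static int example_init_class(void)
{
        pthread_mutex_lock(&init_lock);
        if (class_refs++ == 0)
                printf("creating shared class\n");      /* class_create() */
        pthread_mutex_unlock(&init_lock);
        return 0;
}

static void example_destroy_class(void)
{
        pthread_mutex_lock(&init_lock);
        if (--class_refs == 0)
                printf("destroying shared class\n");    /* class_destroy() */
        pthread_mutex_unlock(&init_lock);
}

int main(void)
{
        example_init_class();   /* two class drivers calling this from probe() now serialize */
        example_init_class();
        example_destroy_class();
        example_destroy_class();
        return 0;
}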
Signed-off-by: Ajay Kaher Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/file.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c index e26bd5e773ad..87ad6b6bfee8 100644 --- a/drivers/usb/core/file.c +++ b/drivers/usb/core/file.c @@ -29,6 +29,7 @@ #define MAX_USB_MINORS 256 static const struct file_operations *usb_minors[MAX_USB_MINORS]; static DECLARE_RWSEM(minor_rwsem); +static DEFINE_MUTEX(init_usb_class_mutex); static int usb_open(struct inode *inode, struct file *file) { @@ -111,8 +112,9 @@ static void release_usb_class(struct kref *kref) static void destroy_usb_class(void) { - if (usb_class) - kref_put(&usb_class->kref, release_usb_class); + mutex_lock(&init_usb_class_mutex); + kref_put(&usb_class->kref, release_usb_class); + mutex_unlock(&init_usb_class_mutex); } int usb_major_init(void) @@ -173,7 +175,10 @@ int usb_register_dev(struct usb_interface *intf, if (intf->minor >= 0) return -EADDRINUSE; + mutex_lock(&init_usb_class_mutex); retval = init_usb_class(); + mutex_unlock(&init_usb_class_mutex); + if (retval) return retval; From f49d36b7b3004a36cbe59da2b80917526c5ded2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 21 Apr 2017 10:01:29 +0200 Subject: [PATCH 038/465] USB: Revert "cdc-wdm: fix "out-of-sync" due to missing notifications" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 19445816996d1a89682c37685fe95959631d9f32 upstream. This reverts commit 833415a3e781 ("cdc-wdm: fix "out-of-sync" due to missing notifications") There have been several reports of wdm_read returning unexpected EIO errors with QMI devices using the qmi_wwan driver. The reporters confirm that reverting prevents these errors. I have been unable to reproduce the bug myself, and have no explanation to offer either. But reverting is the safe choice here, given that the commit was an attempt to work around a firmware problem. Living with a firmware problem is still better than adding driver bugs. Reported-by: Kasper Holtze Reported-by: Aleksander Morgado Reported-by: Daniele Palmas Fixes: 833415a3e781 ("cdc-wdm: fix "out-of-sync" due to missing notifications") Signed-off-by: Bjørn Mork Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 103 ++---------------------------------- 1 file changed, 4 insertions(+), 99 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 8fda45a45bd3..08669fee6d7f 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -58,7 +58,6 @@ MODULE_DEVICE_TABLE (usb, wdm_ids); #define WDM_SUSPENDING 8 #define WDM_RESETTING 9 #define WDM_OVERFLOW 10 -#define WDM_DRAIN_ON_OPEN 11 #define WDM_MAX 16 @@ -182,7 +181,7 @@ static void wdm_in_callback(struct urb *urb) "nonzero urb status received: -ESHUTDOWN\n"); goto skip_error; case -EPIPE: - dev_dbg(&desc->intf->dev, + dev_err(&desc->intf->dev, "nonzero urb status received: -EPIPE\n"); break; default: @@ -210,25 +209,6 @@ static void wdm_in_callback(struct urb *urb) desc->reslength = length; } } - - /* - * Handling devices with the WDM_DRAIN_ON_OPEN flag set: - * If desc->resp_count is unset, then the urb was submitted - * without a prior notification. If the device returned any - * data, then this implies that it had messages queued without - * notifying us. Continue reading until that queue is flushed. 
- */ - if (!desc->resp_count) { - if (!length) { - /* do not propagate the expected -EPIPE */ - desc->rerr = 0; - goto unlock; - } - dev_dbg(&desc->intf->dev, "got %d bytes without notification\n", length); - set_bit(WDM_RESPONDING, &desc->flags); - usb_submit_urb(desc->response, GFP_ATOMIC); - } - skip_error: set_bit(WDM_READ, &desc->flags); wake_up(&desc->wait); @@ -243,7 +223,6 @@ static void wdm_in_callback(struct urb *urb) service_outstanding_interrupt(desc); } -unlock: spin_unlock(&desc->iuspin); } @@ -686,17 +665,6 @@ static int wdm_open(struct inode *inode, struct file *file) dev_err(&desc->intf->dev, "Error submitting int urb - %d\n", rv); rv = usb_translate_errors(rv); - } else if (test_bit(WDM_DRAIN_ON_OPEN, &desc->flags)) { - /* - * Some devices keep pending messages queued - * without resending notifications. We must - * flush the message queue before we can - * assume a one-to-one relationship between - * notifications and messages in the queue - */ - dev_dbg(&desc->intf->dev, "draining queued data\n"); - set_bit(WDM_RESPONDING, &desc->flags); - rv = usb_submit_urb(desc->response, GFP_KERNEL); } } else { rv = 0; @@ -803,8 +771,7 @@ static void wdm_rxwork(struct work_struct *work) /* --- hotplug --- */ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor *ep, - u16 bufsize, int (*manage_power)(struct usb_interface *, int), - bool drain_on_open) + u16 bufsize, int (*manage_power)(struct usb_interface *, int)) { int rv = -ENOMEM; struct wdm_device *desc; @@ -891,68 +858,6 @@ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor desc->manage_power = manage_power; - /* - * "drain_on_open" enables a hack to work around a firmware - * issue observed on network functions, in particular MBIM - * functions. - * - * Quoting section 7 of the CDC-WMC r1.1 specification: - * - * "The firmware shall interpret GetEncapsulatedResponse as a - * request to read response bytes. The firmware shall send - * the next wLength bytes from the response. The firmware - * shall allow the host to retrieve data using any number of - * GetEncapsulatedResponse requests. The firmware shall - * return a zero- length reply if there are no data bytes - * available. - * - * The firmware shall send ResponseAvailable notifications - * periodically, using any appropriate algorithm, to inform - * the host that there is data available in the reply - * buffer. The firmware is allowed to send ResponseAvailable - * notifications even if there is no data available, but - * this will obviously reduce overall performance." - * - * These requirements, although they make equally sense, are - * often not implemented by network functions. Some firmwares - * will queue data indefinitely, without ever resending a - * notification. The result is that the driver and firmware - * loses "syncronization" if the driver ever fails to respond - * to a single notification, something which easily can happen - * on release(). When this happens, the driver will appear to - * never receive notifications for the most current data. Each - * notification will only cause a single read, which returns - * the oldest data in the firmware's queue. - * - * The "drain_on_open" hack resolves the situation by draining - * data from the firmware until none is returned, without a - * prior notification. - * - * This will inevitably race with the firmware, risking that - * we read data from the device before handling the associated - * notification. 
To make things worse, some of the devices - * needing the hack do not implement the "return zero if no - * data is available" requirement either. Instead they return - * an error on the subsequent read in this case. This means - * that "winning" the race can cause an unexpected EIO to - * userspace. - * - * "winning" the race is more likely on resume() than on - * open(), and the unexpected error is more harmful in the - * middle of an open session. The hack is therefore only - * applied on open(), and not on resume() where it logically - * would be equally necessary. So we define open() as the only - * driver <-> device "syncronization point". Should we happen - * to lose a notification after open(), then syncronization - * will be lost until release() - * - * The hack should not be enabled for CDC WDM devices - * conforming to the CDC-WMC r1.1 specification. This is - * ensured by setting drain_on_open to false in wdm_probe(). - */ - if (drain_on_open) - set_bit(WDM_DRAIN_ON_OPEN, &desc->flags); - spin_lock(&wdm_device_list_lock); list_add(&desc->device_list, &wdm_device_list); spin_unlock(&wdm_device_list_lock); @@ -1006,7 +911,7 @@ static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id) goto err; ep = &iface->endpoint[0].desc; - rv = wdm_create(intf, ep, maxcom, &wdm_manage_power, false); + rv = wdm_create(intf, ep, maxcom, &wdm_manage_power); err: return rv; @@ -1038,7 +943,7 @@ struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf, { int rv = -EINVAL; - rv = wdm_create(intf, ep, bufsize, manage_power, true); + rv = wdm_create(intf, ep, bufsize, manage_power); if (rv < 0) goto err; From 5ba4fd334f607369cff4201107b58f0404e2f67e Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sat, 22 Apr 2017 11:14:58 +0100 Subject: [PATCH 039/465] staging: vt6656: use off stack for in buffer USB transfers. commit 05c0cf88bec588a7cb34de569acd871ceef26760 upstream. Since 4.9 mandated USB buffers to be heap allocated. This causes the driver to fail. Create buffer for USB transfers. Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/usbpipe.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vt6656/usbpipe.c b/drivers/staging/vt6656/usbpipe.c index 1ae6a64c7fd4..72c2ba07c284 100644 --- a/drivers/staging/vt6656/usbpipe.c +++ b/drivers/staging/vt6656/usbpipe.c @@ -75,15 +75,28 @@ int vnt_control_in(struct vnt_private *priv, u8 request, u16 value, u16 index, u16 length, u8 *buffer) { int status; + u8 *usb_buffer; if (test_bit(DEVICE_FLAGS_DISCONNECTED, &priv->flags)) return STATUS_FAILURE; mutex_lock(&priv->usb_lock); + usb_buffer = kmalloc(length, GFP_KERNEL); + if (!usb_buffer) { + mutex_unlock(&priv->usb_lock); + return -ENOMEM; + } + status = usb_control_msg(priv->usb, - usb_rcvctrlpipe(priv->usb, 0), request, 0xc0, value, - index, buffer, length, USB_CTL_WAIT); + usb_rcvctrlpipe(priv->usb, 0), + request, 0xc0, value, + index, usb_buffer, length, USB_CTL_WAIT); + + if (status == length) + memcpy(buffer, usb_buffer, length); + + kfree(usb_buffer); mutex_unlock(&priv->usb_lock); From af685eefa2769a171074344c1e1c2457ed5a347b Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sat, 22 Apr 2017 11:14:57 +0100 Subject: [PATCH 040/465] staging: vt6656: use off stack for out buffer USB transfers. commit 12ecd24ef93277e4e5feaf27b0b18f2d3828bc5e upstream. Since 4.9 mandated USB buffers be heap allocated this causes the driver to fail. 
Since there is a wide range of buffer sizes use kmemdup to create allocated buffer. Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/usbpipe.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vt6656/usbpipe.c b/drivers/staging/vt6656/usbpipe.c index 72c2ba07c284..dc11a05be8c4 100644 --- a/drivers/staging/vt6656/usbpipe.c +++ b/drivers/staging/vt6656/usbpipe.c @@ -47,15 +47,25 @@ int vnt_control_out(struct vnt_private *priv, u8 request, u16 value, u16 index, u16 length, u8 *buffer) { int status = 0; + u8 *usb_buffer; if (test_bit(DEVICE_FLAGS_DISCONNECTED, &priv->flags)) return STATUS_FAILURE; mutex_lock(&priv->usb_lock); + usb_buffer = kmemdup(buffer, length, GFP_KERNEL); + if (!usb_buffer) { + mutex_unlock(&priv->usb_lock); + return -ENOMEM; + } + status = usb_control_msg(priv->usb, - usb_sndctrlpipe(priv->usb, 0), request, 0x40, value, - index, buffer, length, USB_CTL_WAIT); + usb_sndctrlpipe(priv->usb, 0), + request, 0x40, value, + index, usb_buffer, length, USB_CTL_WAIT); + + kfree(usb_buffer); mutex_unlock(&priv->usb_lock); From 22d8767df90a1d4dd8a955e579e59ead617adcb7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 26 Apr 2017 12:23:04 +0200 Subject: [PATCH 041/465] staging: gdm724x: gdm_mux: fix use-after-free on module unload commit b58f45c8fc301fe83ee28cad3e64686c19e78f1c upstream. Make sure to deregister the USB driver before releasing the tty driver to avoid use-after-free in the USB disconnect callback where the tty devices are deregistered. Fixes: 61e121047645 ("staging: gdm7240: adding LTE USB driver") Cc: Won Kang Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gdm724x/gdm_mux.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/gdm724x/gdm_mux.c b/drivers/staging/gdm724x/gdm_mux.c index 400969170d1c..f03e43b1b5f6 100644 --- a/drivers/staging/gdm724x/gdm_mux.c +++ b/drivers/staging/gdm724x/gdm_mux.c @@ -664,9 +664,8 @@ static int __init gdm_usb_mux_init(void) static void __exit gdm_usb_mux_exit(void) { - unregister_lte_tty_driver(); - usb_deregister(&gdm_mux_driver); + unregister_lte_tty_driver(); } module_init(gdm_usb_mux_init); From 89cb8fcceab18f32ef637234b909a49649f475d9 Mon Sep 17 00:00:00 2001 From: Aditya Shankar Date: Fri, 7 Apr 2017 17:24:58 +0530 Subject: [PATCH 042/465] staging: wilc1000: Fix problem with wrong vif index commit 0e490657c7214cce33fbca3d88227298c5c968ae upstream. The vif->idx value is always 0 for two interfaces. wl->vif_num = 0; loop { ... vif->idx = wl->vif_num; ... wl->vif_num = i; .... i++; ... } At present, vif->idx is assigned the value of wl->vif_num at the beginning of this block and device is initialized based on this index value. In the next iteration, wl->vif_num is still 0 as it is only updated later but gets assigned to vif->idx in the beginning. This causes problems later when we try to reference a particular interface and also while configuring the firmware. This patch moves the assignment to vif->idx from the beginning of the block to after wl->vif_num is updated with latest value of i. 
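To see why the original ordering pins every interface to index 0, here is a minimal stand-alone C sketch of the two orderings described above; the struct and variable names are only stand-ins for the driver's wl->vif_num bookkeeping, not the wilc1000 code itself.

#include <stdio.h>

/* Minimal stand-in for the driver state; names are illustrative only. */
struct vif { int idx; };

int main(void)
{
	struct vif before[2] = { { 0 } }, after[2] = { { 0 } };
	int vif_num;

	/* Old ordering: idx is read from vif_num before it is updated. */
	vif_num = 0;
	for (int i = 0; i < 2; i++) {
		before[i].idx = vif_num;	/* always sees the stale value */
		vif_num = i;
	}

	/* New ordering: vif_num is updated first, then copied into idx. */
	vif_num = 0;
	for (int i = 0; i < 2; i++) {
		vif_num = i;
		after[i].idx = vif_num;		/* 0, 1, ... as intended */
	}

	for (int i = 0; i < 2; i++)
		printf("interface %d: old idx=%d, new idx=%d\n",
		       i, before[i].idx, after[i].idx);
	return 0;
}

With the old ordering both interfaces report idx 0; with the new ordering they report 0 and 1.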
Fixes: commit 735bb39ca3be ("staging: wilc1000: simplify vif[i]->ndev accesses") Signed-off-by: Aditya Shankar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/linux_wlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c index 2eebc6215cac..bdf11c9f44a8 100644 --- a/drivers/staging/wilc1000/linux_wlan.c +++ b/drivers/staging/wilc1000/linux_wlan.c @@ -1251,11 +1251,12 @@ int wilc_netdev_init(struct wilc **wilc, struct device *dev, int io_type, else strcpy(ndev->name, "p2p%d"); - vif->idx = wl->vif_num; vif->wilc = *wilc; vif->ndev = ndev; wl->vif[i] = vif; wl->vif_num = i; + vif->idx = wl->vif_num; + ndev->netdev_ops = &wilc_netdev_ops; { From 99185235783a7479e6c28f508fbba79500e06fbd Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 25 Mar 2017 07:31:57 -0300 Subject: [PATCH 043/465] staging: sir: fill in missing fields and fix probe commit cf9ed9aa5b0c196b796d2728218e3c06b0f42d90 upstream. Some fields are left blank. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/lirc/lirc_sir.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c index c6c3de94adaa..edb607a45505 100644 --- a/drivers/staging/media/lirc/lirc_sir.c +++ b/drivers/staging/media/lirc/lirc_sir.c @@ -227,6 +227,7 @@ static int init_chrdev(void) if (!rcdev) return -ENOMEM; + rcdev->input_name = "SIR IrDA port"; rcdev->input_phys = KBUILD_MODNAME "/input0"; rcdev->input_id.bustype = BUS_HOST; rcdev->input_id.vendor = 0x0001; @@ -234,6 +235,7 @@ static int init_chrdev(void) rcdev->input_id.version = 0x0100; rcdev->tx_ir = sir_tx_ir; rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; + rcdev->driver_name = KBUILD_MODNAME; rcdev->map_name = RC_MAP_RC6_MCE; rcdev->timeout = IR_DEFAULT_TIMEOUT; rcdev->dev.parent = &sir_ir_dev->dev; @@ -740,7 +742,13 @@ static int init_sir_ir(void) static int sir_ir_probe(struct platform_device *dev) { - return 0; + int retval; + + retval = init_chrdev(); + if (retval < 0) + return retval; + + return init_sir_ir(); } static int sir_ir_remove(struct platform_device *dev) @@ -780,18 +788,8 @@ static int __init sir_ir_init(void) goto pdev_add_fail; } - retval = init_chrdev(); - if (retval < 0) - goto fail; - - retval = init_sir_ir(); - if (retval) - goto fail; - return 0; -fail: - platform_device_del(sir_ir_dev); pdev_add_fail: platform_device_put(sir_ir_dev); pdev_alloc_fail: From 6e4c973ac6292fc72b9d3818afb0221e3202f738 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 17 Feb 2017 11:09:08 +0000 Subject: [PATCH 044/465] staging: comedi: jr3_pci: fix possible null pointer dereference commit 45292be0b3db0b7f8286683b376e2d9f949d11f9 upstream. For some reason, the driver does not consider allocation of the subdevice private data to be a fatal error when attaching the COMEDI device. It tests the subdevice private data pointer for validity at certain points, but omits some crucial tests. In particular, `jr3_pci_auto_attach()` calls `jr3_pci_alloc_spriv()` to allocate and initialize the subdevice private data, but the same function subsequently dereferences the pointer to access the `next_time_min` and `next_time_max` members without checking it first. The other missing test is in the timer expiry routine `jr3_pci_poll_dev()`, but it will crash before it gets that far. 
Fix the bug by returning `-ENOMEM` from `jr3_pci_auto_attach()` as soon as one of the calls to `jr3_pci_alloc_spriv()` returns `NULL`. The COMEDI core will subsequently call `jr3_pci_detach()` to clean up. Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/jr3_pci.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/staging/comedi/drivers/jr3_pci.c b/drivers/staging/comedi/drivers/jr3_pci.c index 70390de66e0e..25909a936e7c 100644 --- a/drivers/staging/comedi/drivers/jr3_pci.c +++ b/drivers/staging/comedi/drivers/jr3_pci.c @@ -727,11 +727,12 @@ static int jr3_pci_auto_attach(struct comedi_device *dev, s->insn_read = jr3_pci_ai_insn_read; spriv = jr3_pci_alloc_spriv(dev, s); - if (spriv) { - /* Channel specific range and maxdata */ - s->range_table_list = spriv->range_table_list; - s->maxdata_list = spriv->maxdata_list; - } + if (!spriv) + return -ENOMEM; + + /* Channel specific range and maxdata */ + s->range_table_list = spriv->range_table_list; + s->maxdata_list = spriv->maxdata_list; } /* Reset DSP card */ From 69f8945b4f38799b7b46de744acae8c9f447f8d7 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 17 Feb 2017 11:09:09 +0000 Subject: [PATCH 045/465] staging: comedi: jr3_pci: cope with jiffies wraparound commit 8ec04a491825e08068e92bed0bba7821893b6433 upstream. The timer expiry routine `jr3_pci_poll_dev()` checks for expiry by checking whether the absolute value of `jiffies` (stored in local variable `now`) is greater than the expected expiry time in jiffy units. This will fail when `jiffies` wraps around. Also, it seems to make sense to handle the expiry one jiffy earlier than the current test. Use `time_after_eq()` to check for expiry. Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/jr3_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/jr3_pci.c b/drivers/staging/comedi/drivers/jr3_pci.c index 25909a936e7c..eb0a095efe9c 100644 --- a/drivers/staging/comedi/drivers/jr3_pci.c +++ b/drivers/staging/comedi/drivers/jr3_pci.c @@ -611,7 +611,7 @@ static void jr3_pci_poll_dev(unsigned long data) s = &dev->subdevices[i]; spriv = s->private; - if (now > spriv->next_time_min) { + if (time_after_eq(now, spriv->next_time_min)) { struct jr3_pci_poll_delay sub_delay; sub_delay = jr3_pci_poll_subdevice(s); From 84759debb0e598c2948f833ef6b6c88712ce3459 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 3 Apr 2017 22:48:40 -0500 Subject: [PATCH 046/465] usb: misc: add missing continue in switch commit 2c930e3d0aed1505e86e0928d323df5027817740 upstream. Add missing continue in switch. Addresses-Coverity-ID: 1248733 Signed-off-by: Gustavo A. R. Silva Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usbtest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 17c081068257..26ae5d1a2a4e 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -159,6 +159,7 @@ get_endpoints(struct usbtest_dev *dev, struct usb_interface *intf) case USB_ENDPOINT_XFER_INT: if (dev->info->intr) goto try_intr; + continue; case USB_ENDPOINT_XFER_ISOC: if (dev->info->iso) goto try_iso; From 6a9fc06acfb9934fb4aa9a6690a7f5998f3e1151 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Fri, 10 Mar 2017 14:11:41 +0100 Subject: [PATCH 047/465] usb: gadget: legacy gadgets are optional commit 6e253d0fbc665b36192b8ed3cecdbb65b413a1eb upstream. 
With commit bc49d1d17dcf ("usb: gadget: don't couple configfs to legacy gadgets"),it is possible to build a modular kernel with both built-in configfs support and modular legacy gadget drivers. But when building a kernel without modules, it is also necessary to be able to build with configfs but without any legacy gadget driver. This was a possible configuration when the USB_CONFIGFS was a part of the choice options, but not anymore. Mark the choice for legacy gadget drivers as optional restores this. Fixes: bc49d1d17dcf ("usb: gadget: don't couple configfs to legacy gadgets") Signed-off-by: Romain Izard Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 8ad203296079..f3ee80ece682 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -460,6 +460,7 @@ config USB_CONFIGFS_F_TCM choice tristate "USB Gadget Drivers" default USB_ETH + optional help A Linux "Gadget Driver" talks to the USB Peripheral Controller driver through the abstract "gadget" API. Some other operating From 8e3b0f66f4c6387e83aef9541fea6d804edc891b Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 13 Apr 2017 15:33:34 +0300 Subject: [PATCH 048/465] usb: Make sure usb/phy/of gets built-in commit 3d6159640da9c9175d1ca42f151fc1a14caded59 upstream. DWC3 driver uses of_usb_get_phy_mode() which is implemented in drivers/usb/phy/of.c and in bare minimal configuration it might not be pulled in kernel binary. In case of ARC or ARM this could be easily reproduced with "allnodefconfig" +CONFIG_USB=m +CONFIG_USB_DWC3=m. On building all ends-up with: ---------------------->8------------------ Kernel: arch/arm/boot/Image is ready Kernel: arch/arm/boot/zImage is ready Building modules, stage 2. MODPOST 5 modules ERROR: "of_usb_get_phy_mode" [drivers/usb/dwc3/dwc3.ko] undefined! make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 ---------------------->8------------------ Signed-off-by: Alexey Brodkin Cc: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: Geert Uytterhoeven Cc: Nicolas Pitre Cc: Thomas Gleixner Cc: Felipe Balbi Cc: Felix Fietkau Cc: Jeremy Kerr Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/Makefile b/drivers/Makefile index 2eced9afba53..8f8bdc9e3d29 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -104,6 +104,7 @@ obj-$(CONFIG_USB_PHY) += usb/ obj-$(CONFIG_USB) += usb/ obj-$(CONFIG_PCI) += usb/ obj-$(CONFIG_USB_GADGET) += usb/ +obj-$(CONFIG_OF) += usb/ obj-$(CONFIG_SERIO) += input/serio/ obj-$(CONFIG_GAMEPORT) += input/gameport/ obj-$(CONFIG_INPUT) += input/ From 90d4e2a6592da98bfa99941bddd0d83ae789a565 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 20 Mar 2017 11:16:11 -0700 Subject: [PATCH 049/465] usb: hub: Fix error loop seen after hub communication errors commit 245b2eecee2aac6fdc77dcafaa73c33f9644c3c7 upstream. While stress testing a usb controller using a bind/unbind looop, the following error loop was observed. 
usb 7-1.2: new low-speed USB device number 3 using xhci-hcd usb 7-1.2: hub failed to enable device, error -108 usb 7-1-port2: cannot disable (err = -22) usb 7-1-port2: couldn't allocate usb_device usb 7-1-port2: cannot disable (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: activate --> -22 hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: activate --> -22 hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: activate --> -22 hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: activate --> -22 hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: activate --> -22 hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: activate --> -22 hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: activate --> -22 hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: activate --> -22 hub 7-1:1.0: hub_ext_port_status failed (err = -22) hub 7-1:1.0: hub_ext_port_status failed (err = -22) ** 57 printk messages dropped ** hub 7-1:1.0: activate --> -22 ** 82 printk messages dropped ** hub 7-1:1.0: hub_ext_port_status failed (err = -22) This continues forever. After adding tracebacks into the code, the call sequence leading to this is found to be as follows. [] hub_activate+0x368/0x7b8 [] hub_resume+0x2c/0x3c [] usb_resume_interface.isra.6+0x128/0x158 [] usb_suspend_both+0x1e8/0x288 [] usb_runtime_suspend+0x3c/0x98 [] __rpm_callback+0x48/0x7c [] rpm_callback+0xa8/0xd4 [] rpm_suspend+0x84/0x758 [] rpm_idle+0x2c8/0x498 [] __pm_runtime_idle+0x60/0xac [] usb_autopm_put_interface+0x6c/0x7c [] hub_event+0x10ac/0x12ac [] process_one_work+0x390/0x6b8 [] worker_thread+0x480/0x610 [] kthread+0x164/0x178 [] ret_from_fork+0x10/0x40 kick_hub_wq() is called from hub_activate() even after failures to communicate with the hub. This results in an endless sequence of hub event -> hub activate -> wq trigger -> hub event -> ... Provide two solutions for the problem. - Only trigger the hub event queue if communication with the hub is successful. - After a suspend failure, only resume already suspended interfaces if the communication with the device is still possible. Each of the changes fixes the observed problem. Use both to improve robustness. Acked-by: Alan Stern Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 18 ++++++++++++++++++ drivers/usb/core/hub.c | 5 ++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index cdee5130638b..7ebdf2a4e8fe 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1331,6 +1331,24 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) */ if (udev->parent && !PMSG_IS_AUTO(msg)) status = 0; + + /* + * If the device is inaccessible, don't try to resume + * suspended interfaces and just return the error. 
+ */ + if (status && status != -EBUSY) { + int err; + u16 devstat; + + err = usb_get_status(udev, USB_RECIP_DEVICE, 0, + &devstat); + if (err) { + dev_err(&udev->dev, + "Failed to suspend device, error %d\n", + status); + goto done; + } + } } /* If the suspend failed, resume interfaces that did get suspended */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 5286bf67869a..2e047f982af3 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1066,6 +1066,9 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) portstatus = portchange = 0; status = hub_port_status(hub, port1, &portstatus, &portchange); + if (status) + goto abort; + if (udev || (portstatus & USB_PORT_STAT_CONNECTION)) dev_dbg(&port_dev->dev, "status %04x change %04x\n", portstatus, portchange); @@ -1198,7 +1201,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Scan all ports that need attention */ kick_hub_wq(hub); - + abort: if (type == HUB_INIT2 || type == HUB_INIT3) { /* Allow autosuspend if it was suppressed */ disconnected: From 12001f3a456a7714e71ef324d12baf520a7c524d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 20 Mar 2017 14:30:50 -0700 Subject: [PATCH 050/465] usb: hub: Do not attempt to autosuspend disconnected devices commit f5cccf49428447dfbc9edb7a04bb8fc316269781 upstream. While running a bind/unbind stress test with the dwc3 usb driver on rk3399, the following crash was observed. Unable to handle kernel NULL pointer dereference at virtual address 00000218 pgd = ffffffc00165f000 [00000218] *pgd=000000000174f003, *pud=000000000174f003, *pmd=0000000001750003, *pte=00e8000001751713 Internal error: Oops: 96000005 [#1] PREEMPT SMP Modules linked in: uinput uvcvideo videobuf2_vmalloc cmac ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat rfcomm xt_mark fuse bridge stp llc zram btusb btrtl btbcm btintel bluetooth ip6table_filter mwifiex_pcie mwifiex cfg80211 cdc_ether usbnet r8152 mii joydev snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device ppp_async ppp_generic slhc tun CPU: 1 PID: 29814 Comm: kworker/1:1 Not tainted 4.4.52 #507 Hardware name: Google Kevin (DT) Workqueue: pm pm_runtime_work task: ffffffc0ac540000 ti: ffffffc0af4d4000 task.ti: ffffffc0af4d4000 PC is at autosuspend_check+0x74/0x174 LR is at autosuspend_check+0x70/0x174 ... Call trace: [] autosuspend_check+0x74/0x174 [] usb_runtime_idle+0x20/0x40 [] __rpm_callback+0x48/0x7c [] rpm_idle+0x1e8/0x498 [] pm_runtime_work+0x88/0xcc [] process_one_work+0x390/0x6b8 [] worker_thread+0x480/0x610 [] kthread+0x164/0x178 [] ret_from_fork+0x10/0x40 Source: (gdb) l *0xffffffc00080dcc0 0xffffffc00080dcc0 is in autosuspend_check (drivers/usb/core/driver.c:1778). 1773 /* We don't need to check interfaces that are 1774 * disabled for runtime PM. Either they are unbound 1775 * or else their drivers don't support autosuspend 1776 * and so they are permanently active. 1777 */ 1778 if (intf->dev.power.disable_depth) 1779 continue; 1780 if (atomic_read(&intf->dev.power.usage_count) > 0) 1781 return -EBUSY; 1782 w |= intf->needs_remote_wakeup; Code analysis shows that intf is set to NULL in usb_disable_device() prior to setting actconfig to NULL. At the same time, usb_runtime_idle() does not lock the usb device, and neither does any of the functions in the traceback. 
This means that there is no protection against a race condition where usb_disable_device() is removing dev->actconfig->interface[] pointers while those are being accessed from autosuspend_check(). To solve the problem, synchronize and validate device state between autosuspend_check() and usb_disconnect(). Acked-by: Alan Stern Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 3 +++ drivers/usb/core/hub.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 7ebdf2a4e8fe..eb87a259d55c 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1781,6 +1781,9 @@ static int autosuspend_check(struct usb_device *udev) int w, i; struct usb_interface *intf; + if (udev->state == USB_STATE_NOTATTACHED) + return -ENODEV; + /* Fail if autosuspend is disabled, or any interfaces are in use, or * any interface drivers require remote wakeup but it isn't available. */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 2e047f982af3..9dca59ef18b3 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2087,6 +2087,12 @@ void usb_disconnect(struct usb_device **pdev) dev_info(&udev->dev, "USB disconnect, device number %d\n", udev->devnum); + /* + * Ensure that the pm runtime code knows that the USB device + * is in the process of being disconnected. + */ + pm_runtime_barrier(&udev->dev); + usb_lock_device(udev); hub_disconnect_children(udev); From 0dc0e26fee698efdba2260e83ecf26dffef5fe09 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Wed, 19 Apr 2017 20:50:15 +0530 Subject: [PATCH 051/465] x86/boot: Fix BSS corruption/overwrite bug in early x86 kernel startup commit d594aa0277e541bb997aef0bc0a55172d8138340 upstream. The minimum size for a new stack (512 bytes) setup for arch/x86/boot components when the bootloader does not setup/provide a stack for the early boot components is not "enough". The setup code executing as part of early kernel startup code, uses the stack beyond 512 bytes and accidentally overwrites and corrupts part of the BSS section. This is exposed mostly in the early video setup code, where it was corrupting BSS variables like force_x, force_y, which in-turn affected kernel parameters such as screen_info (screen_info.orig_video_cols) and later caused an exception/panic in console_init(). Most recent boot loaders setup the stack for early boot components, so this stack overwriting into BSS section issue has not been exposed. Signed-off-by: Ashish Kalra Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170419152015.10011-1-ashishkalra@Ashishs-MacBook-Pro.local Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/boot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 9b42b6d1e902..ef5a9cc66fb8 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -16,7 +16,7 @@ #ifndef BOOT_BOOT_H #define BOOT_BOOT_H -#define STACK_SIZE 512 /* Minimum number of bytes for stack */ +#define STACK_SIZE 1024 /* Minimum number of bytes for stack */ #ifndef __ASSEMBLY__ From 20fd61dbb15479b8940661f3be8aaebd478b5844 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 22 Mar 2017 14:32:29 -0700 Subject: [PATCH 052/465] selftests/x86/ldt_gdt_32: Work around a glibc sigaction() bug commit 65973dd3fd31151823f4b8c289eebbb3fb7e6bc0 upstream. i386 glibc is buggy and calls the sigaction syscall incorrectly. This is asymptomatic for normal programs, but it blows up on programs that do evil things with segmentation. The ldt_gdt self-test is an example of such an evil program. This doesn't appear to be a regression -- I think I just got lucky with the uninitialized memory that glibc threw at the kernel when I wrote the test. This hackish fix manually issues sigaction(2) syscalls to undo the damage. Without the fix, ldt_gdt_32 segfaults; with the fix, it passes for me. See: https://sourceware.org/bugzilla/show_bug.cgi?id=21269 Signed-off-by: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Garnier Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/aaab0f9f93c9af25396f01232608c163a760a668.1490218061.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/ldt_gdt.c | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index f6121612e769..b9a22f18566a 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -409,6 +409,51 @@ static void *threadproc(void *ctx) } } +#ifdef __i386__ + +#ifndef SA_RESTORE +#define SA_RESTORER 0x04000000 +#endif + +/* + * The UAPI header calls this 'struct sigaction', which conflicts with + * glibc. Sigh. + */ +struct fake_ksigaction { + void *handler; /* the real type is nasty */ + unsigned long sa_flags; + void (*sa_restorer)(void); + unsigned char sigset[8]; +}; + +static void fix_sa_restorer(int sig) +{ + struct fake_ksigaction ksa; + + if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) { + /* + * glibc has a nasty bug: it sometimes writes garbage to + * sa_restorer. This interacts quite badly with anything + * that fiddles with SS because it can trigger legacy + * stack switching. Patch it up. See: + * + * https://sourceware.org/bugzilla/show_bug.cgi?id=21269 + */ + if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) { + ksa.sa_restorer = NULL; + if (syscall(SYS_rt_sigaction, sig, &ksa, NULL, + sizeof(ksa.sigset)) != 0) + err(1, "rt_sigaction"); + } + } +} +#else +static void fix_sa_restorer(int sig) +{ + /* 64-bit glibc works fine. 
*/ +} +#endif + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -420,6 +465,7 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), if (sigaction(sig, &sa, 0)) err(1, "sigaction"); + fix_sa_restorer(sig); } static jmp_buf jmpbuf; From f25c69bd5ed57231bdf47aa88837a74141b4dc81 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 9 May 2017 18:00:43 +0100 Subject: [PATCH 053/465] x86, pmem: Fix cache flushing for iovec write < 8 bytes commit 8376efd31d3d7c44bd05be337adde023cc531fa1 upstream. Commit 11e63f6d920d added cache flushing for unaligned writes from an iovec, covering the first and last cache line of a >= 8 byte write and the first cache line of a < 8 byte write. But an unaligned write of 2-7 bytes can still cover two cache lines, so make sure we flush both in that case. Fixes: 11e63f6d920d ("x86, pmem: fix broken __copy_user_nocache ...") Signed-off-by: Ben Hutchings Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index 529bb4a6487a..e2904373010d 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -103,7 +103,7 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, if (bytes < 8) { if (!IS_ALIGNED(dest, 4) || (bytes != 4)) - arch_wb_cache_pmem(addr, 1); + arch_wb_cache_pmem(addr, bytes); } else { if (!IS_ALIGNED(dest, 8)) { dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); From 9e748196781e904272e9d0e266deccca92ba8215 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sat, 1 Apr 2017 00:41:57 +0200 Subject: [PATCH 054/465] um: Fix PTRACE_POKEUSER on x86_64 commit 9abc74a22d85ab29cef9896a2582a530da7e79bf upstream. This is broken since ever but sadly nobody noticed. Recent versions of GDB set DR_CONTROL unconditionally and UML dies due to a heap corruption. It turns out that the PTRACE_POKEUSER was copy&pasted from i386 and assumes that addresses are 4 bytes long. Fix that by using 8 as address size in the calculation. Reported-by: jie cao Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/x86/um/ptrace_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index a5c9910d234f..09a085bde0d4 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -125,7 +125,7 @@ int poke_user(struct task_struct *child, long addr, long data) else if ((addr >= offsetof(struct user, u_debugreg[0])) && (addr <= offsetof(struct user, u_debugreg[7]))) { addr -= offsetof(struct user, u_debugreg[0]); - addr = addr >> 2; + addr = addr >> 3; if ((addr == 4) || (addr == 5)) return -EIO; child->thread.arch.debugregs[addr] = data; From d85d4c871a00afc100d0c62a0faf86103b5c095f Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 2 May 2017 14:08:50 -0400 Subject: [PATCH 055/465] perf/x86: Fix Broadwell-EP DRAM RAPL events commit 33b88e708e7dfa58dc896da2a98f5719d2eb315c upstream. It appears as though the Broadwell-EP DRAM units share the special units quirk with Haswell-EP/KNL. Without this patch, you get really high results (a single DRAM using 20W of power). The powercap driver in drivers/powercap/intel_rapl.c already has this change. 
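The practical effect of selecting the wrong init routine is a scaling error: the raw DRAM energy counter gets multiplied by the package energy unit instead of the fixed DRAM unit these server parts use. A small illustrative sketch follows; the 2^-14 J package unit and the 2^-16 J (about 15.3 uJ) fixed DRAM unit are typical values quoted for this family and are assumed here rather than read from hardware.

#include <stdio.h>

int main(void)
{
	/*
	 * Hypothetical numbers for illustration: a DRAM energy counter
	 * advanced by 'ticks' over a one second interval.
	 */
	unsigned long long ticks = 328000;

	/* Package energy status unit, often 2^-14 J on server parts. */
	double pkg_unit = 1.0 / (1 << 14);
	/* Fixed DRAM unit used by the server quirk: 2^-16 J (~15.3 uJ). */
	double dram_unit = 1.0 / (1 << 16);

	printf("scaled with package unit:    %.2f W\n", ticks * pkg_unit);
	printf("scaled with fixed DRAM unit: %.2f W\n", ticks * dram_unit);
	return 0;
}

The same counter delta that really corresponds to about 5 W shows up as roughly 20 W when scaled with the package unit, matching the inflated readings described in the commit message.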
Signed-off-by: Vince Weaver Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Stephane Eranian Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/rapl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 9d05c7e67f60..a45e2114a846 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -761,7 +761,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), From 2d271c9508474367a7526f636a672ea3eadba58e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 23 Mar 2017 11:46:03 +0100 Subject: [PATCH 056/465] KVM: x86: fix user triggerable warning in kvm_apic_accept_events() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 28bf28887976d8881a3a59491896c718fade7355 upstream. If we already entered/are about to enter SMM, don't allow switching to INIT/SIPI_RECEIVED, otherwise the next call to kvm_apic_accept_events() will report a warning. Same applies if we are already in MP state INIT_RECEIVED and SMM is requested to be turned on. Refuse to set the VCPU events in this case. Fixes: cd7764fe9f73 ("KVM: x86: latch INITs while in system management mode") Reported-by: Dmitry Vyukov Signed-off-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ccbd45ecd41a..c9adc6a64b9f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3127,6 +3127,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) return -EINVAL; + /* INITs are latched while in SMM */ + if (events->flags & KVM_VCPUEVENT_VALID_SMM && + (events->smi.smm || events->smi.pending) && + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) + return -EINVAL; + process_nmi(vcpu); vcpu->arch.exception.pending = events->exception.injected; vcpu->arch.exception.nr = events->exception.nr; @@ -7355,6 +7361,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, mp_state->mp_state != KVM_MP_STATE_RUNNABLE) return -EINVAL; + /* INITs are latched while in SMM */ + if ((is_smm(vcpu) || vcpu->arch.smi_pending) && + (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || + mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) + return -EINVAL; + if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); From d1daa545bbc0945f10e420d859d849252de7affb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 May 2017 16:20:18 +0200 Subject: [PATCH 057/465] Revert "KVM: Support vCPU-based gfn->hva cache" commit 4e335d9e7ddbcf83d03e7fbe65797ebed2272c18 upstream. This reverts commit bbd6411513aa8ef3ea02abab61318daf87c1af1e. 
I've been sitting on this revert for too long and it unfortunately missed 4.11. It's also the reason why I haven't merged ring-based dirty tracking for 4.12. Using kvm_vcpu_memslots in kvm_gfn_to_hva_cache_init and kvm_vcpu_write_guest_offset_cached means that the MSR value can now be used to access SMRAM, simply by making it point to an SMRAM physical address. This is problematic because it lets the guest OS overwrite memory that it shouldn't be able to touch. Fixes: bbd6411513aa8ef3ea02abab61318daf87c1af1e Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 22 +++++++++++---------- arch/x86/kvm/x86.c | 41 ++++++++++++++++++++-------------------- include/linux/kvm_host.h | 16 ++++++++-------- virt/kvm/kvm_main.c | 34 ++++++++++++++++----------------- 4 files changed, 58 insertions(+), 55 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bad6a25067bc..9fa5b8164961 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -529,14 +529,16 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) { - return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, &val, - sizeof(val)); + + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, + sizeof(val)); } static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) { - return kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, val, - sizeof(*val)); + + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, + sizeof(*val)); } static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) @@ -2285,8 +2287,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - if (kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, - sizeof(u32))) + if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32))) return; apic_set_tpr(vcpu->arch.apic, data & 0xff); @@ -2338,14 +2340,14 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) max_isr = 0; data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); - kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, - sizeof(u32)); + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32)); } int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) { if (vapic_addr) { - if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apic->vapic_cache, vapic_addr, sizeof(u32))) return -EINVAL; @@ -2439,7 +2441,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) vcpu->arch.pv_eoi.msr_val = data; if (!pv_eoi_enabled(vcpu)) return 0; - return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_eoi.data, + return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, addr, sizeof(u8)); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c9adc6a64b9f..421a069b5429 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1813,7 +1813,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) struct kvm_vcpu_arch *vcpu = &v->arch; struct pvclock_vcpu_time_info guest_hv_clock; - if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time, + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, &guest_hv_clock, sizeof(guest_hv_clock)))) return; @@ -1834,9 +1834,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); 
vcpu->hv_clock.version = guest_hv_clock.version + 1; - kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock.version)); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); smp_wmb(); @@ -1850,16 +1850,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); - kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); smp_wmb(); vcpu->hv_clock.version++; - kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock.version)); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); } static int kvm_guest_time_update(struct kvm_vcpu *v) @@ -2092,7 +2092,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 0; } - if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa, + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, sizeof(u32))) return 1; @@ -2111,7 +2111,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime, + if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) return; @@ -2122,7 +2122,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) vcpu->arch.st.steal.version += 1; - kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); smp_wmb(); @@ -2131,14 +2131,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu) vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); smp_wmb(); vcpu->arch.st.steal.version += 1; - kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); } @@ -2243,7 +2243,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & 1)) break; - if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_time, data & ~1ULL, sizeof(struct pvclock_vcpu_time_info))) vcpu->arch.pv_time_enabled = false; @@ -2264,7 +2264,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & KVM_STEAL_RESERVED_MASK) return 1; - if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime, + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, data & KVM_STEAL_VALID_BITS, sizeof(struct kvm_steal_time))) return 1; @@ -2878,7 +2878,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) vcpu->arch.st.steal.preempted = 1; - kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime, + kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal.preempted, offsetof(struct kvm_steal_time, preempted), sizeof(vcpu->arch.st.steal.preempted)); @@ -8548,8 +8548,9 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) { - return kvm_vcpu_write_guest_cached(vcpu, 
&vcpu->arch.apf.data, &val, - sizeof(val)); + + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, + sizeof(val)); } void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d0250744507a..333af5a41c78 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -641,18 +641,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); -int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); +int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); -int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); -int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len); -int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - gpa_t gpa, unsigned long len); +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len); +int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 88257b311cb5..7e80f62f034c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1973,18 +1973,18 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, return 0; } -int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, +int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init); +EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); -int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len) +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); int r; gpa_t gpa = ghc->gpa + offset; @@ -1994,7 +1994,7 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_ __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_vcpu_write_guest(vcpu, gpa, data, len); + return kvm_write_guest(kvm, gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2006,19 +2006,19 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_ return 0; } 
-EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached); +EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); -int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len); + return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached); +EXPORT_SYMBOL_GPL(kvm_write_guest_cached); -int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); int r; BUG_ON(len > ghc->len); @@ -2027,7 +2027,7 @@ int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *g __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len); + return kvm_read_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2038,7 +2038,7 @@ int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *g return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached); +EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { From 95121cc98ca0f9935c1d358180ecac1e4b161b64 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Apr 2017 17:59:58 +0200 Subject: [PATCH 058/465] KVM: arm/arm64: fix races in kvm_psci_vcpu_on commit 6c7a5dce22b3f3cc44be098e2837fa6797edb8b8 upstream. Fix potential races in kvm_psci_vcpu_on() by taking the kvm->lock mutex. In general, it's a bad idea to allow more than one PSCI_CPU_ON to process the same target VCPU at the same time. One such problem that may arise is that one PSCI_CPU_ON could be resetting the target vcpu, which fills the entire sys_regs array with a temporary value including the MPIDR register, while another looks up the VCPU based on the MPIDR value, resulting in no target VCPU found. Resolves both races found with the kvm-unit-tests/arm/psci unit test. 
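The locking pattern is the interesting part: the reset-and-publish step and the MPIDR lookup must be serialized by the same lock so the lookup can never observe the temporary register contents. Below is a toy userspace sketch of that pattern with purely illustrative names; it is not KVM code, only the shape of the fix.

#include <pthread.h>
#include <stdio.h>

/* Stand-in for a vCPU whose ID is temporarily clobbered during reset. */
static long target_id = 1;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *power_on(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	target_id = -1;		/* reset fills state with a temporary value */
	target_id = 1;		/* ...then restores the real ID */
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void *lookup(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	if (target_id != 1)
		printf("lookup failed: no such target\n");
	else
		printf("lookup found the target\n");
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, power_on, NULL);
	pthread_create(&b, NULL, lookup, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Dropping the mutex from either side reintroduces the window in which the lookup can observe the transient value and conclude that no such target exists.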
Reviewed-by: Marc Zyngier Reviewed-by: Christoffer Dall Reported-by: Levente Kurusa Suggested-by: Christoffer Dall Signed-off-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/psci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index c2b131527a64..a08d7a93aebb 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -208,9 +208,10 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { - int ret = 1; + struct kvm *kvm = vcpu->kvm; unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); unsigned long val; + int ret = 1; switch (psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: @@ -230,7 +231,9 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) break; case PSCI_0_2_FN_CPU_ON: case PSCI_0_2_FN64_CPU_ON: + mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); break; case PSCI_0_2_FN_AFFINITY_INFO: case PSCI_0_2_FN64_AFFINITY_INFO: @@ -279,6 +282,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -288,7 +292,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: + mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); break; default: val = PSCI_RET_NOT_SUPPORTED; From d97830327509462d6013128ab84b05036e939c1b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Apr 2017 19:06:48 +0100 Subject: [PATCH 059/465] arm64: KVM: Fix decoding of Rt/Rt2 when trapping AArch32 CP accesses commit c667186f1c01ca8970c785888868b7ffd74e51ee upstream. Our 32bit CP14/15 handling inherited some of the ARMv7 code for handling the trapped system registers, completely missing the fact that the fields for Rt and Rt2 are now 5 bit wide, and not 4... Let's fix it, and provide an accessor for the most common Rt case. 
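To make the field-width bug concrete, here is a small stand-alone sketch comparing the inherited 4-bit decode with the correct 5-bit decode of the Rt field at bits [9:5] of the syndrome; the helper functions are illustrative, the real accessor is the kvm_vcpu_sys_get_rt() added in the diff below.

#include <stdio.h>

/* Decode Rt from a synthetic syndrome value the old way (4-bit mask
 * inherited from ARMv7) and the fixed way (5-bit mask). */
static int rt_4bit(unsigned int hsr) { return (hsr >> 5) & 0xf;  }
static int rt_5bit(unsigned int hsr) { return (hsr >> 5) & 0x1f; }

int main(void)
{
	for (int reg = 0; reg < 32; reg++) {
		unsigned int hsr = reg << 5;	/* syndrome with only Rt set */

		if (rt_4bit(hsr) != rt_5bit(hsr))
			printf("Rt=%2d decodes as %2d with a 4-bit mask\n",
			       reg, rt_4bit(hsr));
	}
	return 0;
}

Every register from r16 to r31 aliases onto r0..r15 under the old mask, which is exactly the corruption the patch removes.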
Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_emulate.h | 6 ++++++ arch/arm64/kvm/sys_regs.c | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index f5ea0ba70f07..fe39e6841326 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -240,6 +240,12 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; } +static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; +} + static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0e26f8c2b56f..79168b38eeba 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1638,8 +1638,8 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); - int Rt = (hsr >> 5) & 0xf; - int Rt2 = (hsr >> 10) & 0xf; + int Rt = kvm_vcpu_sys_get_rt(vcpu); + int Rt2 = (hsr >> 10) & 0x1f; params.is_aarch32 = true; params.is_32bit = false; @@ -1690,7 +1690,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); - int Rt = (hsr >> 5) & 0xf; + int Rt = kvm_vcpu_sys_get_rt(vcpu); params.is_aarch32 = true; params.is_32bit = true; @@ -1805,7 +1805,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_hsr(vcpu); - int Rt = (esr >> 5) & 0x1f; + int Rt = kvm_vcpu_sys_get_rt(vcpu); int ret; trace_kvm_handle_sys_reg(esr); From d9ae27b661af72b7140dd2b700c937f5e273cd2c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 22 Apr 2017 17:22:09 -0400 Subject: [PATCH 060/465] block: fix blk_integrity_register to use template's interval_exp if not 0 commit 2859323e35ab5fc42f351fbda23ab544eaa85945 upstream. When registering an integrity profile: if the template's interval_exp is not 0 use it, otherwise use the ilog2() of logical block size of the provided gendisk. This fixes a long-standing DM linear target bug where it cannot pass integrity data to the underlying device if its logical block size conflicts with the underlying device's logical block size. Reported-by: Mikulas Patocka Signed-off-by: Mike Snitzer Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-integrity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 35c5af1ea068..e4ebd79de679 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -412,7 +412,8 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE | template->flags; - bi->interval_exp = ilog2(queue_logical_block_size(disk->queue)); + bi->interval_exp = template->interval_exp ? : + ilog2(queue_logical_block_size(disk->queue)); bi->profile = template->profile ? 
template->profile : &nop_profile; bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; From 72a03cf8e35fbff333ecf2edaf33febd5d6fe60d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Mar 2017 16:49:19 +0200 Subject: [PATCH 061/465] crypto: s5p-sss - Close possible race for completed requests commit 42d5c176b76e190a4a3e0dfeffdae661755955b6 upstream. Driver is capable of handling only one request at a time and it stores it in its state container struct s5p_aes_dev. This stored request must be protected between concurrent invocations (e.g. completing current request and scheduling new one). Combination of lock and "busy" field is used for that purpose. When "busy" field is true, the driver will not accept new request thus it will not overwrite currently handled data. However commit 28b62b145868 ("crypto: s5p-sss - Fix spinlock recursion on LRW(AES)") moved some of the write to "busy" field out of a lock protected critical section. This might lead to potential race between completing current request and scheduling a new one. Effectively the request completion might try to operate on new crypto request. Fixes: 28b62b145868 ("crypto: s5p-sss - Fix spinlock recursion on LRW(AES)") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/s5p-sss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 1b9da3dc799b..6c620487e9c2 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -287,7 +287,6 @@ static void s5p_sg_done(struct s5p_aes_dev *dev) static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) { dev->req->base.complete(&dev->req->base, err); - dev->busy = false; } static void s5p_unset_outdata(struct s5p_aes_dev *dev) @@ -462,7 +461,7 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&dev->lock, flags); s5p_aes_complete(dev, 0); - dev->busy = true; + /* Device is still busy */ tasklet_schedule(&dev->tasklet); } else { /* @@ -483,6 +482,7 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) error: s5p_sg_done(dev); + dev->busy = false; spin_unlock_irqrestore(&dev->lock, flags); s5p_aes_complete(dev, err); @@ -634,6 +634,7 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) indata_error: s5p_sg_done(dev); + dev->busy = false; spin_unlock_irqrestore(&dev->lock, flags); s5p_aes_complete(dev, err); } From b576fed7c831d4aeaf29029770066a8d69bef230 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Mon, 24 Apr 2017 11:15:23 +0200 Subject: [PATCH 062/465] crypto: algif_aead - Require setkey before accept(2) commit 2a2a251f110576b1d89efbd0662677d7e7db21a8 upstream. Some cipher implementations will crash if you try to use them without calling setkey first. This patch adds a check so that the accept(2) call will fail with -ENOKEY if setkey hasn't been done on the socket yet. 
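From userspace the required ordering looks like the following minimal AF_ALG sketch: bind the tfm socket, set the key, and only then accept(2) the operation socket. The algorithm name, key size and the SOL_ALG fallback definition are assumptions made for illustration and are not taken from the patch.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "aead",
		.salg_name   = "gcm(aes)",	/* any AEAD the kernel provides */
	};
	unsigned char key[16] = { 0 };		/* illustrative AES-128 key */
	int tfmfd, opfd;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (tfmfd < 0 || bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		perror("bind");
		return 1;
	}

	/* Setting the key first is what makes the later accept(2) legal. */
	if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) < 0) {
		perror("ALG_SET_KEY");
		return 1;
	}

	opfd = accept(tfmfd, NULL, NULL);
	if (opfd < 0)
		perror("accept");
	else
		close(opfd);

	close(tfmfd);
	return 0;
}

With this patch applied, moving the accept(2) call ahead of ALG_SET_KEY makes it fail with ENOKEY instead of handing out an operation socket backed by an unkeyed cipher.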
Fixes: 400c40cf78da ("crypto: algif - add AEAD support") Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algif_aead.c | 157 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 149 insertions(+), 8 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index ef59d9926ee9..8af664f7d27c 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -45,6 +45,11 @@ struct aead_async_req { char iv[]; }; +struct aead_tfm { + struct crypto_aead *aead; + bool has_key; +}; + struct aead_ctx { struct aead_sg_list tsgl; struct aead_async_rsgl first_rsgl; @@ -723,24 +728,146 @@ static struct proto_ops algif_aead_ops = { .poll = aead_poll, }; +static int aead_check_key(struct socket *sock) +{ + int err = 0; + struct sock *psk; + struct alg_sock *pask; + struct aead_tfm *tfm; + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + + lock_sock(sk); + if (ask->refcnt) + goto unlock_child; + + psk = ask->parent; + pask = alg_sk(ask->parent); + tfm = pask->private; + + err = -ENOKEY; + lock_sock_nested(psk, SINGLE_DEPTH_NESTING); + if (!tfm->has_key) + goto unlock; + + if (!pask->refcnt++) + sock_hold(psk); + + ask->refcnt = 1; + sock_put(psk); + + err = 0; + +unlock: + release_sock(psk); +unlock_child: + release_sock(sk); + + return err; +} + +static int aead_sendmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t size) +{ + int err; + + err = aead_check_key(sock); + if (err) + return err; + + return aead_sendmsg(sock, msg, size); +} + +static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + int err; + + err = aead_check_key(sock); + if (err) + return err; + + return aead_sendpage(sock, page, offset, size, flags); +} + +static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) +{ + int err; + + err = aead_check_key(sock); + if (err) + return err; + + return aead_recvmsg(sock, msg, ignored, flags); +} + +static struct proto_ops algif_aead_ops_nokey = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .accept = sock_no_accept, + .setsockopt = sock_no_setsockopt, + + .release = af_alg_release, + .sendmsg = aead_sendmsg_nokey, + .sendpage = aead_sendpage_nokey, + .recvmsg = aead_recvmsg_nokey, + .poll = aead_poll, +}; + static void *aead_bind(const char *name, u32 type, u32 mask) { - return crypto_alloc_aead(name, type, mask); + struct aead_tfm *tfm; + struct crypto_aead *aead; + + tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); + if (!tfm) + return ERR_PTR(-ENOMEM); + + aead = crypto_alloc_aead(name, type, mask); + if (IS_ERR(aead)) { + kfree(tfm); + return ERR_CAST(aead); + } + + tfm->aead = aead; + + return tfm; } static void aead_release(void *private) { - crypto_free_aead(private); + struct aead_tfm *tfm = private; + + crypto_free_aead(tfm->aead); + kfree(tfm); } static int aead_setauthsize(void *private, unsigned int authsize) { - return crypto_aead_setauthsize(private, authsize); + struct aead_tfm *tfm = private; + + return crypto_aead_setauthsize(tfm->aead, authsize); } static int aead_setkey(void *private, const u8 *key, unsigned int keylen) { - return crypto_aead_setkey(private, key, keylen); + struct aead_tfm *tfm = private; + int err; + + err = 
crypto_aead_setkey(tfm->aead, key, keylen); + tfm->has_key = !err; + + return err; } static void aead_sock_destruct(struct sock *sk) @@ -757,12 +884,14 @@ static void aead_sock_destruct(struct sock *sk) af_alg_release_parent(sk); } -static int aead_accept_parent(void *private, struct sock *sk) +static int aead_accept_parent_nokey(void *private, struct sock *sk) { struct aead_ctx *ctx; struct alg_sock *ask = alg_sk(sk); - unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(private); - unsigned int ivlen = crypto_aead_ivsize(private); + struct aead_tfm *tfm = private; + struct crypto_aead *aead = tfm->aead; + unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(aead); + unsigned int ivlen = crypto_aead_ivsize(aead); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) @@ -789,7 +918,7 @@ static int aead_accept_parent(void *private, struct sock *sk) ask->private = ctx; - aead_request_set_tfm(&ctx->aead_req, private); + aead_request_set_tfm(&ctx->aead_req, aead); aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_complete, &ctx->completion); @@ -798,13 +927,25 @@ static int aead_accept_parent(void *private, struct sock *sk) return 0; } +static int aead_accept_parent(void *private, struct sock *sk) +{ + struct aead_tfm *tfm = private; + + if (!tfm->has_key) + return -ENOKEY; + + return aead_accept_parent_nokey(private, sk); +} + static const struct af_alg_type algif_type_aead = { .bind = aead_bind, .release = aead_release, .setkey = aead_setkey, .setauthsize = aead_setauthsize, .accept = aead_accept_parent, + .accept_nokey = aead_accept_parent_nokey, .ops = &algif_aead_ops, + .ops_nokey = &algif_aead_ops_nokey, .name = "aead", .owner = THIS_MODULE }; From c133daf731d936e9861c99fcc8d303e1cd11645f Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Thu, 20 Apr 2017 15:24:09 -0500 Subject: [PATCH 063/465] crypto: ccp - Use only the relevant interrupt bits commit 56467cb11cf8ae4db9003f54b3d3425b5f07a10a upstream. Each CCP queue can product interrupts for 4 conditions: operation complete, queue empty, error, and queue stopped. This driver only works with completion and error events. 
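The mask arithmetic is small enough to show on its own. The bit values below mirror the defines added to ccp-dev.h in the diff that follows; the latched status value and the surrounding program are invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define INT_COMPLETION       0x1
#define INT_ERROR            0x2
#define INT_QUEUE_STOPPED    0x4
#define INT_EMPTY_QUEUE      0x8
#define SUPPORTED_INTERRUPTS (INT_COMPLETION | INT_ERROR)

int main(void)
{
    uint32_t latched = INT_COMPLETION | INT_EMPTY_QUEUE;  /* made-up status */

    printf("enable mask          : 0x%x\n", SUPPORTED_INTERRUPTS);
    printf("conditions serviced  : 0x%x\n", latched & SUPPORTED_INTERRUPTS);
    printf("conditions left alone: 0x%x\n", latched & ~SUPPORTED_INTERRUPTS);
    return 0;
}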
Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-dev-v5.c | 9 +++++---- drivers/crypto/ccp/ccp-dev.h | 5 ++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index fc08b4ed69d9..a9e93155aad0 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -748,7 +748,7 @@ static int ccp5_init(struct ccp_device *ccp) ioread32(cmd_q->reg_status); /* Clear the interrupts */ - iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status); + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status); } dev_dbg(dev, "Requesting an IRQ...\n"); @@ -829,7 +829,7 @@ static int ccp5_init(struct ccp_device *ccp) /* Enable interrupts */ for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; - iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable); + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_int_enable); } dev_dbg(dev, "Registering device...\n"); @@ -889,7 +889,7 @@ static void ccp5_destroy(struct ccp_device *ccp) iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control); /* Disable the interrupts */ - iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status); + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status); /* Clear the interrupt status */ iowrite32(0x00, cmd_q->reg_int_enable); @@ -949,7 +949,8 @@ static irqreturn_t ccp5_irq_handler(int irq, void *data) cmd_q->int_rcvd = 1; /* Acknowledge the interrupt and wake the kthread */ - iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status); + iowrite32(SUPPORTED_INTERRUPTS, + cmd_q->reg_interrupt_status); wake_up_interruptible(&cmd_q->int_queue); } } diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index aa36f3f81860..8d9c5dd20268 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -109,9 +109,8 @@ #define INT_COMPLETION 0x1 #define INT_ERROR 0x2 #define INT_QUEUE_STOPPED 0x4 -#define ALL_INTERRUPTS (INT_COMPLETION| \ - INT_ERROR| \ - INT_QUEUE_STOPPED) +#define INT_EMPTY_QUEUE 0x8 +#define SUPPORTED_INTERRUPTS (INT_COMPLETION | INT_ERROR) #define LSB_REGION_WIDTH 5 #define MAX_LSB_CNT 8 From 511b79fb949e8eacfbfd0c750700e814c41e4438 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Thu, 20 Apr 2017 15:24:22 -0500 Subject: [PATCH 064/465] crypto: ccp - Disable interrupts early on unload commit 116591fe3eef11c6f06b662c9176385f13891183 upstream. Ensure that we disable interrupts first when shutting down the driver. 
Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-dev-v5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index a9e93155aad0..78f014e2a51b 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -889,10 +889,10 @@ static void ccp5_destroy(struct ccp_device *ccp) iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control); /* Disable the interrupts */ - iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status); + iowrite32(0x00, cmd_q->reg_int_enable); /* Clear the interrupt status */ - iowrite32(0x00, cmd_q->reg_int_enable); + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status); ioread32(cmd_q->reg_int_status); ioread32(cmd_q->reg_status); } From 747cbf47532650c5883ffa598fd290ea048acca9 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 21 Apr 2017 10:50:05 -0500 Subject: [PATCH 065/465] crypto: ccp - Change ISR handler method for a v3 CCP commit 7b537b24e76a1e8e6d7ea91483a45d5b1426809b upstream. The CCP has the ability to perform several operations simultaneously, but only one interrupt. When implemented as a PCI device and using MSI-X/MSI interrupts, use a tasklet model to service interrupts. By disabling and enabling interrupts from the CCP, coupled with the queuing that tasklets provide, we can ensure that all events (occurring on the device) are recognized and serviced. This change fixes a problem wherein 2 or more busy queues can cause notification bits to change state while a (CCP) interrupt is being serviced, but after the queue state has been evaluated. This results in the event being 'lost' and the queue hanging, waiting to be serviced. Since the status bits are never fully de-asserted, the CCP never generates another interrupt (all bits zero -> one or more bits one), and no further CCP operations will be executed. 
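The top-half/bottom-half split can be pictured with a plain user-space simulation. Nothing below is driver code: the queue bits, the pending flag and the main() sequence are invented, and the point is only that the deferred routine drains every latched bit before re-enabling, so a queue that signals while another is being serviced is still seen.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t irq_mask;     /* which queues may raise interrupts */
static uint32_t irq_status;   /* latched per-queue event bits */
static bool bh_pending;

static void irq_bottom_half(void)
{
    while (irq_status) {
        uint32_t q = irq_status & -irq_status;  /* lowest set bit */
        printf("servicing queue bit 0x%x\n", q);
        irq_status &= ~q;                       /* acknowledge it */
    }
    irq_mask = 0xf;            /* unmask only once fully drained */
}

static void irq_top_half(void)
{
    irq_mask = 0;              /* mask further interrupts ... */
    bh_pending = true;         /* ... and defer the real work */
}

int main(void)
{
    irq_mask = 0xf;
    irq_status = 0x5;          /* two queues signal "at once" */
    irq_top_half();
    irq_status |= 0x2;         /* a third fires before the deferred work runs */

    if (bh_pending) {
        bh_pending = false;
        irq_bottom_half();
    }
    printf("mask restored to 0x%x, status now 0x%x\n", irq_mask, irq_status);
    return 0;
}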
Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-dev-v3.c | 120 +++++++++++++++++++------------- drivers/crypto/ccp/ccp-dev.h | 3 + drivers/crypto/ccp/ccp-pci.c | 2 + 3 files changed, 75 insertions(+), 50 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 7bc09989e18a..c46eeda71595 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -315,17 +315,73 @@ static int ccp_perform_ecc(struct ccp_op *op) return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } +static void ccp_disable_queue_interrupts(struct ccp_device *ccp) +{ + iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); +} + +static void ccp_enable_queue_interrupts(struct ccp_device *ccp) +{ + iowrite32(ccp->qim, ccp->io_regs + IRQ_MASK_REG); +} + +static void ccp_irq_bh(unsigned long data) +{ + struct ccp_device *ccp = (struct ccp_device *)data; + struct ccp_cmd_queue *cmd_q; + u32 q_int, status; + unsigned int i; + + status = ioread32(ccp->io_regs + IRQ_STATUS_REG); + + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + q_int = status & (cmd_q->int_ok | cmd_q->int_err); + if (q_int) { + cmd_q->int_status = status; + cmd_q->q_status = ioread32(cmd_q->reg_status); + cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); + + /* On error, only save the first error value */ + if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error) + cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); + + cmd_q->int_rcvd = 1; + + /* Acknowledge the interrupt and wake the kthread */ + iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG); + wake_up_interruptible(&cmd_q->int_queue); + } + } + ccp_enable_queue_interrupts(ccp); +} + +static irqreturn_t ccp_irq_handler(int irq, void *data) +{ + struct device *dev = data; + struct ccp_device *ccp = dev_get_drvdata(dev); + + ccp_disable_queue_interrupts(ccp); + if (ccp->use_tasklet) + tasklet_schedule(&ccp->irq_tasklet); + else + ccp_irq_bh((unsigned long)ccp); + + return IRQ_HANDLED; +} + static int ccp_init(struct ccp_device *ccp) { struct device *dev = ccp->dev; struct ccp_cmd_queue *cmd_q; struct dma_pool *dma_pool; char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; - unsigned int qmr, qim, i; + unsigned int qmr, i; int ret; /* Find available queues */ - qim = 0; + ccp->qim = 0; qmr = ioread32(ccp->io_regs + Q_MASK_REG); for (i = 0; i < MAX_HW_QUEUES; i++) { if (!(qmr & (1 << i))) @@ -370,7 +426,7 @@ static int ccp_init(struct ccp_device *ccp) init_waitqueue_head(&cmd_q->int_queue); /* Build queue interrupt mask (two interrupts per queue) */ - qim |= cmd_q->int_ok | cmd_q->int_err; + ccp->qim |= cmd_q->int_ok | cmd_q->int_err; #ifdef CONFIG_ARM64 /* For arm64 set the recommended queue cache settings */ @@ -388,14 +444,14 @@ static int ccp_init(struct ccp_device *ccp) dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); /* Disable and clear interrupts until ready */ - iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); + ccp_disable_queue_interrupts(ccp); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; ioread32(cmd_q->reg_int_status); ioread32(cmd_q->reg_status); } - iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); + iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG); /* Request an irq */ ret = ccp->get_irq(ccp); @@ -404,6 +460,11 @@ static int ccp_init(struct ccp_device *ccp) goto e_pool; } + /* Initialize the ISR tasklet? 
*/ + if (ccp->use_tasklet) + tasklet_init(&ccp->irq_tasklet, ccp_irq_bh, + (unsigned long)ccp); + dev_dbg(dev, "Starting threads...\n"); /* Create a kthread for each queue */ for (i = 0; i < ccp->cmd_q_count; i++) { @@ -426,7 +487,7 @@ static int ccp_init(struct ccp_device *ccp) dev_dbg(dev, "Enabling interrupts...\n"); /* Enable interrupts */ - iowrite32(qim, ccp->io_regs + IRQ_MASK_REG); + ccp_enable_queue_interrupts(ccp); dev_dbg(dev, "Registering device...\n"); ccp_add_device(ccp); @@ -463,7 +524,7 @@ static void ccp_destroy(struct ccp_device *ccp) { struct ccp_cmd_queue *cmd_q; struct ccp_cmd *cmd; - unsigned int qim, i; + unsigned int i; /* Unregister the DMA engine */ ccp_dmaengine_unregister(ccp); @@ -474,22 +535,15 @@ static void ccp_destroy(struct ccp_device *ccp) /* Remove this device from the list of available units */ ccp_del_device(ccp); - /* Build queue interrupt mask (two interrupt masks per queue) */ - qim = 0; - for (i = 0; i < ccp->cmd_q_count; i++) { - cmd_q = &ccp->cmd_q[i]; - qim |= cmd_q->int_ok | cmd_q->int_err; - } - /* Disable and clear interrupts */ - iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); + ccp_disable_queue_interrupts(ccp); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; ioread32(cmd_q->reg_int_status); ioread32(cmd_q->reg_status); } - iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); + iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG); /* Stop the queue kthreads */ for (i = 0; i < ccp->cmd_q_count; i++) @@ -516,40 +570,6 @@ static void ccp_destroy(struct ccp_device *ccp) } } -static irqreturn_t ccp_irq_handler(int irq, void *data) -{ - struct device *dev = data; - struct ccp_device *ccp = dev_get_drvdata(dev); - struct ccp_cmd_queue *cmd_q; - u32 q_int, status; - unsigned int i; - - status = ioread32(ccp->io_regs + IRQ_STATUS_REG); - - for (i = 0; i < ccp->cmd_q_count; i++) { - cmd_q = &ccp->cmd_q[i]; - - q_int = status & (cmd_q->int_ok | cmd_q->int_err); - if (q_int) { - cmd_q->int_status = status; - cmd_q->q_status = ioread32(cmd_q->reg_status); - cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); - - /* On error, only save the first error value */ - if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error) - cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); - - cmd_q->int_rcvd = 1; - - /* Acknowledge the interrupt and wake the kthread */ - iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG); - wake_up_interruptible(&cmd_q->int_queue); - } - } - - return IRQ_HANDLED; -} - static const struct ccp_actions ccp3_actions = { .aes = ccp_perform_aes, .xts_aes = ccp_perform_xts_aes, diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 8d9c5dd20268..6bb60e11b0e6 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -336,7 +336,10 @@ struct ccp_device { void *dev_specific; int (*get_irq)(struct ccp_device *ccp); void (*free_irq)(struct ccp_device *ccp); + unsigned int qim; unsigned int irq; + bool use_tasklet; + struct tasklet_struct irq_tasklet; /* I/O area used for device communication. The register mapping * starts at an offset into the mapped bar. 
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c index 28a9996c1085..e880d4cf4ada 100644 --- a/drivers/crypto/ccp/ccp-pci.c +++ b/drivers/crypto/ccp/ccp-pci.c @@ -69,6 +69,7 @@ static int ccp_get_msix_irqs(struct ccp_device *ccp) goto e_irq; } } + ccp->use_tasklet = true; return 0; @@ -100,6 +101,7 @@ static int ccp_get_msi_irq(struct ccp_device *ccp) dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret); goto e_msi; } + ccp->use_tasklet = true; return 0; From 6ab60d2b26dd94a7918bd1da902c2b9bea18ba10 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 21 Apr 2017 10:50:14 -0500 Subject: [PATCH 066/465] crypto: ccp - Change ISR handler method for a v5 CCP commit 6263b51eb3190d30351360fd168959af7e3a49a9 upstream. The CCP has the ability to perform several operations simultaneously, but only one interrupt. When implemented as a PCI device and using MSI-X/MSI interrupts, use a tasklet model to service interrupts. By disabling and enabling interrupts from the CCP, coupled with the queuing that tasklets provide, we can ensure that all events (occurring on the device) are recognized and serviced. This change fixes a problem wherein 2 or more busy queues can cause notification bits to change state while a (CCP) interrupt is being serviced, but after the queue state has been evaluated. This results in the event being 'lost' and the queue hanging, waiting to be serviced. Since the status bits are never fully de-asserted, the CCP never generates another interrupt (all bits zero -> one or more bits one), and no further CCP operations will be executed. Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-dev-v5.c | 111 +++++++++++++++++++------------- 1 file changed, 67 insertions(+), 44 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index 78f014e2a51b..4e2b01091715 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -653,6 +653,65 @@ static int ccp_assign_lsbs(struct ccp_device *ccp) return rc; } +static void ccp5_disable_queue_interrupts(struct ccp_device *ccp) +{ + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) + iowrite32(0x0, ccp->cmd_q[i].reg_int_enable); +} + +static void ccp5_enable_queue_interrupts(struct ccp_device *ccp) +{ + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) + iowrite32(SUPPORTED_INTERRUPTS, ccp->cmd_q[i].reg_int_enable); +} + +static void ccp5_irq_bh(unsigned long data) +{ + struct ccp_device *ccp = (struct ccp_device *)data; + u32 status; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i]; + + status = ioread32(cmd_q->reg_interrupt_status); + + if (status) { + cmd_q->int_status = status; + cmd_q->q_status = ioread32(cmd_q->reg_status); + cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); + + /* On error, only save the first error value */ + if ((status & INT_ERROR) && !cmd_q->cmd_error) + cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); + + cmd_q->int_rcvd = 1; + + /* Acknowledge the interrupt and wake the kthread */ + iowrite32(status, cmd_q->reg_interrupt_status); + wake_up_interruptible(&cmd_q->int_queue); + } + } + ccp5_enable_queue_interrupts(ccp); +} + +static irqreturn_t ccp5_irq_handler(int irq, void *data) +{ + struct device *dev = data; + struct ccp_device *ccp = dev_get_drvdata(dev); + + ccp5_disable_queue_interrupts(ccp); + if (ccp->use_tasklet) + tasklet_schedule(&ccp->irq_tasklet); + else + 
ccp5_irq_bh((unsigned long)ccp); + return IRQ_HANDLED; +} + static int ccp5_init(struct ccp_device *ccp) { struct device *dev = ccp->dev; @@ -736,18 +795,17 @@ static int ccp5_init(struct ccp_device *ccp) } /* Turn off the queues and disable interrupts until ready */ + ccp5_disable_queue_interrupts(ccp); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; cmd_q->qcontrol = 0; /* Start with nothing */ iowrite32(cmd_q->qcontrol, cmd_q->reg_control); - /* Disable the interrupts */ - iowrite32(0x00, cmd_q->reg_int_enable); ioread32(cmd_q->reg_int_status); ioread32(cmd_q->reg_status); - /* Clear the interrupts */ + /* Clear the interrupt status */ iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status); } @@ -758,6 +816,10 @@ static int ccp5_init(struct ccp_device *ccp) dev_err(dev, "unable to allocate an IRQ\n"); goto e_pool; } + /* Initialize the ISR tasklet */ + if (ccp->use_tasklet) + tasklet_init(&ccp->irq_tasklet, ccp5_irq_bh, + (unsigned long)ccp); dev_dbg(dev, "Loading LSB map...\n"); /* Copy the private LSB mask to the public registers */ @@ -826,11 +888,7 @@ static int ccp5_init(struct ccp_device *ccp) } dev_dbg(dev, "Enabling interrupts...\n"); - /* Enable interrupts */ - for (i = 0; i < ccp->cmd_q_count; i++) { - cmd_q = &ccp->cmd_q[i]; - iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_int_enable); - } + ccp5_enable_queue_interrupts(ccp); dev_dbg(dev, "Registering device...\n"); /* Put this on the unit list to make it available */ @@ -882,15 +940,13 @@ static void ccp5_destroy(struct ccp_device *ccp) ccp_del_device(ccp); /* Disable and clear interrupts */ + ccp5_disable_queue_interrupts(ccp); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; /* Turn off the run bit */ iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control); - /* Disable the interrupts */ - iowrite32(0x00, cmd_q->reg_int_enable); - /* Clear the interrupt status */ iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status); ioread32(cmd_q->reg_int_status); @@ -925,39 +981,6 @@ static void ccp5_destroy(struct ccp_device *ccp) } } -static irqreturn_t ccp5_irq_handler(int irq, void *data) -{ - struct device *dev = data; - struct ccp_device *ccp = dev_get_drvdata(dev); - u32 status; - unsigned int i; - - for (i = 0; i < ccp->cmd_q_count; i++) { - struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i]; - - status = ioread32(cmd_q->reg_interrupt_status); - - if (status) { - cmd_q->int_status = status; - cmd_q->q_status = ioread32(cmd_q->reg_status); - cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); - - /* On error, only save the first error value */ - if ((status & INT_ERROR) && !cmd_q->cmd_error) - cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); - - cmd_q->int_rcvd = 1; - - /* Acknowledge the interrupt and wake the kthread */ - iowrite32(SUPPORTED_INTERRUPTS, - cmd_q->reg_interrupt_status); - wake_up_interruptible(&cmd_q->int_queue); - } - } - - return IRQ_HANDLED; -} - static void ccp5_config(struct ccp_device *ccp) { /* Public side */ From f38faf569e923b121d666e59c61ebfe14e9c51cb Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Mon, 24 Apr 2017 14:21:53 +0200 Subject: [PATCH 067/465] dm crypt: rewrite (wipe) key in crypto layer using random data commit c82feeec9a014b72c4ffea36648cfb6f81cc1b73 upstream. The message "key wipe" used to wipe real key stored in crypto layer by rewriting it with zeroes. Since commit 28856a9 ("crypto: xts - consolidate sanity check for keys") this no longer works in FIPS mode for XTS. 
While running in FIPS mode the crypto key part has to differ from the tweak key. Fixes: 28856a9 ("crypto: xts - consolidate sanity check for keys") Signed-off-by: Ondrej Kozina Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 389a3637ffcc..b8f3d77d3b5c 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1649,12 +1649,16 @@ static int crypt_set_key(struct crypt_config *cc, char *key) static int crypt_wipe_key(struct crypt_config *cc) { + int r; + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); - memset(&cc->key, 0, cc->key_size * sizeof(u8)); + get_random_bytes(&cc->key, cc->key_size); kzfree(cc->key_string); cc->key_string = NULL; + r = crypt_setkey(cc); + memset(&cc->key, 0, cc->key_size * sizeof(u8)); - return crypt_setkey(cc); + return r; } static void crypt_dtr(struct dm_target *ti) From ee68aa113f7341f850c71156316913df2db580fd Mon Sep 17 00:00:00 2001 From: Somasundaram Krishnasamy Date: Fri, 7 Apr 2017 12:14:55 -0700 Subject: [PATCH 068/465] dm era: save spacemap metadata root after the pre-commit commit 117aceb030307dcd431fdcff87ce988d3016c34a upstream. When committing era metadata to disk, it doesn't always save the latest spacemap metadata root in superblock. Due to this, metadata is getting corrupted sometimes when reopening the device. The correct order of update should be, pre-commit (shadows spacemap root), save the spacemap root (newly shadowed block) to in-core superblock and then the final commit. Signed-off-by: Somasundaram Krishnasamy Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-era-target.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 9fab33b113c4..68d4084377ad 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -961,15 +961,15 @@ static int metadata_commit(struct era_metadata *md) } } - r = save_sm_root(md); + r = dm_tm_pre_commit(md->tm); if (r) { - DMERR("%s: save_sm_root failed", __func__); + DMERR("%s: pre commit failed", __func__); return r; } - r = dm_tm_pre_commit(md->tm); + r = save_sm_root(md); if (r) { - DMERR("%s: pre commit failed", __func__); + DMERR("%s: save_sm_root failed", __func__); return r; } From 899750e7d9e811102787bec025cf33b23e49c532 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:19 -0700 Subject: [PATCH 069/465] dm rq: check blk_mq_register_dev() return value in dm_mq_init_request_queue() commit 23a601248958fa4142d49294352fe8d1fdf3e509 upstream. Otherwise the request-based DM blk-mq request_queue will be put into service without being properly exported via sysfs. 
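Stepping back to the dm crypt change above, the wipe now runs in three steps: fill the in-core key with fresh random bytes, push that key into the crypto layer so the old key is overwritten there as well, then clear the in-core copy. A user-space sketch of the same ordering, with an invented setkey stand-in and an arbitrary key size:

#include <stdio.h>
#include <string.h>
#include <sys/random.h>

static int fake_setkey(const unsigned char *key, size_t len)
{
    /* stand-in for pushing the key into the crypto layer */
    return len ? 0 : -1;
}

static int wipe_key(unsigned char *key, size_t len)
{
    int r;

    if (getrandom(key, len, 0) != (ssize_t)len)
        return -1;
    r = fake_setkey(key, len);   /* overwrite the key held by the cipher */
    memset(key, 0, len);         /* drop the in-core copy (explicit_bzero()
                                  * would be the hardened choice) */
    return r;
}

int main(void)
{
    unsigned char key[32] = { 0xaa };
    int r = wipe_key(key, sizeof(key));

    printf("wipe %s, in-core key cleared: %s\n",
           r ? "failed" : "ok", key[0] == 0 ? "yes" : "no");
    return 0;
}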
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-rq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 0b081d170087..505b9f6b4a47 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -810,10 +810,14 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) dm_init_md_queue(md); /* backfill 'mq' sysfs registration normally done in blk_register_queue */ - blk_mq_register_dev(disk_to_dev(md->disk), q); + err = blk_mq_register_dev(disk_to_dev(md->disk), q); + if (err) + goto out_cleanup_queue; return 0; +out_cleanup_queue: + blk_cleanup_queue(q); out_tag_set: blk_mq_free_tag_set(md->tag_set); out_kfree_tag_set: From 34224e0e1c3141c11c8176be7c14a0360ee9bfe1 Mon Sep 17 00:00:00 2001 From: Dennis Yang Date: Tue, 18 Apr 2017 15:27:06 +0800 Subject: [PATCH 070/465] dm thin: fix a memory leak when passing discard bio down commit 948f581a53b704b984aa20df009f0a2b4cf7f907 upstream. dm-thin does not free the discard_parent bio after all chained sub bios finished. The following kmemleak report could be observed after pool with discard_passdown option processes discard bios in linux v4.11-rc7. To fix this, we drop the discard_parent bio reference when its endio (passdown_endio) called. unreferenced object 0xffff8803d6b29700 (size 256): comm "kworker/u8:0", pid 30349, jiffies 4379504020 (age 143002.776s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 01 00 00 00 00 00 00 f0 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x49/0xa0 [] kmem_cache_alloc+0xb4/0x100 [] mempool_alloc_slab+0x10/0x20 [] mempool_alloc+0x55/0x150 [] bio_alloc_bioset+0xb9/0x260 [] process_prepared_discard_passdown_pt1+0x40/0x1c0 [dm_thin_pool] [] break_up_discard_bio+0x1a9/0x200 [dm_thin_pool] [] process_discard_cell_passdown+0x24/0x40 [dm_thin_pool] [] process_discard_bio+0xdd/0xf0 [dm_thin_pool] [] do_worker+0xa76/0xd50 [dm_thin_pool] [] process_one_work+0x139/0x370 [] worker_thread+0x61/0x450 [] kthread+0xd6/0xf0 [] ret_from_fork+0x3f/0x70 [] 0xffffffffffffffff Signed-off-by: Dennis Yang Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2b266a2b5035..5742e5eb0704 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1069,6 +1069,7 @@ static void passdown_endio(struct bio *bio) * to unmap (we ignore err). */ queue_passdown_pt2(bio->bi_private); + bio_put(bio); } static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) From 1e0b0a9ef081bbe75646401db313aa84636b08a0 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 13 Apr 2017 14:10:15 -0600 Subject: [PATCH 071/465] vfio/type1: Remove locked page accounting workqueue commit 0cfef2b7410b64d7a430947e0b533314c4f97153 upstream. If the mmap_sem is contented then the vfio type1 IOMMU backend will defer locked page accounting updates to a workqueue task. This has a few problems and depending on which side the user tries to play, they might be over-penalized for unmaps that haven't yet been accounted or race the workqueue to enter more mappings than they're allowed. The original intent of this workqueue mechanism seems to be focused on reducing latency through the ioctl, but we cannot do so at the cost of correctness. 
Remove this workqueue mechanism and update the callers to allow for failure. We can also now recheck the limit under write lock to make sure we don't exceed it. vfio_pin_pages_remote() also now necessarily includes an unwind path which we can jump to directly if the consecutive page pinning finds that we're exceeding the user's memory limits. This avoids the current lazy approach which does accounting and mapping up to the fault, only to return an error on the next iteration to unwind the entire vfio_dma. Reviewed-by: Peter Xu Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_type1.c | 110 +++++++++++++++----------------- 1 file changed, 51 insertions(+), 59 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 32d2633092a3..a8a079ba9477 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -246,69 +246,46 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn) return ret; } -struct vwork { - struct mm_struct *mm; - long npage; - struct work_struct work; -}; - -/* delayed decrement/increment for locked_vm */ -static void vfio_lock_acct_bg(struct work_struct *work) -{ - struct vwork *vwork = container_of(work, struct vwork, work); - struct mm_struct *mm; - - mm = vwork->mm; - down_write(&mm->mmap_sem); - mm->locked_vm += vwork->npage; - up_write(&mm->mmap_sem); - mmput(mm); - kfree(vwork); -} - -static void vfio_lock_acct(struct task_struct *task, long npage) +static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap) { - struct vwork *vwork; struct mm_struct *mm; bool is_current; + int ret; if (!npage) - return; + return 0; is_current = (task->mm == current->mm); mm = is_current ? task->mm : get_task_mm(task); if (!mm) - return; /* process exited */ + return -ESRCH; /* process exited */ - if (down_write_trylock(&mm->mmap_sem)) { - mm->locked_vm += npage; - up_write(&mm->mmap_sem); - if (!is_current) - mmput(mm); - return; - } + ret = down_write_killable(&mm->mmap_sem); + if (!ret) { + if (npage > 0) { + if (lock_cap ? !*lock_cap : + !has_capability(task, CAP_IPC_LOCK)) { + unsigned long limit; + + limit = task_rlimit(task, + RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + if (mm->locked_vm + npage > limit) + ret = -ENOMEM; + } + } + + if (!ret) + mm->locked_vm += npage; - if (is_current) { - mm = get_task_mm(task); - if (!mm) - return; + up_write(&mm->mmap_sem); } - /* - * Couldn't get mmap_sem lock, so must setup to update - * mm->locked_vm later. 
If locked_vm were atomic, we - * wouldn't need this silliness - */ - vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); - if (WARN_ON(!vwork)) { + if (!is_current) mmput(mm); - return; - } - INIT_WORK(&vwork->work, vfio_lock_acct_bg); - vwork->mm = mm; - vwork->npage = npage; - schedule_work(&vwork->work); + + return ret; } /* @@ -405,7 +382,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, long npage, unsigned long *pfn_base) { - unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + unsigned long pfn = 0, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; bool lock_cap = capable(CAP_IPC_LOCK); long ret, pinned = 0, lock_acct = 0; bool rsvd; @@ -442,8 +419,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, /* Lock all the consecutive pages from pfn_base */ for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { - unsigned long pfn = 0; - ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn); if (ret) break; @@ -460,14 +435,25 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, put_pfn(pfn, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, limit << PAGE_SHIFT); - break; + ret = -ENOMEM; + goto unpin_out; } lock_acct++; } } out: - vfio_lock_acct(current, lock_acct); + ret = vfio_lock_acct(current, lock_acct, &lock_cap); + +unpin_out: + if (ret) { + if (!rsvd) { + for (pfn = *pfn_base ; pinned ; pfn++, pinned--) + put_pfn(pfn, dma->prot); + } + + return ret; + } return pinned; } @@ -488,7 +474,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, } if (do_accounting) - vfio_lock_acct(dma->task, locked - unlocked); + vfio_lock_acct(dma->task, locked - unlocked, NULL); return unlocked; } @@ -522,8 +508,14 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, goto pin_page_exit; } - if (!rsvd && do_accounting) - vfio_lock_acct(dma->task, 1); + if (!rsvd && do_accounting) { + ret = vfio_lock_acct(dma->task, 1, &lock_cap); + if (ret) { + put_pfn(*pfn_base, dma->prot); + goto pin_page_exit; + } + } + ret = 1; pin_page_exit: @@ -543,7 +535,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, unlocked = vfio_iova_put_vfio_pfn(dma, vpfn); if (do_accounting) - vfio_lock_acct(dma->task, -unlocked); + vfio_lock_acct(dma->task, -unlocked, NULL); return unlocked; } @@ -740,7 +732,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma->iommu_mapped = false; if (do_accounting) { - vfio_lock_acct(dma->task, -unlocked); + vfio_lock_acct(dma->task, -unlocked, NULL); return 0; } return unlocked; @@ -1382,7 +1374,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) if (!is_invalid_reserved_pfn(vpfn->pfn)) locked++; } - vfio_lock_acct(dma->task, locked - unlocked); + vfio_lock_acct(dma->task, locked - unlocked, NULL); } } From 09944c26606e5a1e6e7b76e58130ffed647c9d3f Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Sat, 29 Apr 2017 10:38:48 +0800 Subject: [PATCH 072/465] iov_iter: don't revert iov buffer if csum error commit a6a5993243550b09f620941dea741b7421fdf79c upstream. The patch 327868212381 (make skb_copy_datagram_msg() et.al. preserve ->msg_iter on error) will revert the iov buffer if copy to iter failed, but it didn't copy any datagram if the skb_checksum_complete error, so no need to revert any data at this place. 
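For reference, the test that marks the datagram bad is a fold of the 32-bit running sum down to 16 bits: a non-zero fold means corruption and the function now returns -EINVAL, reverting the iterator only in the branch that actually copied data. A user-space rendering of the fold with invented sample sums:

#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold(uint32_t sum)
{
    sum = (sum & 0xffff) + (sum >> 16);   /* fold the carries back in */
    sum = (sum & 0xffff) + (sum >> 16);   /* at most one more carry */
    return (uint16_t)~sum;
}

int main(void)
{
    uint32_t good = 0x0001fffe;   /* data plus its stored checksum */
    uint32_t bad  = 0x0001fffd;   /* one bit off somewhere */

    printf("good sum folds to 0x%04x (0 means ok)\n", csum_fold(good));
    printf("bad  sum folds to 0x%04x\n", csum_fold(bad));
    return 0;
}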
v2: Sabrina notice that return -EFAULT when checksum error is not correct here, it would confuse the caller about the return value, so fix it. Fixes: 327868212381 ("make skb_copy_datagram_msg() et.al. preserve->msg_iter on error") Signed-off-by: Ding Tianhong Acked-by: Al Viro Signed-off-by: Wei Yongjun Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index f4947e737f34..d797baa69e43 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -760,7 +760,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, if (msg_data_left(msg) < chunk) { if (__skb_checksum_complete(skb)) - goto csum_error; + return -EINVAL; if (skb_copy_datagram_msg(skb, hlen, msg, chunk)) goto fault; } else { @@ -768,15 +768,16 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter, chunk, &csum)) goto fault; - if (csum_fold(csum)) - goto csum_error; + + if (csum_fold(csum)) { + iov_iter_revert(&msg->msg_iter, chunk); + return -EINVAL; + } + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); } return 0; -csum_error: - iov_iter_revert(&msg->msg_iter, chunk); - return -EINVAL; fault: return -EFAULT; } From 5c7f1dfa7b512c7f60f55a6273aae5a09779b76d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 19 Mar 2017 10:55:57 +0200 Subject: [PATCH 073/465] IB/core: Fix sysfs registration error flow commit b312be3d87e4c80872cbea869e569175c5eb0f9a upstream. The kernel commit cited below restructured ib device management so that the device kobject is initialized in ib_alloc_device. As part of the restructuring, the kobject is now initialized in procedure ib_alloc_device, and is later added to the device hierarchy in the ib_register_device call stack, in procedure ib_device_register_sysfs (which calls device_add). However, in the ib_device_register_sysfs error flow, if an error occurs following the call to device_add, the cleanup procedure device_unregister is called. This call results in the device object being deleted -- which results in various use-after-free crashes. The correct cleanup call is device_del -- which undoes device_add without deleting the device object. The device object will then (correctly) be deleted in the ib_register_device caller's error cleanup flow, when the caller invokes ib_dealloc_device. Fixes: 55aeed06544f6 ("IB/core: Make ib_alloc_device init the kobject") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index daadf3130c9f..48bb75503255 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1301,7 +1301,7 @@ int ib_device_register_sysfs(struct ib_device *device, free_port_list_attributes(device); err_unregister: - device_unregister(class_dev); + device_del(class_dev); err: return ret; From 82f0fecceef090493b8fe6faea75dad2a8473d46 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 19 Mar 2017 10:55:55 +0200 Subject: [PATCH 074/465] IB/core: Fix kernel crash during fail to initialize device commit 4be3a4fa51f432ef045546d16f25c68a1ab525b9 upstream. 
This patch fixes the kernel crash that occurs during ib_dealloc_device() called due to provider driver fails with an error after ib_alloc_device() and before it can register using ib_register_device(). This crashed seen in tha lab as below which can occur with any IB device which fails to perform its device initialization before invoking ib_register_device(). This patch avoids touching cache and port immutable structures if device is not yet initialized. It also releases related memory when cache and port immutable data structure initialization fails during register_device() state. [81416.561946] BUG: unable to handle kernel NULL pointer dereference at (null) [81416.570340] IP: ib_cache_release_one+0x29/0x80 [ib_core] [81416.576222] PGD 78da66067 [81416.576223] PUD 7f2d7c067 [81416.579484] PMD 0 [81416.582720] [81416.587242] Oops: 0000 [#1] SMP [81416.722395] task: ffff8807887515c0 task.stack: ffffc900062c0000 [81416.729148] RIP: 0010:ib_cache_release_one+0x29/0x80 [ib_core] [81416.735793] RSP: 0018:ffffc900062c3a90 EFLAGS: 00010202 [81416.741823] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 [81416.749785] RDX: 0000000000000000 RSI: 0000000000000282 RDI: ffff880859fec000 [81416.757757] RBP: ffffc900062c3aa0 R08: ffff8808536e5ac0 R09: ffff880859fec5b0 [81416.765708] R10: 00000000536e5c01 R11: ffff8808536e5ac0 R12: ffff880859fec000 [81416.773672] R13: 0000000000000000 R14: ffff8808536e5ac0 R15: ffff88084ebc0060 [81416.781621] FS: 00007fd879fab740(0000) GS:ffff88085fac0000(0000) knlGS:0000000000000000 [81416.790522] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [81416.797094] CR2: 0000000000000000 CR3: 00000007eb215000 CR4: 00000000003406e0 [81416.805051] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [81416.812997] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [81416.820950] Call Trace: [81416.824226] ib_device_release+0x1e/0x40 [ib_core] [81416.829858] device_release+0x32/0xa0 [81416.834370] kobject_cleanup+0x63/0x170 [81416.839058] kobject_put+0x25/0x50 [81416.843319] ib_dealloc_device+0x25/0x40 [ib_core] [81416.848986] mlx5_ib_add+0x163/0x1990 [mlx5_ib] [81416.854414] mlx5_add_device+0x5a/0x160 [mlx5_core] [81416.860191] mlx5_register_interface+0x8d/0xc0 [mlx5_core] [81416.866587] ? 0xffffffffa09e9000 [81416.870816] mlx5_ib_init+0x15/0x17 [mlx5_ib] [81416.876094] do_one_initcall+0x51/0x1b0 [81416.880861] ? __vunmap+0x85/0xd0 [81416.885113] ? kmem_cache_alloc_trace+0x14b/0x1b0 [81416.890768] ? vfree+0x2e/0x70 [81416.894762] do_init_module+0x60/0x1fa [81416.899441] load_module+0x15f6/0x1af0 [81416.904114] ? __symbol_put+0x60/0x60 [81416.908709] ? ima_post_read_file+0x3d/0x80 [81416.913828] ? 
security_kernel_post_read_file+0x6b/0x80 [81416.920006] SYSC_finit_module+0xa6/0xf0 [81416.924888] SyS_finit_module+0xe/0x10 [81416.929568] entry_SYSCALL_64_fastpath+0x1a/0xa9 [81416.935089] RIP: 0033:0x7fd879494949 [81416.939543] RSP: 002b:00007ffdbc1b4e58 EFLAGS: 00000202 ORIG_RAX: 0000000000000139 [81416.947982] RAX: ffffffffffffffda RBX: 0000000001b66f00 RCX: 00007fd879494949 [81416.955965] RDX: 0000000000000000 RSI: 000000000041a13c RDI: 0000000000000003 [81416.963926] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000001b652a0 [81416.971861] R10: 0000000000000003 R11: 0000000000000202 R12: 00007ffdbc1b3e70 [81416.979763] R13: 00007ffdbc1b3e50 R14: 0000000000000005 R15: 0000000000000000 [81417.008005] RIP: ib_cache_release_one+0x29/0x80 [ib_core] RSP: ffffc900062c3a90 [81417.016045] CR2: 0000000000000000 Fixes: 55aeed0654 ("IB/core: Make ib_alloc_device init the kobject") Fixes: 7738613e7c ("IB/core: Add per port immutable struct to ib_device") Reviewed-by: Daniel Jurgens Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/device.c | 33 +++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7c9e34d679d3..81d447da0048 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -172,8 +172,16 @@ static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); - ib_cache_release_one(dev); - kfree(dev->port_immutable); + WARN_ON(dev->reg_state == IB_DEV_REGISTERED); + if (dev->reg_state == IB_DEV_UNREGISTERED) { + /* + * In IB_DEV_UNINITIALIZED state, cache or port table + * is not even created. Free cache and port table only when + * device reaches UNREGISTERED state. + */ + ib_cache_release_one(dev); + kfree(dev->port_immutable); + } kfree(dev); } @@ -380,32 +388,27 @@ int ib_register_device(struct ib_device *device, ret = ib_cache_setup_one(device); if (ret) { pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); - goto out; + goto port_cleanup; } ret = ib_device_register_rdmacg(device); if (ret) { pr_warn("Couldn't register device with rdma cgroup\n"); - ib_cache_cleanup_one(device); - goto out; + goto cache_cleanup; } memset(&device->attrs, 0, sizeof(device->attrs)); ret = device->query_device(device, &device->attrs, &uhw); if (ret) { pr_warn("Couldn't query the device attributes\n"); - ib_device_unregister_rdmacg(device); - ib_cache_cleanup_one(device); - goto out; + goto cache_cleanup; } ret = ib_device_register_sysfs(device, port_callback); if (ret) { pr_warn("Couldn't register device %s with driver model\n", device->name); - ib_device_unregister_rdmacg(device); - ib_cache_cleanup_one(device); - goto out; + goto cache_cleanup; } device->reg_state = IB_DEV_REGISTERED; @@ -417,6 +420,14 @@ int ib_register_device(struct ib_device *device, down_write(&lists_rwsem); list_add_tail(&device->core_list, &device_list); up_write(&lists_rwsem); + mutex_unlock(&device_mutex); + return 0; + +cache_cleanup: + ib_cache_cleanup_one(device); + ib_cache_release_one(device); +port_cleanup: + kfree(device->port_immutable); out: mutex_unlock(&device_mutex); return ret; From 88ded1f18b4ee3cef7d08883cf51d0a226d51c72 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Ruhl" Date: Sun, 9 Apr 2017 10:15:51 -0700 Subject: [PATCH 075/465] IB/core: For multicast functions, verify that LIDs are multicast LIDs commit 8561eae60ff9417a50fa1fb2b83ae950dc5c1e21 upstream. The Infiniband spec defines "A multicast address is defined by a MGID and a MLID" (section 10.5). Currently the MLID value is not validated. Add check to verify that the MLID value is in the correct address range. Fixes: 0c33aeedb2cf ("[IB] Add checks to multicast attach and detach") Reviewed-by: Ira Weiny Reviewed-by: Dasaratharaman Chandramouli Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/verbs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 85ed5051fdfd..207e5c2457cc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1519,7 +1519,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) if (!qp->device->attach_mcast) return -ENOSYS; - if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD || + lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || + lid == be16_to_cpu(IB_LID_PERMISSIVE)) return -EINVAL; ret = qp->device->attach_mcast(qp, gid, lid); @@ -1535,7 +1537,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) if (!qp->device->detach_mcast) return -ENOSYS; - if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD || + lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || + lid == be16_to_cpu(IB_LID_PERMISSIVE)) return -EINVAL; ret = qp->device->detach_mcast(qp, gid, lid); From f98af463f8bb0fd0d7b2ffaec54ac6854f1b49b6 Mon Sep 17 00:00:00 2001 From: Shamir Rabinovitch Date: Wed, 29 Mar 2017 06:21:59 -0400 Subject: [PATCH 076/465] IB/IPoIB: ibX: failed to create mcg debug file commit 771a52584096c45e4565e8aabb596eece9d73d61 upstream. When udev renames the netdev devices, ipoib debugfs entries does not get renamed. As a result, if subsequent probe of ipoib device reuse the name then creating a debugfs entry for the new device would fail. Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as part of ipoib event handling in order to avoid any race condition between these. 
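The LID range check added for the multicast attach/detach paths above is easy to verify in isolation. In the kernel the constants are big-endian, which is why the patch wraps them in be16_to_cpu(); the host-order values below keep the demo simple, and the sample LIDs are arbitrary.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MULTICAST_LID_BASE 0xC000   /* first multicast LID */
#define LID_PERMISSIVE     0xFFFF   /* permissive LID, never a group */

static bool lid_is_multicast(uint16_t lid)
{
    return lid >= MULTICAST_LID_BASE && lid != LID_PERMISSIVE;
}

int main(void)
{
    uint16_t samples[] = { 0x0001, 0xBFFF, 0xC000, 0xFFFE, 0xFFFF };

    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
        printf("0x%04X -> %s\n", (unsigned)samples[i],
               lid_is_multicast(samples[i]) ? "accepted" : "rejected");
    return 0;
}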
Fixes: 1732b0ef3b3a ([IPoIB] add path record information in debugfs) Signed-off-by: Vijay Kumar Signed-off-by: Shamir Rabinovitch Reviewed-by: Mark Bloch Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 3 ++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 44 ++++++++++++++++++++--- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 3 -- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 6bd5740e2691..09396bd7b02d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -281,8 +281,11 @@ void ipoib_delete_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); + WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n"); + WARN_ONCE(!priv->path_dentry, "null path debug file\n"); debugfs_remove(priv->mcg_dentry); debugfs_remove(priv->path_dentry); + priv->mcg_dentry = priv->path_dentry = NULL; } int ipoib_register_debugfs(void) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d1d3fb7a6127..b319cc26c9a7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -108,6 +108,33 @@ static struct ib_client ipoib_client = { .get_net_dev_by_params = ipoib_get_net_dev_by_params, }; +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static int ipoib_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netdev_notifier_info *ni = ptr; + struct net_device *dev = ni->dev; + + if (dev->netdev_ops->ndo_open != ipoib_open) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + ipoib_create_debug_files(dev); + break; + case NETDEV_CHANGENAME: + ipoib_delete_debug_files(dev); + ipoib_create_debug_files(dev); + break; + case NETDEV_UNREGISTER: + ipoib_delete_debug_files(dev); + break; + } + + return NOTIFY_DONE; +} +#endif + int ipoib_open(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1674,8 +1701,6 @@ void ipoib_dev_cleanup(struct net_device *dev) ASSERT_RTNL(); - ipoib_delete_debug_files(dev); - /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { /* Stop GC on child */ @@ -2090,8 +2115,6 @@ static struct net_device *ipoib_add_port(const char *format, goto register_failed; } - ipoib_create_debug_files(priv->dev); - if (ipoib_cm_add_mode_attr(priv->dev)) goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) @@ -2106,7 +2129,6 @@ static struct net_device *ipoib_add_port(const char *format, return priv->dev; sysfs_failed: - ipoib_delete_debug_files(priv->dev); unregister_netdev(priv->dev); register_failed: @@ -2191,6 +2213,12 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) kfree(dev_list); } +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static struct notifier_block ipoib_netdev_notifier = { + .notifier_call = ipoib_netdev_event, +}; +#endif + static int __init ipoib_init_module(void) { int ret; @@ -2243,6 +2271,9 @@ static int __init ipoib_init_module(void) if (ret) goto err_client; +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + register_netdevice_notifier(&ipoib_netdev_notifier); +#endif return 0; err_client: @@ -2260,6 +2291,9 @@ static int __init ipoib_init_module(void) static void __exit ipoib_cleanup_module(void) { +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + unregister_netdevice_notifier(&ipoib_netdev_notifier); +#endif ipoib_netlink_fini(); 
ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 3e10e3dac2e7..e543bc745f34 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, goto register_failed; } - ipoib_create_debug_files(priv->dev); - /* RTNL childs don't need proprietary sysfs entries */ if (type == IPOIB_LEGACY_CHILD) { if (ipoib_cm_add_mode_attr(priv->dev)) @@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, sysfs_failed: result = -ENOMEM; - ipoib_delete_debug_files(priv->dev); unregister_netdevice(priv->dev); register_failed: From 295a7aab354d14f0303867cf7fd71af26336f4d8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 21 Mar 2017 12:57:05 +0200 Subject: [PATCH 077/465] IB/mlx4: Fix ib device initialization error flow commit 99e68909d5aba1861897fe7afc3306c3c81b6de0 upstream. In mlx4_ib_add, procedure mlx4_ib_alloc_eqs is called to allocate EQs. However, in the mlx4_ib_add error flow, procedure mlx4_ib_free_eqs is not called to free the allocated EQs. Fixes: e605b743f33d ("IB/mlx4: Increase the number of vectors (EQs) available for ULPs") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index fba94df28cf1..c7e6d137c162 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2941,6 +2941,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]); err_map: + mlx4_ib_free_eqs(dev, ibdev); iounmap(ibdev->uar_map); err_uar: From 7a48c89a3c88745f0bfb16c3ad8d7be26fc3482c Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 21 Mar 2017 12:57:06 +0200 Subject: [PATCH 078/465] IB/mlx4: Reduce SRIOV multicast cleanup warning message to debug level commit fb7a91746af18b2ebf596778b38a709cdbc488d3 upstream. A warning message during SRIOV multicast cleanup should have actually been a debug level message. The condition generating the warning does no harm and can fill the message log. In some cases, during testing, some tests were so intense as to swamp the message log with these warning messages, causing a stall in the console message log output task. This stall caused an NMI to be sent to all CPUs (so that they all dumped their stacks into the message log). Aside from the message flood causing an NMI, the tests all passed. Once the message flood which caused the NMI is removed (by reducing the warning message to debug level), the NMI no longer occurs. Sample message log (console log) output illustrating the flood and resultant NMI (snippets with comments and modified with ... instead of hex digits, to satisfy checkpatch.pl): _mlx4_ib_mcg_port_cleanup: ... WARNING: group refcount 1!!!... *** About 4000 almost identical lines in less than one second *** _mlx4_ib_mcg_port_cleanup: ... WARNING: group refcount 1!!!... INFO: rcu_sched detected stalls on CPUs/tasks: { 17} (...) *** { 17} above indicates that CPU 17 was the one that stalled *** sending NMI to all CPUs: ... 
NMI backtrace for cpu 17 CPU: 17 PID: 45909 Comm: kworker/17:2 Hardware name: HP ProLiant DL360p Gen8, BIOS P71 09/08/2013 Workqueue: events fb_flashcursor task: ffff880478...... ti: ffff88064e...... task.ti: ffff88064e...... RIP: 0010:[ffffffff81......] [ffffffff81......] io_serial_in+0x15/0x20 RSP: 0018:ffff88064e257cb0 EFLAGS: 00000002 RAX: 0000000000...... RBX: ffffffff81...... RCX: 0000000000...... RDX: 0000000000...... RSI: 0000000000...... RDI: ffffffff81...... RBP: ffff88064e...... R08: ffffffff81...... R09: 0000000000...... R10: 0000000000...... R11: ffff88064e...... R12: 0000000000...... R13: 0000000000...... R14: ffffffff81...... R15: 0000000000...... FS: 0000000000......(0000) GS:ffff8804af......(0000) knlGS:000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080...... CR2: 00007f2a2f...... CR3: 0000000001...... CR4: 0000000000...... DR0: 0000000000...... DR1: 0000000000...... DR2: 0000000000...... DR3: 0000000000...... DR6: 00000000ff...... DR7: 0000000000...... Stack: ffff88064e...... ffffffff81...... ffffffff81...... 0000000000...... ffffffff81...... ffff88064e...... ffffffff81...... ffffffff81...... ffffffff81...... ffff88064e...... ffffffff81...... 0000000000...... Call Trace: [] wait_for_xmitr+0x3b/0xa0 [] serial8250_console_putchar+0x1c/0x30 [] ? serial8250_console_write+0x140/0x140 [] uart_console_write+0x3a/0x80 [] serial8250_console_write+0xae/0x140 [] call_console_drivers.constprop.15+0x91/0xf0 [] console_unlock+0x3bf/0x400 [] fb_flashcursor+0x5d/0x140 [] ? bit_clear+0x120/0x120 [] process_one_work+0x17b/0x470 [] worker_thread+0x11b/0x400 [] ? rescuer_thread+0x400/0x400 [] kthread+0xcf/0xe0 [] ? kthread_create_on_node+0x140/0x140 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x140/0x140 Code: 48 89 e5 d3 e6 48 63 f6 48 03 77 10 8b 06 5d c3 66 0f 1f 44 00 00 66 66 66 6 As indicated in the stack trace above, the console output task got swamped. Fixes: b9c5d6a64358 ("IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/mcg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index e010fe459e67..8772d88d324d 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -1102,7 +1102,8 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy while ((p = rb_first(&ctx->mcg_table)) != NULL) { group = rb_entry(p, struct mcast_group, node); if (atomic_read(&group->refcount)) - mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group); + mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n", + atomic_read(&group->refcount), group); force_clean_group(group); } From 2d5845224145b04699bf157d0acbe817bd0dbadc Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 9 Apr 2017 10:16:35 -0700 Subject: [PATCH 079/465] IB/hfi1: Prevent kernel QP post send hard lockups commit b6eac931b9bb2bce4db7032c35b41e5e34ec22a5 upstream. The driver progress routines can call cond_resched() when a timeslice is exhausted and irqs are enabled. If the ULP had been holding a spin lock without disabling irqs and the post send directly called the progress routine, the cond_resched() could yield allowing another thread from the same ULP to deadlock on that same lock. 
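The shape of the fix, described in the next paragraph, can be sketched in a few lines: the send loop is told whether it was entered from its own workqueue thread and only yields the CPU in that case, never when called directly from a posting context that may be holding a lock. Everything below is an invented user-space toy, not hfi1 code.

#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

static void do_send(int packets, bool in_thread)
{
    for (int i = 0; i < packets; i++) {
        /* ... build and post one packet ... */
        if (in_thread && (i % 64) == 63)
            sched_yield();   /* safe: no caller lock can be held here */
    }
    printf("sent %d packets from %s context\n",
           packets, in_thread ? "worker-thread" : "direct post-send");
}

int main(void)
{
    do_send(128, false);   /* direct call while the ULP may hold locks */
    do_send(128, true);    /* deferred call from the dedicated thread */
    return 0;
}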
Correct by replacing the current hfi1_do_send() calldown with a unique one for post send and adding an argument to hfi1_do_send() to indicate that the send engine is running in a thread. If the routine is not running in a thread, avoid calling cond_resched(). Fixes: Commit 831464ce4b74 ("IB/hfi1: Don't call cond_resched in atomic mode when sending packets") Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/ruc.c | 26 ++++++++++++++++---------- drivers/infiniband/hw/hfi1/verbs.c | 4 ++-- drivers/infiniband/hw/hfi1/verbs.h | 6 ++++-- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index aa15bcbfb079..17d7578de6e5 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -784,23 +784,29 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /* when sending, force a reschedule every one of these periods */ #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */ +void hfi1_do_send_from_rvt(struct rvt_qp *qp) +{ + hfi1_do_send(qp, false); +} + void _hfi1_do_send(struct work_struct *work) { struct iowait *wait = container_of(work, struct iowait, iowork); struct rvt_qp *qp = iowait_to_qp(wait); - hfi1_do_send(qp); + hfi1_do_send(qp, true); } /** * hfi1_do_send - perform a send on a QP * @work: contains a pointer to the QP + * @in_thread: true if in a workqueue thread * * Process entries in the send work queue until credit or queue is * exhausted. Only allow one CPU to send a packet per QP. * Otherwise, two threads could send packets out of order. */ -void hfi1_do_send(struct rvt_qp *qp) +void hfi1_do_send(struct rvt_qp *qp, bool in_thread) { struct hfi1_pkt_state ps; struct hfi1_qp_priv *priv = qp->priv; @@ -868,8 +874,10 @@ void hfi1_do_send(struct rvt_qp *qp) qp->s_hdrwords = 0; /* allow other tasks to run */ if (unlikely(time_after(jiffies, timeout))) { - if (workqueue_congested(cpu, - ps.ppd->hfi1_wq)) { + if (!in_thread || + workqueue_congested( + cpu, + ps.ppd->hfi1_wq)) { spin_lock_irqsave( &qp->s_lock, ps.flags); @@ -882,11 +890,9 @@ void hfi1_do_send(struct rvt_qp *qp) *ps.ppd->dd->send_schedule); return; } - if (!irqs_disabled()) { - cond_resched(); - this_cpu_inc( - *ps.ppd->dd->send_schedule); - } + cond_resched(); + this_cpu_inc( + *ps.ppd->dd->send_schedule); timeout = jiffies + (timeout_int) / 8; } spin_lock_irqsave(&qp->s_lock, ps.flags); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 222315fadab1..16ef7b12b0b8 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -1751,7 +1751,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; - dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; + dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt; dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 3a0b589e41c2..92e72ab2b610 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -350,7 +350,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, void _hfi1_do_send(struct work_struct *work); -void hfi1_do_send(struct rvt_qp *qp); +void hfi1_do_send_from_rvt(struct rvt_qp *qp); + +void hfi1_do_send(struct rvt_qp *qp, bool in_thread); void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); From 08b0b5bd543a9f8fee67283e980cd7db51165bf9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Mar 2017 14:15:52 +0200 Subject: [PATCH 080/465] perf auxtrace: Fix no_size logic in addr_filter__resolve_kernel_syms() commit c3a0bbc7ad7598dec5a204868bdf8a2b1b51df14 upstream. Address filtering with kernel symbols incorrectly resulted in the error "Cannot determine size of symbol" because the no_size logic was the wrong way around. Signed-off-by: Adrian Hunter Tested-by: Andi Kleen Link: http://lkml.kernel.org/r/1490357752-27942-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/auxtrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c5a6e0b12452..78bd632f144d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1826,7 +1826,7 @@ static int addr_filter__resolve_kernel_syms(struct addr_filter *filt) filt->addr = start; if (filt->range && !filt->size && !filt->sym_to) { filt->size = size; - no_size = !!size; + no_size = !size; } } @@ -1840,7 +1840,7 @@ static int addr_filter__resolve_kernel_syms(struct addr_filter *filt) if (err) return err; filt->size = start + size - filt->addr; - no_size = !!size; + no_size = !size; } /* The very last symbol in kallsyms does not imply a particular size */ From b09eace76ed399b92a4ede8d2854bb2347a9cbab Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Apr 2017 09:51:51 +0200 Subject: [PATCH 081/465] perf annotate s390: Fix perf annotate error -95 (4.10 regression) commit e77852b32d6d4430c68c38aaf73efe5650fa25af upstream. since 4.10 perf annotate exits on s390 with an "unknown error -95". Turns out that commit 786c1b51844d ("perf annotate: Start supporting cross arch annotation") added a hard requirement for architecture support when objdump is used but only provided x86 and arm support. Meanwhile power was added so lets add s390 as well. While at it make sure to implement the branch and jump types. 
Signed-off-by: Christian Borntraeger Cc: Andreas Krebbel Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: linux-s390 Fixes: 786c1b51844 "perf annotate: Start supporting cross arch annotation" Link: http://lkml.kernel.org/r/1491465112-45819-2-git-send-email-borntraeger@de.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/annotate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7aa57225cbf7..fd410ac79bc9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -136,6 +136,12 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "s390", + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) From 7a17cbb4b8dec4a608869138a7f668314d70437b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Apr 2017 09:51:52 +0200 Subject: [PATCH 082/465] perf annotate s390: Implement jump types for perf annotate commit d9f8dfa9baf9b6ae1f2f84f887176558ecde5268 upstream. Implement simple detection for all kind of jumps and branches. Signed-off-by: Christian Borntraeger Cc: Andreas Krebbel Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: linux-s390 Link: http://lkml.kernel.org/r/1491465112-45819-3-git-send-email-borntraeger@de.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/s390/annotate/instructions.c | 30 ++++++++++++++++++++ tools/perf/util/annotate.c | 2 ++ 2 files changed, 32 insertions(+) create mode 100644 tools/perf/arch/s390/annotate/instructions.c diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c new file mode 100644 index 000000000000..745b4b1b8b21 --- /dev/null +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -0,0 +1,30 @@ +static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + /* catch all kind of jumps */ + if (strchr(name, 'j') || + !strncmp(name, "bct", 3) || + !strncmp(name, "br", 2)) + ops = &jump_ops; + /* override call/returns */ + if (!strcmp(name, "bras") || + !strcmp(name, "brasl") || + !strcmp(name, "basr")) + ops = &call_ops; + if (!strcmp(name, "br")) + ops = &ret_ops; + + arch__associate_ins_ops(arch, name, ops); + return ops; +} + +static int s390__annotate_init(struct arch *arch) +{ + if (!arch->initialized) { + arch->initialized = true; + arch->associate_instruction_ops = s390__associate_ins_ops; + } + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index fd410ac79bc9..83c57a11dc5b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -108,6 +108,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/arm64/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" +#include "arch/s390/annotate/instructions.c" static struct arch architectures[] = { { @@ -132,6 +133,7 @@ static struct arch architectures[] = { }, { .name = "s390", + .init = s390__annotate_init, .objdump = { .comment_char = '#', }, From 7621cb7966b423cb09343b7d8a7bab87efbdd51a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 29 Apr 2017 21:07:30 -0400 Subject: [PATCH 083/465] jbd2: fix dbench4 performance regression for 'nobarrier' mounts commit 5052b069acf73866d00077d8bc49983c3ee903e5 upstream. 
Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_FUA implementation. Since JBD2 strips REQ_FUA and REQ_FLUSH flags from submitted IO when the filesystem is mounted with nobarrier mount option, journal superblock writes ended up being async writes after this patch and that caused heavy performance regression for dbench4 benchmark with high number of processes. In my test setup with HP RAID array with non-volatile write cache and 32 GB ram, dbench4 runs with 8 processes regressed by ~25%. Fix the problem by making sure journal superblock writes are always treated as synchronous since they generally block progress of the journalling machinery and thus the whole filesystem. Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5adc2fb62b0f..e768126f6a72 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1348,7 +1348,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) jbd2_superblock_csum_set(journal, sb); get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(REQ_OP_WRITE, write_flags, bh); + ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh); wait_on_buffer(bh); if (buffer_write_io_error(bh)) { clear_buffer_write_io_error(bh); From 1567c170633295b10e5821a34db5b341aefe1156 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 30 Apr 2017 00:10:50 -0400 Subject: [PATCH 084/465] ext4: evict inline data when writing to memory map commit 7b4cc9787fe35b3ee2dfb1c35e22eafc32e00c33 upstream. Currently the case of writing via mmap to a file with inline data is not handled. This is maybe a rare case since it requires a writable memory map of a very small file, but it is trivial to trigger with on inline_data filesystem, and it causes the 'BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA));' in ext4_writepages() to be hit: mkfs.ext4 -O inline_data /dev/vdb mount /dev/vdb /mnt xfs_io -f /mnt/file \ -c 'pwrite 0 1' \ -c 'mmap -w 0 1m' \ -c 'mwrite 0 1' \ -c 'fsync' kernel BUG at fs/ext4/inode.c:2723! invalid opcode: 0000 [#1] SMP CPU: 1 PID: 2532 Comm: xfs_io Not tainted 4.11.0-rc1-xfstests-00301-g071d9acf3d1f #633 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014 task: ffff88003d3a8040 task.stack: ffffc90000300000 RIP: 0010:ext4_writepages+0xc89/0xf8a RSP: 0018:ffffc90000303ca0 EFLAGS: 00010283 RAX: 0000028410000000 RBX: ffff8800383fa3b0 RCX: ffffffff812afcdc RDX: 00000a9d00000246 RSI: ffffffff81e660e0 RDI: 0000000000000246 RBP: ffffc90000303dc0 R08: 0000000000000002 R09: 869618e8f99b4fa5 R10: 00000000852287a2 R11: 00000000a03b49f4 R12: ffff88003808e698 R13: 0000000000000000 R14: 7fffffffffffffff R15: 7fffffffffffffff FS: 00007fd3e53094c0(0000) GS:ffff88003e400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd3e4c51000 CR3: 000000003d554000 CR4: 00000000003406e0 Call Trace: ? _raw_spin_unlock+0x27/0x2a ? kvm_clock_read+0x1e/0x20 do_writepages+0x23/0x2c ? do_writepages+0x23/0x2c __filemap_fdatawrite_range+0x80/0x87 filemap_write_and_wait_range+0x67/0x8c ext4_sync_file+0x20e/0x472 vfs_fsync_range+0x8e/0x9f ? 
syscall_trace_enter+0x25b/0x2d0 vfs_fsync+0x1c/0x1e do_fsync+0x31/0x4a SyS_fsync+0x10/0x14 do_syscall_64+0x69/0x131 entry_SYSCALL64_slow_path+0x25/0x25 We could try to be smart and keep the inline data in this case, or at least support delayed allocation when allocating the block, but these solutions would be more complicated and don't seem worthwhile given how rare this case seems to be. So just fix the bug by calling ext4_convert_inline_data() when we're asked to make a page writable, so that any inline data gets evicted, with the block allocated immediately. Reported-by: Nick Alcock Reviewed-by: Andreas Dilger Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b9ffa9f4191f..88203ae5b154 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5874,6 +5874,11 @@ int ext4_page_mkwrite(struct vm_fault *vmf) file_update_time(vma->vm_file); down_read(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_convert_inline_data(inode); + if (ret) + goto out_ret; + /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && From 7adb15036a05c76687706d40f0dda030f8607b0b Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 25 Apr 2017 15:37:56 -0400 Subject: [PATCH 085/465] orangefs: fix bounds check for listxattr commit a956af337b9ff25822d9ce1a59c6ed0c09fc14b9 upstream. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 74a81b1daaac..fba4db7d0512 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -358,7 +358,7 @@ ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size) returned_count = new_op->downcall.resp.listxattr.returned_count; if (returned_count < 0 || - returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) { + returned_count > ORANGEFS_MAX_XATTR_LISTLEN) { gossip_err("%s: impossible value for returned_count:%d:\n", __func__, returned_count); From a6d3c33255f899afb233fca2439702099055baf9 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 25 Apr 2017 15:37:57 -0400 Subject: [PATCH 086/465] orangefs: clean up oversize xattr validation commit e675c5ec51fe2554719a7b6bcdbef0a770f2c19b upstream. Also don't check flags as this has been validated by the VFS already. Fix an off-by-one error in the max size checking. Stop logging just because userspace wants to write attributes which do not fit. This and the previous commit fix xfstests generic/020. 
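Condensed, the setxattr checks change as follows (a sketch of the hunks that follow, not additional code):

        /* before: a value or name of exactly the maximum length was refused,
         * and the VFS-validated flags were re-checked with a noisy gossip_err() */
        if (size >= ORANGEFS_MAX_XATTR_VALUELEN || flags < 0)
                return -EINVAL;

        /* after: refuse only strictly oversized input, quietly */
        if (size > ORANGEFS_MAX_XATTR_VALUELEN)
                return -EINVAL;
        if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
                return -EINVAL;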
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/xattr.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index fba4db7d0512..237c9c04dc3b 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -76,11 +76,8 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) { - gossip_err("Invalid key length (%d)\n", - (int)strlen(name)); + if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; - } fsuid = from_kuid(&init_user_ns, current_fsuid()); fsgid = from_kgid(&init_user_ns, current_fsgid()); @@ -172,6 +169,9 @@ static int orangefs_inode_removexattr(struct inode *inode, const char *name, struct orangefs_kernel_op_s *new_op = NULL; int ret = -ENOMEM; + if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) + return -EINVAL; + down_write(&orangefs_inode->xattr_sem); new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR); if (!new_op) @@ -231,23 +231,13 @@ int orangefs_inode_setxattr(struct inode *inode, const char *name, "%s: name %s, buffer_size %zd\n", __func__, name, size); - if (size >= ORANGEFS_MAX_XATTR_VALUELEN || - flags < 0) { - gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n", - (int)size, - flags); + if (size > ORANGEFS_MAX_XATTR_VALUELEN) + return -EINVAL; + if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; - } internal_flag = convert_to_internal_xattr_flags(flags); - if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) { - gossip_err - ("orangefs_inode_setxattr: bogus key size (%d)\n", - (int)(strlen(name))); - return -EINVAL; - } - /* This is equivalent to a removexattr */ if (size == 0 && value == NULL) { gossip_debug(GOSSIP_XATTR_DEBUG, From 66f1885beae71ccb4b49427cd37c57862b818a26 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 25 Apr 2017 15:37:58 -0400 Subject: [PATCH 087/465] orangefs: do not set getattr_time on orangefs_lookup commit 17930b252cd6f31163c259eaa99dd8aa630fb9ba upstream. Since orangefs_lookup calls orangefs_iget which calls orangefs_inode_getattr, getattr_time will get set. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/namei.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index a290ff6ec756..7c315938e9c2 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -193,8 +193,6 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - ORANGEFS_I(inode)->getattr_time = jiffies - 1; - gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d " "Found good inode [%lu] with count [%d]\n", From 8bd83223086df14ed99f52950e3c064985c94fd6 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 25 Apr 2017 15:38:04 -0400 Subject: [PATCH 088/465] orangefs: do not check possibly stale size on truncate commit 53950ef541675df48c219a8d665111a0e68dfc2f upstream. Let the server figure this out because our size might be out of date or not present. The bug was that xfs_io -f -t -c "pread -v 0 100" /mnt/foo echo "Test" > /mnt/foo xfs_io -f -t -c "pread -v 0 100" /mnt/foo fails because the second truncate did not happen if nothing had requested the size after the write in echo. Thus i_size was zero (not present) and the orangefs_setattr though i_size was zero and there was nothing to do. 
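The change to the ATTR_SIZE branch, condensed from the one-line hunk below:

        /* before: trusted the cached size and skipped the server when equal */
        if ((iattr->ia_valid & ATTR_SIZE) &&
            iattr->ia_size != i_size_read(inode))
                ret = orangefs_setattr_size(inode, iattr);

        /* after: always ask the server; the cached i_size may be absent or stale */
        if (iattr->ia_valid & ATTR_SIZE)
                ret = orangefs_setattr_size(inode, iattr);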
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index a304bf34b212..5ebe19353da6 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -218,8 +218,7 @@ int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) if (ret) goto out; - if ((iattr->ia_valid & ATTR_SIZE) && - iattr->ia_size != i_size_read(inode)) { + if (iattr->ia_valid & ATTR_SIZE) { ret = orangefs_setattr_size(inode, iattr); if (ret) goto out; From 2cfc74406969c5a7ced9bcd51af5792970713b22 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:24 -0700 Subject: [PATCH 089/465] fs/xattr.c: zero out memory copied to userspace in getxattr commit 81be3dee96346fbe08c31be5ef74f03f6b63cf68 upstream. getxattr uses vmalloc to allocate memory if kzalloc fails. This is filled by vfs_getxattr and then copied to the userspace. vmalloc, however, doesn't zero out the memory so if the specific implementation of the xattr handler is sloppy we can theoretically expose a kernel memory. There is no real sign this is really the case but let's make sure this will not happen and use vzalloc instead. Fixes: 779302e67835 ("fs/xattr.c:getxattr(): improve handling of allocation failures") Link: http://lkml.kernel.org/r/20170306103327.2766-1-mhocko@kernel.org Acked-by: Kees Cook Reported-by: Vlastimil Babka Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xattr.c b/fs/xattr.c index 7e3317cf4045..94f49a082dd2 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -530,7 +530,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, size = XATTR_SIZE_MAX; kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); if (!kvalue) { - kvalue = vmalloc(size); + kvalue = vzalloc(size); if (!kvalue) return -ENOMEM; } From a4c34c20fbbc505dc3b202d299400ed57fed370b Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 28 Apr 2017 11:14:04 +0100 Subject: [PATCH 090/465] ceph: fix memory leak in __ceph_setxattr() commit eeca958dce0a9231d1969f86196653eb50fcc9b3 upstream. The ceph_inode_xattr needs to be released when removing an xattr. Easily reproducible running the 'generic/020' test from xfstests or simply by doing: attr -s attr0 -V 0 /mnt/test && attr -r attr0 /mnt/test While there, also fix the error path. Here's the kmemleak splat: unreferenced object 0xffff88001f86fbc0 (size 64): comm "attr", pid 244, jiffies 4294904246 (age 98.464s) hex dump (first 32 bytes): 40 fa 86 1f 00 88 ff ff 80 32 38 1f 00 88 ff ff @........28..... 00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de ................ 
backtrace: [] kmemleak_alloc+0x49/0xa0 [] kmem_cache_alloc+0x9b/0xf0 [] __ceph_setxattr+0x17e/0x820 [] ceph_set_xattr_handler+0x37/0x40 [] __vfs_removexattr+0x4b/0x60 [] vfs_removexattr+0x77/0xd0 [] removexattr+0x41/0x60 [] path_removexattr+0x75/0xa0 [] SyS_lremovexattr+0xb/0x10 [] entry_SYSCALL_64_fastpath+0x13/0x94 [] 0xffffffffffffffff Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index febc28f9e2c2..75267cdd5dfd 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -392,6 +392,7 @@ static int __set_xattr(struct ceph_inode_info *ci, if (update_xattr) { int err = 0; + if (xattr && (flags & XATTR_CREATE)) err = -EEXIST; else if (!xattr && (flags & XATTR_REPLACE)) @@ -399,12 +400,14 @@ static int __set_xattr(struct ceph_inode_info *ci, if (err) { kfree(name); kfree(val); + kfree(*newxattr); return err; } if (update_xattr < 0) { if (xattr) __remove_xattr(ci, xattr); kfree(name); + kfree(*newxattr); return 0; } } From 601462864dcceedd9afc2bbc14439675d48d179c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 3 May 2017 14:55:59 -0700 Subject: [PATCH 091/465] fs: fix data invalidation in the cleancache during direct IO commit 55635ba76ef91f26b418702ace5e6287eb727f6a upstream. Patch series "Properly invalidate data in the cleancache", v2. We've noticed that after direct IO write, buffered read sometimes gets stale data which is coming from the cleancache. The reason for this is that some direct write hooks call call invalidate_inode_pages2[_range]() conditionally iff mapping->nrpages is not zero, so we may not invalidate data in the cleancache. Another odd thing is that we check only for ->nrpages and don't check for ->nrexceptional, but invalidate_inode_pages2[_range] also invalidates exceptional entries as well. So we invalidate exceptional entries only if ->nrpages != 0? This doesn't feel right. - Patch 1 fixes direct IO writes by removing ->nrpages check. - Patch 2 fixes similar case in invalidate_bdev(). Note: I only fixed conditional cleancache_invalidate_inode() here. Do we also need to add ->nrexceptional check in into invalidate_bdev()? - Patches 3-4: some optimizations. This patch (of 4): Some direct IO write fs hooks call invalidate_inode_pages2[_range]() conditionally iff mapping->nrpages is not zero. This can't be right, because invalidate_inode_pages2[_range]() also invalidate data in the cleancache via cleancache_invalidate_inode() call. So if page cache is empty but there is some data in the cleancache, buffered read after direct IO write would get stale data from the cleancache. Also it doesn't feel right to check only for ->nrpages because invalidate_inode_pages2[_range] invalidates exceptional entries as well. Fix this by calling invalidate_inode_pages2[_range]() regardless of nrpages state. Note: nfs,cifs,9p doesn't need similar fix because the never call cleancache_get_page() (nor directly, nor via mpage_readpage[s]()), so they are not affected by this bug. 
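The shape of the fix, common to the iomap and filemap hunks below (page-index arithmetic elided for brevity):

        /* before: stale cleancache copies survived a direct write whenever
         * the page cache happened to be empty */
        if (mapping->nrpages)
                invalidate_inode_pages2_range(mapping, start, end);

        /* after: invalidate unconditionally, so cleancache and exceptional
         * entries are dropped even when ->nrpages == 0 */
        invalidate_inode_pages2_range(mapping, start, end);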
Fixes: c515e1fd361c ("mm/fs: add hooks to support cleancache") Link: http://lkml.kernel.org/r/20170424164135.22350-2-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reviewed-by: Jan Kara Acked-by: Konrad Rzeszutek Wilk Cc: Alexander Viro Cc: Ross Zwisler Cc: Jens Axboe Cc: Johannes Weiner Cc: Alexey Kuznetsov Cc: Christoph Hellwig Cc: Nikolay Borisov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/iomap.c | 18 ++++++++---------- mm/filemap.c | 26 +++++++++++--------------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 141c3cd55a8b..1c25ae30500e 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -887,16 +887,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, flags |= IOMAP_WRITE; } - if (mapping->nrpages) { - ret = filemap_write_and_wait_range(mapping, start, end); - if (ret) - goto out_free_dio; + ret = filemap_write_and_wait_range(mapping, start, end); + if (ret) + goto out_free_dio; - ret = invalidate_inode_pages2_range(mapping, - start >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - ret = 0; - } + ret = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; inode_dio_begin(inode); @@ -951,7 +949,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... */ - if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { + if (iov_iter_rw(iter) == WRITE) { int err = invalidate_inode_pages2_range(mapping, start >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(err); diff --git a/mm/filemap.c b/mm/filemap.c index 1694623a6289..157c047b180a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2719,18 +2719,16 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) * about to write. We do this *before* the write so that we can return * without clobbering -EIOCBQUEUED from ->direct_IO(). */ - if (mapping->nrpages) { - written = invalidate_inode_pages2_range(mapping, + written = invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end); - /* - * If a page can not be invalidated, return 0 to fall back - * to buffered write. - */ - if (written) { - if (written == -EBUSY) - return 0; - goto out; - } + /* + * If a page can not be invalidated, return 0 to fall back + * to buffered write. + */ + if (written) { + if (written == -EBUSY) + return 0; + goto out; } data = *from; @@ -2744,10 +2742,8 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) * so we don't support it 100%. If this invalidation * fails, tough, the write still worked... */ - if (mapping->nrpages) { - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - } + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); if (written > 0) { pos += written; From f7bccf3f166eee5afce01aead5c0b37fbc2defe0 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 3 May 2017 14:56:02 -0700 Subject: [PATCH 092/465] fs/block_dev: always invalidate cleancache in invalidate_bdev() commit a5f6a6a9c72eac38a7fadd1a038532bc8516337c upstream. invalidate_bdev() calls cleancache_invalidate_inode() iff ->nrpages != 0 which doen't make any sense. Make sure that invalidate_bdev() always calls cleancache_invalidate_inode() regardless of mapping->nrpages value. 
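The resulting invalidate_bdev(), sketched from the hunk below together with the cleancache call the changelog refers to (that call sits just past the quoted diff context):

        void invalidate_bdev(struct block_device *bdev)
        {
                struct address_space *mapping = bdev->bd_inode->i_mapping;

                if (mapping->nrpages) {
                        invalidate_bh_lrus();
                        lru_add_drain_all();
                        invalidate_mapping_pages(mapping, 0, -1);
                }
                /* no early return: the cleancache flush now always runs */
                cleancache_invalidate_inode(mapping);
        }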
Fixes: c515e1fd361c ("mm/fs: add hooks to support cleancache") Link: http://lkml.kernel.org/r/20170424164135.22350-3-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reviewed-by: Jan Kara Acked-by: Konrad Rzeszutek Wilk Cc: Alexander Viro Cc: Ross Zwisler Cc: Jens Axboe Cc: Johannes Weiner Cc: Alexey Kuznetsov Cc: Christoph Hellwig Cc: Nikolay Borisov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 56039dfbc674..c2a7ec8e9c03 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -103,12 +103,11 @@ void invalidate_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_inode->i_mapping; - if (mapping->nrpages == 0) - return; - - invalidate_bh_lrus(); - lru_add_drain_all(); /* make sure all lru add caches are flushed */ - invalidate_mapping_pages(mapping, 0, -1); + if (mapping->nrpages) { + invalidate_bh_lrus(); + lru_add_drain_all(); /* make sure all lru add caches are flushed */ + invalidate_mapping_pages(mapping, 0, -1); + } /* 99% of the time, we don't need to flush the cleancache on the bdev. * But, for the strange corners, lets be cautious */ From 41a68ab851822e3588952ae9f2709a61a88338f7 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:55:03 -0700 Subject: [PATCH 093/465] mm: vmscan: fix IO/refault regression in cache workingset transition commit 2a2e48854d704214dac7546e87ae0e4daa0e61a0 upstream. Since commit 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list") we noticed bigger IO spikes during changes in cache access patterns. The patch in question shrunk the inactive list size to leave more room for the current workingset in the presence of streaming IO. However, workingset transitions that previously happened on the inactive list are now pushed out of memory and incur more refaults to complete. This patch disables active list protection when refaults are being observed. This accelerates workingset transitions, and allows more of the new set to establish itself from memory, without eating into the ability to protect the established workingset during stable periods. The workloads that were measurably affected for us were hit pretty bad by it, with refault/majfault rates doubling and tripling during cache transitions, and the machines sustaining half-hour periods of 100% IO utilization, where they'd previously have sub-minute peaks at 60-90%. Stateful services that handle user data tend to be more conservative with kernel upgrades. As a result we hit most page cache issues with some delay, as was the case here. The severity seemed to warrant a stable tag. 
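The heart of the heuristic, reduced to the global-LRU case (names as in the inactive_list_is_low() hunk below):

        /* compare refaults counted now against the snapshot taken at the end
         * of the previous reclaim cycle (snapshot_refaults()); a difference on
         * the file LRU means a workingset transition is under way */
        refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
        if (file && actual_reclaim && lruvec->refaults != refaults)
                inactive_ratio = 0;                     /* drop active-list protection */
        else
                inactive_ratio = int_sqrt(10 * gb);     /* usual size-based ratio */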
Fixes: 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list") Link: http://lkml.kernel.org/r/20170404220052.27593-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/memcontrol.h | 64 ++++++++++++++++++++++++-- include/linux/mmzone.h | 2 + mm/memcontrol.c | 24 +++------- mm/vmscan.c | 94 ++++++++++++++++++++++++++++++-------- mm/workingset.c | 7 ++- 5 files changed, 150 insertions(+), 41 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bb7250c45cb8..e650f6f7d0bf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -56,6 +56,9 @@ enum mem_cgroup_stat_index { MEMCG_SLAB_RECLAIMABLE, MEMCG_SLAB_UNRECLAIMABLE, MEMCG_SOCK, + MEMCG_WORKINGSET_REFAULT, + MEMCG_WORKINGSET_ACTIVATE, + MEMCG_WORKINGSET_NODERECLAIM, MEMCG_NR_STAT, }; @@ -494,6 +497,40 @@ extern int do_swap_account; void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); +static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ + long val = 0; + int cpu; + + for_each_possible_cpu(cpu) + val += per_cpu(memcg->stat->count[idx], cpu); + + if (val < 0) + val = 0; + + return val; +} + +static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, int val) +{ + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->count[idx], val); +} + +static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ + mem_cgroup_update_stat(memcg, idx, 1); +} + +static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ + mem_cgroup_update_stat(memcg, idx, -1); +} + /** * mem_cgroup_update_page_stat - update page state statistics * @page: the page @@ -508,14 +545,14 @@ void unlock_page_memcg(struct page *page); * if (TestClearPageState(page)) * mem_cgroup_update_page_stat(page, state, -1); * unlock_page(page) or unlock_page_memcg(page) + * + * Kernel pages are an exception to this, since they'll never move. 
*/ static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { - VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); - if (page->mem_cgroup) - this_cpu_add(page->mem_cgroup->stat->count[idx], val); + mem_cgroup_update_stat(page->mem_cgroup, idx, val); } static inline void mem_cgroup_inc_page_stat(struct page *page, @@ -740,6 +777,27 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } +static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ + return 0; +} + +static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, int val) +{ +} + +static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ +} + +static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ +} + static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int nr) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8e02b3750fe0..d45172b559d8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -226,6 +226,8 @@ struct lruvec { struct zone_reclaim_stat reclaim_stat; /* Evictions & activations on the inactive file list */ atomic_long_t inactive_age; + /* Refaults at the time of last reclaim cycle */ + unsigned long refaults; #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2bd7541d7c11..a4b8fc7aaf80 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -568,23 +568,6 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) * common workload, threshold and synchronization as vmstat[] should be * implemented. */ -static unsigned long -mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) -{ - long val = 0; - int cpu; - - /* Per-cpu values can be negative, use a signed accumulator */ - for_each_possible_cpu(cpu) - val += per_cpu(memcg->stat->count[idx], cpu); - /* - * Summing races with updates, so val may be negative. Avoid exposing - * transient negative values. - */ - if (val < 0) - val = 0; - return val; -} static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx) @@ -5237,6 +5220,13 @@ static int memory_stat_show(struct seq_file *m, void *v) seq_printf(m, "pgmajfault %lu\n", events[MEM_CGROUP_EVENTS_PGMAJFAULT]); + seq_printf(m, "workingset_refault %lu\n", + stat[MEMCG_WORKINGSET_REFAULT]); + seq_printf(m, "workingset_activate %lu\n", + stat[MEMCG_WORKINGSET_ACTIVATE]); + seq_printf(m, "workingset_nodereclaim %lu\n", + stat[MEMCG_WORKINGSET_NODERECLAIM]); + return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index bc8031ef994d..1345d5fba4a2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2033,6 +2033,8 @@ static void shrink_active_list(unsigned long nr_to_scan, * Both inactive lists should also be large enough that each inactive * page has a chance to be referenced again before it is reclaimed. * + * If that fails and refaulting is observed, the inactive list grows. + * * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages * on this LRU, maintained by the pageout code. A zone->inactive_ratio * of 3 means 3:1 or 25% of the pages are kept on the inactive list. 
@@ -2049,12 +2051,15 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct scan_control *sc, bool trace) + struct mem_cgroup *memcg, + struct scan_control *sc, bool actual_reclaim) { - unsigned long inactive_ratio; - unsigned long inactive, active; - enum lru_list inactive_lru = file * LRU_FILE; enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + enum lru_list inactive_lru = file * LRU_FILE; + unsigned long inactive, active; + unsigned long inactive_ratio; + unsigned long refaults; unsigned long gb; /* @@ -2067,27 +2072,43 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); - gb = (inactive + active) >> (30 - PAGE_SHIFT); - if (gb) - inactive_ratio = int_sqrt(10 * gb); + if (memcg) + refaults = mem_cgroup_read_stat(memcg, + MEMCG_WORKINGSET_ACTIVATE); else - inactive_ratio = 1; + refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); + + /* + * When refaults are being observed, it means a new workingset + * is being established. Disable active list protection to get + * rid of the stale workingset quickly. + */ + if (file && actual_reclaim && lruvec->refaults != refaults) { + inactive_ratio = 0; + } else { + gb = (inactive + active) >> (30 - PAGE_SHIFT); + if (gb) + inactive_ratio = int_sqrt(10 * gb); + else + inactive_ratio = 1; + } - if (trace) - trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id, - sc->reclaim_idx, - lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive, - lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active, - inactive_ratio, file); + if (actual_reclaim) + trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx, + lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive, + lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active, + inactive_ratio, file); return inactive * inactive_ratio < active; } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct scan_control *sc) + struct lruvec *lruvec, struct mem_cgroup *memcg, + struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), + memcg, sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2218,7 +2239,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, sc, false) && + if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2376,7 +2397,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, sc); + lruvec, memcg, sc); } } @@ -2443,7 +2464,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. 
*/ - if (inactive_list_is_low(lruvec, false, sc, true)) + if (inactive_list_is_low(lruvec, false, memcg, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2752,6 +2773,26 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) sc->gfp_mask = orig_mask; } +static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) +{ + struct mem_cgroup *memcg; + + memcg = mem_cgroup_iter(root_memcg, NULL, NULL); + do { + unsigned long refaults; + struct lruvec *lruvec; + + if (memcg) + refaults = mem_cgroup_read_stat(memcg, + MEMCG_WORKINGSET_ACTIVATE); + else + refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); + + lruvec = mem_cgroup_lruvec(pgdat, memcg); + lruvec->refaults = refaults; + } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); +} + /* * This is the main entry point to direct page reclaim. * @@ -2772,6 +2813,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int initial_priority = sc->priority; + pg_data_t *last_pgdat; + struct zoneref *z; + struct zone *zone; retry: delayacct_freepages_start(); @@ -2798,6 +2842,15 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->may_writepage = 1; } while (--sc->priority >= 0); + last_pgdat = NULL; + for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, + sc->nodemask) { + if (zone->zone_pgdat == last_pgdat) + continue; + last_pgdat = zone->zone_pgdat; + snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); + } + delayacct_freepages_end(); if (sc->nr_reclaimed) @@ -3076,7 +3129,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); - if (inactive_list_is_low(lruvec, false, sc, true)) + if (inactive_list_is_low(lruvec, false, memcg, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); @@ -3311,6 +3364,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) } while (sc.priority >= 1); out: + snapshot_refaults(NULL, pgdat); /* * Return the order kswapd stopped reclaiming at as * prepare_kswapd_sleep() takes it into account. 
If another caller diff --git a/mm/workingset.c b/mm/workingset.c index eda05c71fa49..51c6f61d4cea 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -269,7 +269,6 @@ bool workingset_refault(void *shadow) lruvec = mem_cgroup_lruvec(pgdat, memcg); refault = atomic_long_read(&lruvec->inactive_age); active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES); - rcu_read_unlock(); /* * The unsigned subtraction here gives an accurate distance @@ -290,11 +289,15 @@ bool workingset_refault(void *shadow) refault_distance = (refault - eviction) & EVICTION_MASK; inc_node_state(pgdat, WORKINGSET_REFAULT); + mem_cgroup_inc_stat(memcg, MEMCG_WORKINGSET_REFAULT); if (refault_distance <= active_file) { inc_node_state(pgdat, WORKINGSET_ACTIVATE); + mem_cgroup_inc_stat(memcg, MEMCG_WORKINGSET_ACTIVATE); + rcu_read_unlock(); return true; } + rcu_read_unlock(); return false; } @@ -472,6 +475,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, if (WARN_ON_ONCE(node->exceptional)) goto out_invalid; inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); + mem_cgroup_inc_page_stat(virt_to_page(node), + MEMCG_WORKINGSET_NODERECLAIM); __radix_tree_delete_node(&mapping->page_tree, node, workingset_update_node, mapping); From d322fd67ca66cb7da82b2c348a1485a409b07ce2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 8 May 2017 15:59:46 -0700 Subject: [PATCH 094/465] mm: prevent potential recursive reclaim due to clearing PF_MEMALLOC commit 62be1511b1db8066220b18b7d4da2e6b9fdc69fb upstream. Patch series "more robust PF_MEMALLOC handling" This series aims to unify the setting and clearing of PF_MEMALLOC, which prevents recursive reclaim. There are some places that clear the flag unconditionally from current->flags, which may result in clearing a pre-existing flag. This already resulted in a bug report that Patch 1 fixes (without the new helpers, to make backporting easier). Patch 2 introduces the new helpers, modelled after existing memalloc_noio_* and memalloc_nofs_* helpers, and converts mm core to use them. Patches 3 and 4 convert non-mm code. This patch (of 4): __alloc_pages_direct_compact() sets PF_MEMALLOC to prevent deadlock during page migration by lock_page() (see the comment in __unmap_and_move()). Then it unconditionally clears the flag, which can clear a pre-existing PF_MEMALLOC flag and result in recursive reclaim. This was not a problem until commit a8161d1ed609 ("mm, page_alloc: restructure direct compaction handling in slowpath"), because direct compation was called only after direct reclaim, which was skipped when PF_MEMALLOC flag was set. Even now it's only a theoretical issue, as the new callsite of __alloc_pages_direct_compact() is reached only for costly orders and when gfp_pfmemalloc_allowed() is true, which means either __GFP_NOMEMALLOC is in gfp_flags or in_interrupt() is true. There is no such known context, but let's play it safe and make __alloc_pages_direct_compact() robust for cases where PF_MEMALLOC is already set. Fixes: a8161d1ed609 ("mm, page_alloc: restructure direct compaction handling in slowpath") Link: http://lkml.kernel.org/r/20170405074700.29871-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reported-by: Andrey Ryabinin Acked-by: Michal Hocko Acked-by: Hillf Danton Cc: Mel Gorman Cc: Johannes Weiner Cc: Boris Brezillon Cc: Chris Leech Cc: "David S. 
Miller" Cc: Eric Dumazet Cc: Josef Bacik Cc: Lee Duncan Cc: Michal Hocko Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07efbc3a8656..c5fee5a0316d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3245,6 +3245,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, enum compact_priority prio, enum compact_result *compact_result) { struct page *page; + unsigned int noreclaim_flag = current->flags & PF_MEMALLOC; if (!order) return NULL; @@ -3252,7 +3253,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, current->flags |= PF_MEMALLOC; *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, prio); - current->flags &= ~PF_MEMALLOC; + current->flags = (current->flags & ~PF_MEMALLOC) | noreclaim_flag; if (*compact_result <= COMPACT_INACTIVE) return NULL; From ecf9bb0fd7ab18f166077c673b42b72c9ad5f91b Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Wed, 26 Apr 2017 14:05:46 +0100 Subject: [PATCH 095/465] Fix match_prepath() commit cd8c42968ee651b69e00f8661caff32b0086e82d upstream. Incorrect return value for shares not using the prefix path means that we will never match superblocks for these shares. Fixes: commit c1d8b24d1819 ("Compare prepaths when comparing superblocks") Signed-off-by: Sachin Prabhu Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d82467cfb0e2..d95744d8b8ab 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2912,16 +2912,14 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; + bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; + bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; - if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) { - if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)) - return 0; - /* The prepath should be null terminated strings */ - if (strcmp(new->prepath, old->prepath)) - return 0; - + if (old_set && new_set && !strcmp(new->prepath, old->prepath)) return 1; - } + else if (!old_set && !new_set) + return 1; + return 0; } From fdf150ee558cca25e9ec0fd1c1d010366ef59ffc Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Wed, 26 Apr 2017 17:10:17 +0100 Subject: [PATCH 096/465] Do not return number of bytes written for ioctl CIFS_IOC_COPYCHUNK_FILE commit 7d0c234fd2e1c9ca3fa032696c0c58b1b74a9e0b upstream. commit 620d8745b35d ("Introduce cifs_copy_file_range()") changes the behaviour of the cifs ioctl call CIFS_IOC_COPYCHUNK_FILE. In case of successful writes, it now returns the number of bytes written. This return value is treated as an error by the xfstest cifs/001. Depending on the errno set at that time, this may or may not result in the test failing. The patch fixes this by setting the return value to 0 in case of successful writes. 
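The normalization amounts to (as in the hunk below):

        rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0,
                                       src_inode->i_size, 0);
        if (rc > 0)     /* bytes copied: the ioctl should just report success */
                rc = 0;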
Fixes: commit 620d8745b35d ("Introduce cifs_copy_file_range()") Reported-by: Eryu Guan Signed-off-by: Sachin Prabhu Acked-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 265c45fe4ea5..7f4bba574930 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -74,7 +74,8 @@ static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file, rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0, src_inode->i_size, 0); - + if (rc > 0) + rc = 0; out_fput: fdput(src_file); out_drop_write: From 49f70d15b5b9780269a9397ac28c63fe20563ae0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 2 May 2017 13:35:20 -0500 Subject: [PATCH 097/465] Set unicode flag on cifs echo request to avoid Mac error commit 26c9cb668c7fbf9830516b75d8bee70b699ed449 upstream. Mac requires the unicode flag to be set for cifs, even for the smb echo request (which doesn't have strings). Without this Mac rejects the periodic echo requests (when mounting with cifs) that we use to check if server is down Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifssmb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5d21f00ae341..205fd94f52fd 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -718,6 +718,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server) if (rc) return rc; + if (server->capabilities & CAP_UNICODE) + smb->hdr.Flags2 |= SMBFLG2_UNICODE; + /* set up echo request */ smb->hdr.Tid = 0xffff; smb->hdr.WordCount = 1; From 6f95f1925abbced7445385d5b06c7cfc558dec36 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 3 May 2017 21:12:20 -0500 Subject: [PATCH 098/465] SMB3: Work around mount failure when using SMB3 dialect to Macs commit 7db0a6efdc3e990cdfd4b24820d010e9eb7890ad upstream. Macs send the maximum buffer size in response on ioctl to validate negotiate security information, which causes us to fail the mount as the response buffer is larger than the expected response. Changed ioctl response processing to allow for padding of validate negotiate ioctl response and limit the maximum response size to maximum buffer size. Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 02da648041fc..0fd63f0bc440 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -632,8 +632,12 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { - cifs_dbg(VFS, "invalid size of protocol negotiate response\n"); - return -EIO; + cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n", + rsplen); + + /* relax check since Mac returns max bufsize allowed on ioctl */ + if (rsplen > CIFSMaxBufSize) + return -EIO; } /* check validate negotiate info response matches what we got earlier */ @@ -1853,8 +1857,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, * than one credit. Windows typically sets this smaller, but for some * ioctls it may be useful to allow server to send more. 
No point * limiting what the server can send as long as fits in one credit + * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE + * (by default, note that it can be overridden to make max larger) + * in responses (except for read responses which can be bigger. + * We may want to bump this limit up */ - req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */ + req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize); if (is_fsctl) req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); From 160239dfbf8925cc5ab11b39852ec37ab9df437a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Jacke?= Date: Wed, 3 May 2017 23:47:44 +0200 Subject: [PATCH 099/465] CIFS: fix mapping of SFM_SPACE and SFM_PERIOD commit b704e70b7cf48f9b67c07d585168e102dfa30bb4 upstream. - trailing space maps to 0xF028 - trailing period maps to 0xF029 This fix corrects the mapping of file names which have a trailing character that would otherwise be illegal (period or space) but is allowed by POSIX. Signed-off-by: Bjoern Jacke Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_unicode.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 3d7298cc0aeb..7bfb76e60401 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -64,8 +64,8 @@ #define SFM_LESSTHAN ((__u16) 0xF023) #define SFM_PIPE ((__u16) 0xF027) #define SFM_SLASH ((__u16) 0xF026) -#define SFM_PERIOD ((__u16) 0xF028) -#define SFM_SPACE ((__u16) 0xF029) +#define SFM_SPACE ((__u16) 0xF028) +#define SFM_PERIOD ((__u16) 0xF029) /* * Mapping mechanism to use when one of the seven reserved characters is From 63d86cd9703cc367bfe635b20596fbca43e3e0c9 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 3 May 2017 17:39:09 +0200 Subject: [PATCH 100/465] cifs: fix leak in FSCTL_ENUM_SNAPS response handling commit 0e5c795592930d51fd30d53a2e7b73cba022a29b upstream. The server may respond with success, and an output buffer less than sizeof(struct smb_snapshot_array) in length. Do not leak the output buffer in this case. Fixes: 834170c85978 ("Enable previous version support") Signed-off-by: David Disseldorp Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 152e37f2ad92..c58691834eb2 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -942,6 +942,7 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, } if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) { rc = -ERANGE; + kfree(retbuf); return rc; } From 076c404c128dd1c2c3ef6971fd95fbfb4b7df411 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 3 May 2017 17:39:08 +0200 Subject: [PATCH 101/465] cifs: fix CIFS_ENUMERATE_SNAPSHOTS oops commit 6026685de33b0db5b2b6b0e9b41b3a1a3261033c upstream. As with 618763958b22, an open directory may have a NULL private_data pointer prior to readdir. CIFS_ENUMERATE_SNAPSHOTS must check for this before dereference. 
Fixes: 834170c85978 ("Enable previous version support") Signed-off-by: David Disseldorp Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 7f4bba574930..4d598a71cf84 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -213,6 +213,8 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); break; case CIFS_ENUMERATE_SNAPSHOTS: + if (pSMBFile == NULL) + break; if (arg == 0) { rc = -EINVAL; goto cifs_ioc_exit; From 33fa011644900f7bf551c29b26eebaddb1eec020 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 3 May 2017 17:54:01 +0200 Subject: [PATCH 102/465] CIFS: fix oplock break deadlocks commit 3998e6b87d4258a70df358296d6f1c7234012bfe upstream. When the final cifsFileInfo_put() is called from cifsiod and an oplock break work is queued, lockdep complains loudly: ============================================= [ INFO: possible recursive locking detected ] 4.11.0+ #21 Not tainted --------------------------------------------- kworker/0:2/78 is trying to acquire lock: ("cifsiod"){++++.+}, at: flush_work+0x215/0x350 but task is already holding lock: ("cifsiod"){++++.+}, at: process_one_work+0x255/0x8e0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock("cifsiod"); lock("cifsiod"); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by kworker/0:2/78: #0: ("cifsiod"){++++.+}, at: process_one_work+0x255/0x8e0 #1: ((&wdata->work)){+.+...}, at: process_one_work+0x255/0x8e0 stack backtrace: CPU: 0 PID: 78 Comm: kworker/0:2 Not tainted 4.11.0+ #21 Workqueue: cifsiod cifs_writev_complete Call Trace: dump_stack+0x85/0xc2 __lock_acquire+0x17dd/0x2260 ? match_held_lock+0x20/0x2b0 ? trace_hardirqs_off_caller+0x86/0x130 ? mark_lock+0xa6/0x920 lock_acquire+0xcc/0x260 ? lock_acquire+0xcc/0x260 ? flush_work+0x215/0x350 flush_work+0x236/0x350 ? flush_work+0x215/0x350 ? destroy_worker+0x170/0x170 __cancel_work_timer+0x17d/0x210 ? ___preempt_schedule+0x16/0x18 cancel_work_sync+0x10/0x20 cifsFileInfo_put+0x338/0x7f0 cifs_writedata_release+0x2a/0x40 ? cifs_writedata_release+0x2a/0x40 cifs_writev_complete+0x29d/0x850 ? preempt_count_sub+0x18/0xd0 process_one_work+0x304/0x8e0 worker_thread+0x9b/0x6a0 kthread+0x1b2/0x200 ? process_one_work+0x8e0/0x8e0 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x31/0x40 This is a real warning. Since the oplock is queued on the same workqueue this can deadlock if there is only one worker thread active for the workqueue (which will be the case during memory pressure when the rescuer thread is handling it). Furthermore, there is at least one other kind of hang possible due to the oplock break handling if there is only worker. (This can be reproduced without introducing memory pressure by having passing 1 for the max_active parameter of cifsiod.) cifs_oplock_break() can wait indefintely in the filemap_fdatawait() while the cifs_writev_complete() work is blocked: sysrq: SysRq : Show Blocked State task PC stack pid father kworker/0:1 D 0 16 2 0x00000000 Workqueue: cifsiod cifs_oplock_break Call Trace: __schedule+0x562/0xf40 ? mark_held_locks+0x4a/0xb0 schedule+0x57/0xe0 io_schedule+0x21/0x50 wait_on_page_bit+0x143/0x190 ? add_to_page_cache_lru+0x150/0x150 __filemap_fdatawait_range+0x134/0x190 ? do_writepages+0x51/0x70 filemap_fdatawait_range+0x14/0x30 filemap_fdatawait+0x3b/0x40 cifs_oplock_break+0x651/0x710 ? 
preempt_count_sub+0x18/0xd0 process_one_work+0x304/0x8e0 worker_thread+0x9b/0x6a0 kthread+0x1b2/0x200 ? process_one_work+0x8e0/0x8e0 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x31/0x40 dd D 0 683 171 0x00000000 Call Trace: __schedule+0x562/0xf40 ? mark_held_locks+0x29/0xb0 schedule+0x57/0xe0 io_schedule+0x21/0x50 wait_on_page_bit+0x143/0x190 ? add_to_page_cache_lru+0x150/0x150 __filemap_fdatawait_range+0x134/0x190 ? do_writepages+0x51/0x70 filemap_fdatawait_range+0x14/0x30 filemap_fdatawait+0x3b/0x40 filemap_write_and_wait+0x4e/0x70 cifs_flush+0x6a/0xb0 filp_close+0x52/0xa0 __close_fd+0xdc/0x150 SyS_close+0x33/0x60 entry_SYSCALL_64_fastpath+0x1f/0xbe Showing all locks held in the system: 2 locks held by kworker/0:1/16: #0: ("cifsiod"){.+.+.+}, at: process_one_work+0x255/0x8e0 #1: ((&cfile->oplock_break)){+.+.+.}, at: process_one_work+0x255/0x8e0 Showing busy workqueues and worker pools: workqueue cifsiod: flags=0xc pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/1 in-flight: 16:cifs_oplock_break delayed: cifs_writev_complete, cifs_echo_request pool 0: cpus=0 node=0 flags=0x0 nice=0 hung=0s workers=3 idle: 750 3 Fix these problems by creating a a new workqueue (with a rescuer) for the oplock break work. Signed-off-by: Rabin Vincent Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsfs.c | 15 +++++++++++++-- fs/cifs/cifsglob.h | 1 + fs/cifs/misc.c | 2 +- fs/cifs/smb2misc.c | 5 +++-- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index dd3f5fabfdf6..aa553d0b58db 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; +struct workqueue_struct *cifsoplockd_wq; __u32 cifs_lock_secret; /* @@ -1369,9 +1370,16 @@ init_cifs(void) goto out_clean_proc; } + cifsoplockd_wq = alloc_workqueue("cifsoplockd", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!cifsoplockd_wq) { + rc = -ENOMEM; + goto out_destroy_cifsiod_wq; + } + rc = cifs_fscache_register(); if (rc) - goto out_destroy_wq; + goto out_destroy_cifsoplockd_wq; rc = cifs_init_inodecache(); if (rc) @@ -1419,7 +1427,9 @@ init_cifs(void) cifs_destroy_inodecache(); out_unreg_fscache: cifs_fscache_unregister(); -out_destroy_wq: +out_destroy_cifsoplockd_wq: + destroy_workqueue(cifsoplockd_wq); +out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); out_clean_proc: cifs_proc_clean(); @@ -1442,6 +1452,7 @@ exit_cifs(void) cifs_destroy_mids(); cifs_destroy_inodecache(); cifs_fscache_unregister(); + destroy_workqueue(cifsoplockd_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 37f5a41cc50c..17f0e732eedc 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1683,6 +1683,7 @@ void cifs_oplock_break(struct work_struct *work); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; +extern struct workqueue_struct *cifsoplockd_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_mid_poolp; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index d3fb11529ed9..b578c6d09597 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -492,7 +492,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &pCifsInode->flags); - queue_work(cifsiod_wq, + queue_work(cifsoplockd_wq, &netfile->oplock_break); netfile->oplock_break_cancelled = false; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 
1a04b3a5beb1..7b08a1446a7f 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -499,7 +499,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, else cfile->oplock_break_cancelled = true; - queue_work(cifsiod_wq, &cfile->oplock_break); + queue_work(cifsoplockd_wq, &cfile->oplock_break); kfree(lw); return true; } @@ -643,7 +643,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); spin_unlock(&cfile->file_info_lock); - queue_work(cifsiod_wq, &cfile->oplock_break); + queue_work(cifsoplockd_wq, + &cfile->oplock_break); spin_unlock(&tcon->open_file_lock); spin_unlock(&cifs_tcp_ses_lock); From c4f35cc8dc593ac6633f24459e2bc445c9d50bd6 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Thu, 4 May 2017 00:41:13 +0200 Subject: [PATCH 103/465] cifs: fix CIFS_IOC_GET_MNT_INFO oops commit d8a6e505d6bba2250852fbc1c1c86fe68aaf9af3 upstream. An open directory may have a NULL private_data pointer prior to readdir. Fixes: 0de1f4c6f6c0 ("Add way to query server fs info for smb3") Signed-off-by: David Disseldorp Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 4d598a71cf84..76fb0917dc8c 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -209,6 +209,8 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = -EOPNOTSUPP; break; case CIFS_IOC_GET_MNT_INFO: + if (pSMBFile == NULL) + break; tcon = tlink_tcon(pSMBFile->tlink); rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); break; From 6a1baa3a163c8a802c008eeac65ab659ecdd0e0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Jacke?= Date: Fri, 5 May 2017 04:36:16 +0200 Subject: [PATCH 104/465] CIFS: add misssing SFM mapping for doublequote commit 85435d7a15294f9f7ef23469e6aaf7c5dfcc54f0 upstream. SFM is mapping doublequote to 0xF020 Without this patch creating files with doublequote fails to Windows/Mac Signed-off-by: Bjoern Jacke Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_unicode.c | 6 ++++++ fs/cifs/cifs_unicode.h | 1 + 2 files changed, 7 insertions(+) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 02b071bf3732..a0b3e7d1be48 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -83,6 +83,9 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_COLON: *target = ':'; break; + case SFM_DOUBLEQUOTE: + *target = '"'; + break; case SFM_ASTERISK: *target = '*'; break; @@ -418,6 +421,9 @@ static __le16 convert_to_sfm_char(char src_char, bool end_of_string) case ':': dest_char = cpu_to_le16(SFM_COLON); break; + case '"': + dest_char = cpu_to_le16(SFM_DOUBLEQUOTE); + break; case '*': dest_char = cpu_to_le16(SFM_ASTERISK); break; diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 7bfb76e60401..8a79a34e66b8 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -57,6 +57,7 @@ * not conflict (although almost does) with the mapping above. */ +#define SFM_DOUBLEQUOTE ((__u16) 0xF020) #define SFM_ASTERISK ((__u16) 0xF021) #define SFM_QUESTION ((__u16) 0xF025) #define SFM_COLON ((__u16) 0xF022) From f33f17b28d312ac2cc7c06e1a96aaa93a803be1d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 24 Apr 2017 22:26:40 +0300 Subject: [PATCH 105/465] ovl: do not set overlay.opaque on non-dir create commit 4a99f3c83dc493c8ea84693d78cd792839c8aa64 upstream. 
The optimization for opaque dir create was wrongly being applied also to non-dir create. Fixes: 97c684cc9110 ("ovl: create directories inside merged parent opaque") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 6515796460df..bfabc65fdc74 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -210,7 +210,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (err) goto out_dput; - if (ovl_type_merge(dentry->d_parent)) { + if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) { /* Setting opaque here is just an optimization, allow to fail */ ovl_set_opaque(dentry, newdentry); } From 7d708085476d8183c4753d3a1f8ca813fbb0d945 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 7 Apr 2017 02:33:30 +0200 Subject: [PATCH 106/465] padata: free correct variable commit 07a77929ba672d93642a56dc2255dd21e6e2290b upstream. The author meant to free the variable that was just allocated, instead of the one that failed to be allocated, but made a simple typo. This patch rectifies that. Signed-off-by: Jason A. Donenfeld Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- kernel/padata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/padata.c b/kernel/padata.c index 3202aa17492c..f1aef1639204 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -354,7 +354,7 @@ static int padata_setup_cpumasks(struct parallel_data *pd, cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { - free_cpumask_var(pd->cpumask.cbcpu); + free_cpumask_var(pd->cpumask.pcpu); return -ENOMEM; } From 9bcb9cc96d7396430d9f978cce5085f8c9928970 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 6 Apr 2017 12:06:37 +1000 Subject: [PATCH 107/465] md/raid1: avoid reusing a resync bio after error handling. commit 0c9d5b127f695818c2c5a3868c1f28ca2969e905 upstream. fix_sync_read_error() modifies a bio on a newly faulty device by setting bi_end_io to end_sync_write. This ensure that put_buf() will still call rdev_dec_pending() as required, but makes sure that subsequent code in fix_sync_read_error() doesn't try to read from the device. Unfortunately this interacts badly with sync_request_write() which assumes that any bio with bi_end_io set to non-NULL other than end_sync_read is safe to write to. As the device is now faulty it doesn't make sense to write. As the bio was recently used for a read, it is "dirty" and not suitable for immediate submission. In particular, ->bi_next might be non-NULL, which will cause generic_make_request() to complain. Break this interaction by refusing to write to devices which are marked as Faulty. 
Reported-and-tested-by: Michael Wang Fixes: 2e52d449bcec ("md/raid1: add failfast handling for reads.") Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a34f58772022..839ead062645 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2222,6 +2222,8 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) (i == r1_bio->read_disk || !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) continue; + if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) + continue; bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (test_bit(FailFast, &conf->mirrors[i].rdev->flags)) From 8931477790743c6c2593948b8dddb452171eb3a8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 17 Mar 2017 12:48:09 -0600 Subject: [PATCH 108/465] device-dax: fix cdev leak commit ed01e50acdd3e4a640cf9ebd28a7e810c3ceca97 upstream. If device_add() fails, cleanup the cdev. Otherwise, we leak a kobj_map() with a stale device number. As Jason points out, there is a small possibility that userspace has opened and mapped the device in the time between cdev_add() and the device_add() failure. We need a new kill_dax_dev() helper to invalidate any established mappings. Fixes: ba09c01d2fa8 ("dax: convert to the cdev api") Reported-by: Jason Gunthorpe Signed-off-by: Dan Williams Signed-off-by: Logan Gunthorpe Reviewed-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/dax/dax.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 806f180c80d8..ca939ffd3a4d 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -703,13 +703,10 @@ static void dax_dev_release(struct device *dev) kfree(dax_dev); } -static void unregister_dax_dev(void *dev) +static void kill_dax_dev(struct dax_dev *dax_dev) { - struct dax_dev *dax_dev = to_dax_dev(dev); struct cdev *cdev = &dax_dev->cdev; - dev_dbg(dev, "%s\n", __func__); - /* * Note, rcu is not protecting the liveness of dax_dev, rcu is * ensuring that any fault handlers that might have seen @@ -721,6 +718,15 @@ static void unregister_dax_dev(void *dev) synchronize_srcu(&dax_srcu); unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); cdev_del(cdev); +} + +static void unregister_dax_dev(void *dev) +{ + struct dax_dev *dax_dev = to_dax_dev(dev); + + dev_dbg(dev, "%s\n", __func__); + + kill_dax_dev(dax_dev); device_unregister(dev); } @@ -797,6 +803,7 @@ struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region, dev_set_name(dev, "dax%d.%d", dax_region->id, dax_dev->id); rc = device_add(dev); if (rc) { + kill_dax_dev(dax_dev); put_device(dev); return ERR_PTR(rc); } From 55dbfd7dcd7172681f1c2397b6f6c5276fc4763a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 30 Apr 2017 06:57:01 -0700 Subject: [PATCH 109/465] device-dax: fix sysfs attribute deadlock commit 565851c972b50612f3a4542e26879ffb3e906fc2 upstream. Usage of device_lock() for dax_region attributes is unnecessary and deadlock prone. It's unnecessary because the order of registration / un-registration guarantees that drvdata is always valid. It's deadlock prone because it sets up this situation: ndctl D 0 2170 2082 0x00000000 Call Trace: __schedule+0x31f/0x980 schedule+0x3d/0x90 schedule_preempt_disabled+0x15/0x20 __mutex_lock+0x402/0x980 ? __mutex_lock+0x158/0x980 ? align_show+0x2b/0x80 [dax] ? 
kernfs_seq_start+0x2f/0x90 mutex_lock_nested+0x1b/0x20 align_show+0x2b/0x80 [dax] dev_attr_show+0x20/0x50 ndctl D 0 2186 2079 0x00000000 Call Trace: __schedule+0x31f/0x980 schedule+0x3d/0x90 __kernfs_remove+0x1f6/0x340 ? kernfs_remove_by_name_ns+0x45/0xa0 ? remove_wait_queue+0x70/0x70 kernfs_remove_by_name_ns+0x45/0xa0 remove_files.isra.1+0x35/0x70 sysfs_remove_group+0x44/0x90 sysfs_remove_groups+0x2e/0x50 dax_region_unregister+0x25/0x40 [dax] devm_action_release+0xf/0x20 release_nodes+0x16d/0x2b0 devres_release_all+0x3c/0x60 device_release_driver_internal+0x17d/0x220 device_release_driver+0x12/0x20 unbind_store+0x112/0x160 ndctl/2170 is trying to acquire the device_lock() to read an attribute, and ndctl/2186 is holding the device_lock() while trying to drain all active attribute readers. Thanks to Yi Zhang for the reproduction script. Fixes: d7fe1a67f658 ("dax: add region 'id', 'size', and 'align' attributes") Reported-by: Yi Zhang Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/dax/dax.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index ca939ffd3a4d..f71ececd2678 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -77,36 +77,27 @@ struct dax_dev { struct resource res[0]; }; +/* + * Rely on the fact that drvdata is set before the attributes are + * registered, and that the attributes are unregistered before drvdata + * is cleared to assume that drvdata is always valid. + */ static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dax_region *dax_region; - ssize_t rc = -ENXIO; + struct dax_region *dax_region = dev_get_drvdata(dev); - device_lock(dev); - dax_region = dev_get_drvdata(dev); - if (dax_region) - rc = sprintf(buf, "%d\n", dax_region->id); - device_unlock(dev); - - return rc; + return sprintf(buf, "%d\n", dax_region->id); } static DEVICE_ATTR_RO(id); static ssize_t region_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dax_region *dax_region; - ssize_t rc = -ENXIO; + struct dax_region *dax_region = dev_get_drvdata(dev); - device_lock(dev); - dax_region = dev_get_drvdata(dev); - if (dax_region) - rc = sprintf(buf, "%llu\n", (unsigned long long) - resource_size(&dax_region->res)); - device_unlock(dev); - - return rc; + return sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&dax_region->res)); } static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, region_size_show, NULL); @@ -114,16 +105,9 @@ static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, static ssize_t align_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dax_region *dax_region; - ssize_t rc = -ENXIO; + struct dax_region *dax_region = dev_get_drvdata(dev); - device_lock(dev); - dax_region = dev_get_drvdata(dev); - if (dax_region) - rc = sprintf(buf, "%u\n", dax_region->align); - device_unlock(dev); - - return rc; + return sprintf(buf, "%u\n", dax_region->align); } static DEVICE_ATTR_RO(align); From 06305f51ef219cb5dbbfe1bab5e71f23f1a06fef Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 12 May 2017 15:46:47 -0700 Subject: [PATCH 110/465] dax: prevent invalidation of mapped DAX entries commit 4636e70bb0a8b871998b6841a2e4b205cf2bc863 upstream. Patch series "mm,dax: Fix data corruption due to mmap inconsistency", v4. 
This series fixes data corruption that can happen for DAX mounts when page faults race with write(2) and as a result page tables get out of sync with block mappings in the filesystem and thus data seen through mmap is different from data seen through read(2). The series passes testing with t_mmap_stale test program from Ross and also other mmap related tests on DAX filesystem. This patch (of 4): dax_invalidate_mapping_entry() currently removes DAX exceptional entries only if they are clean and unlocked. This is done via: invalidate_mapping_pages() invalidate_exceptional_entry() dax_invalidate_mapping_entry() However, for page cache pages removed in invalidate_mapping_pages() there is an additional criteria which is that the page must not be mapped. This is noted in the comments above invalidate_mapping_pages() and is checked in invalidate_inode_page(). For DAX entries this means that we can can end up in a situation where a DAX exceptional entry, either a huge zero page or a regular DAX entry, could end up mapped but without an associated radix tree entry. This is inconsistent with the rest of the DAX code and with what happens in the page cache case. We aren't able to unmap the DAX exceptional entry because according to its comments invalidate_mapping_pages() isn't allowed to block, and unmap_mapping_range() takes a write lock on the mapping->i_mmap_rwsem. Since we essentially never have unmapped DAX entries to evict from the radix tree, just remove dax_invalidate_mapping_entry(). Fixes: c6dcf52c23d2 ("mm: Invalidate DAX radix tree entries only if appropriate") Link: http://lkml.kernel.org/r/20170510085419.27601-2-jack@suse.cz Signed-off-by: Ross Zwisler Signed-off-by: Jan Kara Reported-by: Jan Kara Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dax.c | 29 ----------------------------- include/linux/dax.h | 1 - mm/truncate.c | 9 +++------ 3 files changed, 3 insertions(+), 36 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 85abd741253d..166504c1a20f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -506,35 +506,6 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) return ret; } -/* - * Invalidate exceptional DAX entry if easily possible. This handles DAX - * entries for invalidate_inode_pages() so we evict the entry only if we can - * do so without blocking. - */ -int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index) -{ - int ret = 0; - void *entry, **slot; - struct radix_tree_root *page_tree = &mapping->page_tree; - - spin_lock_irq(&mapping->tree_lock); - entry = __radix_tree_lookup(page_tree, index, NULL, &slot); - if (!entry || !radix_tree_exceptional_entry(entry) || - slot_locked(mapping, slot)) - goto out; - if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || - radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) - goto out; - radix_tree_delete(page_tree, index); - mapping->nrexceptional--; - ret = 1; -out: - spin_unlock_irq(&mapping->tree_lock); - if (ret) - dax_wake_mapping_entry_waiter(mapping, index, entry, true); - return ret; -} - /* * Invalidate exceptional DAX entry if it is clean. 
*/ diff --git a/include/linux/dax.h b/include/linux/dax.h index d8a3dc042e1c..f8e1833f81f6 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -41,7 +41,6 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, const struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); -int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, diff --git a/mm/truncate.c b/mm/truncate.c index 6263affdef88..c5371840fc1b 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -67,17 +67,14 @@ static void truncate_exceptional_entry(struct address_space *mapping, /* * Invalidate exceptional entry if easily possible. This handles exceptional - * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and - * clean entries. + * entries for invalidate_inode_pages(). */ static int invalidate_exceptional_entry(struct address_space *mapping, pgoff_t index, void *entry) { - /* Handled by shmem itself */ - if (shmem_mapping(mapping)) + /* Handled by shmem itself, or for DAX we do nothing. */ + if (shmem_mapping(mapping) || dax_mapping(mapping)) return 1; - if (dax_mapping(mapping)) - return dax_invalidate_mapping_entry(mapping, index); clear_shadow_entry(mapping, index, entry); return 1; } From 85f353dbd0a3113c5dd23940572e51c4750ea513 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 12 May 2017 15:46:50 -0700 Subject: [PATCH 111/465] mm: fix data corruption due to stale mmap reads commit cd656375f94632d7b5af57bf67b7b5c0270c591c upstream. Currently, we didn't invalidate page tables during invalidate_inode_pages2() for DAX. That could result in e.g. 2MiB zero page being mapped into page tables while there were already underlying blocks allocated and thus data seen through mmap were different from data seen by read(2). The following sequence reproduces the problem: - open an mmap over a 2MiB hole - read from a 2MiB hole, faulting in a 2MiB zero page - write to the hole with write(3p). The write succeeds but we incorrectly leave the 2MiB zero page mapping intact. - via the mmap, read the data that was just written. Since the zero page mapping is still intact we read back zeroes instead of the new data. Fix the problem by unconditionally calling invalidate_inode_pages2_range() in dax_iomap_actor() for new block allocations and by properly invalidating page tables in invalidate_inode_pages2_range() for DAX mappings. Fixes: c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 Link: http://lkml.kernel.org/r/20170510085419.27601-3-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Ross Zwisler Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dax.c | 2 +- mm/truncate.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 166504c1a20f..6fc95933a561 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1003,7 +1003,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * into page tables. We have to tear down these mappings so that data * written by write(2) is visible in mmap. 
*/ - if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) { + if (iomap->flags & IOMAP_F_NEW) { invalidate_inode_pages2_range(inode->i_mapping, pos >> PAGE_SHIFT, (end - 1) >> PAGE_SHIFT); diff --git a/mm/truncate.c b/mm/truncate.c index c5371840fc1b..5f1c4b65239e 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -683,6 +683,17 @@ int invalidate_inode_pages2_range(struct address_space *mapping, cond_resched(); index++; } + /* + * For DAX we invalidate page tables after invalidating radix tree. We + * could invalidate page tables while invalidating each entry however + * that would be expensive. And doing range unmapping before doesn't + * work as we have no cheap way to find whether radix tree entry didn't + * get remapped later. + */ + if (dax_mapping(mapping)) { + unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT, + (loff_t)(end - start + 1) << PAGE_SHIFT, 0); + } cleancache_invalidate_inode(mapping); return ret; } From 5a3651b4a92cbc5230d67d2ce87fb3f7373c7665 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 12 May 2017 15:46:54 -0700 Subject: [PATCH 112/465] ext4: return to starting transaction in ext4_dax_huge_fault() commit fb26a1cbed8c90025572d48bc9eabe59f7571e88 upstream. DAX will return to locking exceptional entry before mapping blocks for a page fault to fix possible races with concurrent writes. To avoid lock inversion between exceptional entry lock and transaction start, start the transaction already in ext4_dax_huge_fault(). Fixes: 9f141d6ef6258a3a37a045842d9ba7e68f368956 Link: http://lkml.kernel.org/r/20170510085419.27601-4-jack@suse.cz Signed-off-by: Jan Kara Cc: Ross Zwisler Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index cefa9835f275..831fd6beebf0 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -257,6 +257,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { int result; + handle_t *handle = NULL; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -264,12 +265,24 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, if (write) { sb_start_pagefault(sb); file_update_time(vmf->vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); + handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, + EXT4_DATA_TRANS_BLOCKS(sb)); + } else { + down_read(&EXT4_I(inode)->i_mmap_sem); } - down_read(&EXT4_I(inode)->i_mmap_sem); - result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops); - up_read(&EXT4_I(inode)->i_mmap_sem); - if (write) + if (!IS_ERR(handle)) + result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops); + else + result = VM_FAULT_SIGBUS; + if (write) { + if (!IS_ERR(handle)) + ext4_journal_stop(handle); + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); + } else { + up_read(&EXT4_I(inode)->i_mmap_sem); + } return result; } From 03606c8ecc11642f2906c17349994f7b4edf0f31 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 12 May 2017 15:47:00 -0700 Subject: [PATCH 113/465] dax: fix PMD data corruption when fault races with write commit 876f29460cbd4086b43475890c1bf2488fa11d40 upstream. This is based on a patch from Jan Kara that fixed the equivalent race in the DAX PTE fault path. 
Currently DAX PMD read fault can race with write(2) in the following way: CPU1 - write(2) CPU2 - read fault dax_iomap_pmd_fault() ->iomap_begin() - sees hole dax_iomap_rw() iomap_apply() ->iomap_begin - allocates blocks dax_iomap_actor() invalidate_inode_pages2_range() - there's nothing to invalidate grab_mapping_entry() - we add huge zero page to the radix tree and map it to page tables The result is that hole page is mapped into page tables (and thus zeros are seen in mmap) while file has data written in that place. Fix the problem by locking exception entry before mapping blocks for the fault. That way we are sure invalidate_inode_pages2_range() call for racing write will either block on entry lock waiting for the fault to finish (and unmap stale page tables after that) or read fault will see already allocated blocks by write(2). Fixes: 9f141d6ef6258 ("dax: Call ->iomap_begin without entry lock during dax fault") Link: http://lkml.kernel.org/r/20170510172700.18991-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dax.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 6fc95933a561..b87f3ab742ba 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1352,6 +1352,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, if ((pgoff | PG_PMD_COLOUR) > max_pgoff) goto fallback; + /* + * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX + * PMD or a HZP entry. If it can't (because a 4k page is already in + * the tree, for instance), it will return -EEXIST and we just fall + * back to 4k entries. + */ + entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); + if (IS_ERR(entry)) + goto fallback; + /* * Note that we don't use iomap_apply here. We aren't doing I/O, only * setting up a mapping, so really we're using iomap_begin() as a way @@ -1360,21 +1370,11 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pos = (loff_t)pgoff << PAGE_SHIFT; error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap); if (error) - goto fallback; + goto unlock_entry; if (iomap.offset + iomap.length < pos + PMD_SIZE) goto finish_iomap; - /* - * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX - * PMD or a HZP entry. If it can't (because a 4k page is already in - * the tree, for instance), it will return -EEXIST and we just fall - * back to 4k entries. 
- */ - entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); - if (IS_ERR(entry)) - goto finish_iomap; - switch (iomap.type) { case IOMAP_MAPPED: result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry); @@ -1382,7 +1382,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (WARN_ON_ONCE(write)) - goto unlock_entry; + break; result = dax_pmd_load_hole(vmf, &iomap, &entry); break; default: @@ -1390,8 +1390,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, break; } - unlock_entry: - put_locked_mapping_entry(mapping, pgoff, entry); finish_iomap: if (ops->iomap_end) { int copied = PMD_SIZE; @@ -1407,6 +1405,8 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags, &iomap); } + unlock_entry: + put_locked_mapping_entry(mapping, pgoff, entry); fallback: if (result == VM_FAULT_FALLBACK) { split_huge_pmd(vma, vmf->pmd, vmf->address); From 33cbcc2556b36a94b7387a995756e7c5f07e05d9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 25 Mar 2017 00:03:02 -0700 Subject: [PATCH 114/465] f2fs: fix wrong max cost initialization commit c541a51b8ce81d003b02ed67ad3604a2e6220e3e upstream. This patch fixes missing increased max cost caused by a patch that we increased cose of data segments in greedy algorithm. Fixes: b9cd20619 "f2fs: node segment is prior to data segment selected victim" Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 418fd9881646..b5a62d4a3a69 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -182,7 +182,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, if (p->alloc_mode == SSR) return sbi->blocks_per_seg; if (p->gc_mode == GC_GREEDY) - return sbi->blocks_per_seg * p->ofs_unit; + return 2 * sbi->blocks_per_seg * p->ofs_unit; else if (p->gc_mode == GC_CB) return UINT_MAX; else /* No other gc_mode */ From ed4d26a1e45b1cd0d2290cb50e9ab38ef74e0579 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 4 Apr 2017 16:45:30 -0700 Subject: [PATCH 115/465] Revert "f2fs: put allocate_segment after refresh_sit_entry" commit c6f82fe90d7458e5fa190a6820bfc24f96b0de4e upstream. This reverts commit 3436c4bdb30de421d46f58c9174669fbcfd40ce0. This makes a leak to register dirty segments. I reproduced the issue by modified postmark which injects a lot of file create/delete/update and finally triggers huge number of SSR allocations. [Jaegeuk Kim: Change missing incorrect comment] Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 29ef7088c558..0f051e3ef535 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1788,15 +1788,14 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); /* - * SIT information should be updated before segment allocation, - * since SSR needs latest valid block information. + * SIT information should be updated after segment allocation, + * since we need to keep dirty segments precisely under SSR. 
*/ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - if (!__has_curseg_space(sbi, type)) - sit_i->s_ops->allocate_segment(sbi, type, false); - mutex_unlock(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) From e71f099677c18c7c7d9363dc9e89d27fe4a33b06 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 Apr 2017 19:01:26 -0700 Subject: [PATCH 116/465] f2fs: fix fs corruption due to zero inode page commit 9bb02c3627f46e50246bf7ab957b56ffbef623cb upstream. This patch fixes the following scenario. - f2fs_create/f2fs_mkdir - write_checkpoint - f2fs_mark_inode_dirty_sync - block_operations - f2fs_lock_all - f2fs_sync_inode_meta - f2fs_unlock_all - sync_inode_metadata - f2fs_lock_op - f2fs_write_inode - update_inode_page - get_node_page return -ENOENT - new_inode_page - fill_node_footer - f2fs_mark_inode_dirty_sync - ... - f2fs_unlock_op - f2fs_inode_synced - f2fs_lock_all - do_checkpoint In this checkpoint, we can get an inode page which contains zeros having valid node footer only. Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 24bb8213d974..a204f22eba5b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -316,7 +316,6 @@ int update_inode_page(struct inode *inode) } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } - f2fs_inode_synced(inode); return 0; } ret = update_inode(inode, node_page); @@ -448,6 +447,7 @@ void handle_failed_inode(struct inode *inode) * in a panic when flushing dirty inodes in gdirty_list. */ update_inode_page(inode); + f2fs_inode_synced(inode); /* don't make bad inode, since it becomes a regular file. */ unlock_new_inode(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 98f00a3a7f50..a30f323b7b2b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -148,8 +148,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -163,6 +161,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out: handle_failed_inode(inode); @@ -423,8 +423,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode_nohighmem(inode); inode->i_mapping->a_ops = &f2fs_dblock_aops; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -487,6 +485,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, } kfree(sd); + + f2fs_balance_fs(sbi, true); return err; out: handle_failed_inode(inode); @@ -508,8 +508,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); - f2fs_balance_fs(sbi, true); - set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -524,6 +522,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out_fail: @@ -554,8 +554,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; - f2fs_balance_fs(sbi, true); 
- f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -569,6 +567,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out: handle_failed_inode(inode); @@ -595,8 +595,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &f2fs_dblock_aops; } - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) @@ -622,6 +620,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, /* link_count was changed by d_tmpfile as well. */ f2fs_unlock_op(sbi); unlock_new_inode(inode); + + f2fs_balance_fs(sbi, true); return 0; release_out: From 3a625468bd9e6a53d1078ba0a2e9c17117147183 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sat, 22 Apr 2017 10:39:20 +0800 Subject: [PATCH 117/465] f2fs: fix multiple f2fs_add_link() having same name for inline dentry commit d3bb910c15d75ee3340311c64a1c05985bb663a3 upstream. Commit 88c5c13a5027 (f2fs: fix multiple f2fs_add_link() calls having same name) does not cover the scenario where inline dentry is enabled. In that case, F2FS_I(dir)->task will be NULL, and __f2fs_add_link will lookup dentries one more time. This patch fixes it by moving the assigment of current task to a upper level to cover both normal and inline dentry. Fixes: 88c5c13a5027 (f2fs: fix multiple f2fs_add_link() calls having same name) Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/dir.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 8d5c62b07b28..fcd1cff763e0 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -207,13 +207,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - /* This is to increase the speed of f2fs_create */ - if (!de && room) { - F2FS_I(dir)->task = current; - if (F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; - } + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; } return de; @@ -254,6 +250,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, break; } out: + /* This is to increase the speed of f2fs_create */ + if (!de) + F2FS_I(dir)->task = current; return de; } From bd3dfe5049498ee72c54341a12c5e84a6d61796a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 24 Apr 2017 10:00:08 -0700 Subject: [PATCH 118/465] f2fs: check entire encrypted bigname when finding a dentry commit 6332cd32c8290a80e929fc044dc5bdba77396e33 upstream. If user has no key under an encrypted dir, fscrypt gives digested dentries. Previously, when looking up a dentry, f2fs only checks its hash value with first 4 bytes of the digested dentry, which didn't handle hash collisions fully. This patch enhances to check entire dentry bytes likewise ext4. 
Eric reported how to reproduce this issue by: # seq -f "edir/abcdefghijklmnopqrstuvwxyz012345%.0f" 100000 | xargs touch # find edir -type f | xargs stat -c %i | sort | uniq | wc -l 100000 # sync # echo 3 > /proc/sys/vm/drop_caches # keyctl new_session # find edir -type f | xargs stat -c %i | sort | uniq | wc -l 99999 Reported-by: Eric Biggers Signed-off-by: Jaegeuk Kim (fixed f2fs_dentry_hash() to work even when the hash is 0) Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/dir.c | 37 +++++++++++++++++++++---------------- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/hash.c | 7 ++++++- fs/f2fs/inline.c | 4 ++-- 4 files changed, 31 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index fcd1cff763e0..589fe7c59fd1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -130,19 +130,29 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, continue; } - /* encrypted case */ + if (de->hash_code != namehash) + goto not_match; + de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); - /* show encrypted name */ - if (fname->hash) { - if (de->hash_code == cpu_to_le32(fname->hash)) - goto found; - } else if (de_name.len == name->len && - de->hash_code == namehash && - !memcmp(de_name.name, name->name, name->len)) +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (unlikely(!name->name)) { + if (fname->usr_fname->name[0] == '_') { + if (de_name.len >= 16 && + !memcmp(de_name.name + de_name.len - 16, + fname->crypto_buf.name + 8, 16)) + goto found; + goto not_match; + } + name->name = fname->crypto_buf.name; + name->len = fname->crypto_buf.len; + } +#endif + if (de_name.len == name->len && + !memcmp(de_name.name, name->name, name->len)) goto found; - +not_match: if (max_slots && max_len > *max_slots) *max_slots = max_len; max_len = 0; @@ -170,12 +180,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct f2fs_dir_entry *de = NULL; bool room = false; int max_slots; - f2fs_hash_t namehash; - - if(fname->hash) - namehash = cpu_to_le32(fname->hash); - else - namehash = f2fs_dentry_hash(&name); + f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname); nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); @@ -541,7 +546,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, level = 0; slots = GET_DENTRY_SLOTS(new_name->len); - dentry_hash = f2fs_dentry_hash(new_name); + dentry_hash = f2fs_dentry_hash(new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0a6e115562f6..05d7e2cefc56 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2133,7 +2133,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info); +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, + struct fscrypt_name *fname); /* * node.c diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 71b7206c431e..eb2e031ea887 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -70,7 +70,8 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, + struct fscrypt_name *fname) { __u32 hash; f2fs_hash_t f2fs_hash; @@ -79,6 +80,10 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) const unsigned char *name = name_info->name; size_t len = name_info->len; + /* encrypted bigname 
case */ + if (fname && !fname->disk_name.name) + return cpu_to_le32(fname->hash); + if (is_dot_dotdot(name_info)) return 0; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e32a9e527968..fa729ff6b2f9 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -296,7 +296,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, return NULL; } - namehash = f2fs_dentry_hash(&name); + namehash = f2fs_dentry_hash(&name, fname); inline_dentry = inline_data_addr(ipage); @@ -533,7 +533,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true); - name_hash = f2fs_dentry_hash(new_name); + name_hash = f2fs_dentry_hash(new_name, NULL); make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); From 9b3be66cb479de75baafbf8ae2d3fcf979c109c2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 May 2017 17:03:47 +0200 Subject: [PATCH 119/465] f2fs: Make flush bios explicitely sync commit 3adc5fcb7edf5f8dfe8d37dcb50ba6b30077c905 upstream. Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions. Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 CC: Jaegeuk Kim CC: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Jan Kara Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 2 +- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1602b4bccae6..dd24476e8d2c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -309,7 +309,7 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - io->fio.op_flags = REQ_META | REQ_PRIO; + io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC; if (!test_opt(sbi, NOBARRIER)) io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0f051e3ef535..56670c5058b7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -416,7 +416,7 @@ static int __submit_flush_wait(struct block_device *bdev) struct bio *bio = f2fs_bio_alloc(0); int ret; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; bio->bi_bdev = bdev; ret = submit_bio_wait(bio); bio_put(bio); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 858aef564a58..5ca78308d5ec 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1307,7 +1307,7 @@ static int __f2fs_commit_super(struct buffer_head *bh, unlock_buffer(bh); /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA); + return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, From e54af78e1a74dc3d77a0da1d5a2889ab621cfc49 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 4 May 2017 21:15:56 +0900 Subject: [PATCH 120/465] initramfs: Always do fput() and load modules after rootfs populate commit 17a9be31747535184f2af156b1f080ec4c92a952 upstream. 
In OpenRISC we do not have a bootloader passed initrd, but the built in initramfs does contain the /init and other binaries, including modules. The previous commit 08865514805d2 ("initramfs: finish fput() before accessing any binary from initramfs") made a change to only call fput() if the bootloader initrd was available, this caused intermittent crashes for OpenRISC. This patch changes the fput() to happen unconditionally if any rootfs is loaded. Also, I added some comments to make it a bit more clear why we call unpack_to_rootfs() multiple times. Fixes: 08865514805d2 ("initramfs: finish fput() before accessing any binary from initramfs") Cc: Lokesh Vutla Cc: Al Viro Acked-by: Al Viro Signed-off-by: Stafford Horne Signed-off-by: Greg Kroah-Hartman --- init/initramfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index 981f286c1d16..3a6871597351 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -608,9 +608,11 @@ static void __init clean_rootfs(void) static int __init populate_rootfs(void) { + /* Load the built in initramfs */ char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); if (err) panic("%s", err); /* Failed to decompress INTERNAL initramfs */ + /* If available load the bootloader supplied initrd */ if (initrd_start) { #ifdef CONFIG_BLK_DEV_RAM int fd; @@ -648,13 +650,14 @@ static int __init populate_rootfs(void) printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); free_initrd(); #endif - flush_delayed_fput(); - /* - * Try loading default modules from initramfs. This gives - * us a chance to load before device_initcalls. - */ - load_default_modules(); } + flush_delayed_fput(); + /* + * Try loading default modules from initramfs. This gives + * us a chance to load before device_initcalls. + */ + load_default_modules(); + return 0; } rootfs_initcall(populate_rootfs); From 602f8911c7dc9e54aaef1c3cd3ef31855dc270e3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 6 May 2017 10:27:13 -0700 Subject: [PATCH 121/465] initramfs: avoid "label at end of compound statement" error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 394e4f5d5834b610ee032cceb20a1b1f45b01d28 upstream. Commit 17a9be317475 ("initramfs: Always do fput() and load modules after rootfs populate") introduced an error for the CONFIG_BLK_DEV_RAM=y case, because even though the code looks fine, the compiler really wants a statement after a label, or you'll get complaints: init/initramfs.c: In function 'populate_rootfs': init/initramfs.c:644:2: error: label at end of compound statement That commit moved the subsequent statements to outside the compound statement, leaving the label without any associated statements. 
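For readers who have not hit this before, here is a stand-alone illustration of the compiler rule involved (hypothetical example, not taken from init/initramfs.c): a label must be followed by a statement, so when the preprocessor strips everything after the label, an explicit empty statement is needed.

	#include <stdio.h>

	static void example(int err)
	{
		if (err)
			goto done;
	#ifdef DEBUG
		printf("debug-only work\n");
	#endif
	done:
		;	/* empty statement: without it, and with DEBUG unset,
			 * gcc reports "label at end of compound statement" */
	}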
Reported-by: Jörg Otte Fixes: 17a9be317475 ("initramfs: Always do fput() and load modules after rootfs populate") Cc: Al Viro Cc: Stafford Horne Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- init/initramfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/init/initramfs.c b/init/initramfs.c index 3a6871597351..8daf7ac6c7e2 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -642,6 +642,7 @@ static int __init populate_rootfs(void) free_initrd(); } done: + /* empty statement */; #else printk(KERN_INFO "Unpacking initramfs...\n"); err = unpack_to_rootfs((char *)initrd_start, From eb86b4c68b8c59eec5e44fa7e1ea98c0a63eaa24 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 7 Apr 2017 10:58:37 -0700 Subject: [PATCH 122/465] fscrypt: fix context consistency check when key(s) unavailable commit 272f98f6846277378e1758a49a49d7bf39343c02 upstream. To mitigate some types of offline attacks, filesystem encryption is designed to enforce that all files in an encrypted directory tree use the same encryption policy (i.e. the same encryption context excluding the nonce). However, the fscrypt_has_permitted_context() function which enforces this relies on comparing struct fscrypt_info's, which are only available when we have the encryption keys. This can cause two incorrect behaviors: 1. If we have the parent directory's key but not the child's key, or vice versa, then fscrypt_has_permitted_context() returned false, causing applications to see EPERM or ENOKEY. This is incorrect if the encryption contexts are in fact consistent. Although we'd normally have either both keys or neither key in that case since the master_key_descriptors would be the same, this is not guaranteed because keys can be added or removed from keyrings at any time. 2. If we have neither the parent's key nor the child's key, then fscrypt_has_permitted_context() returned true, causing applications to see no error (or else an error for some other reason). This is incorrect if the encryption contexts are in fact inconsistent, since in that case we should deny access. To fix this, retrieve and compare the fscrypt_contexts if we are unable to set up both fscrypt_infos. While this slightly hurts performance when accessing an encrypted directory tree without the key, this isn't a case we really need to be optimizing for; access *with* the key is much more important. Furthermore, the performance hit is barely noticeable given that we are already retrieving the fscrypt_context and doing two keyring searches in fscrypt_get_encryption_info(). If we ever actually wanted to optimize this case we might start by caching the fscrypt_contexts. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/crypto/policy.c | 87 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 19 deletions(-) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 4908906d54d5..fc3660d82a7f 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -143,27 +143,61 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) } EXPORT_SYMBOL(fscrypt_ioctl_get_policy); +/** + * fscrypt_has_permitted_context() - is a file's encryption policy permitted + * within its directory? 
+ * + * @parent: inode for parent directory + * @child: inode for file being looked up, opened, or linked into @parent + * + * Filesystems must call this before permitting access to an inode in a + * situation where the parent directory is encrypted (either before allowing + * ->lookup() to succeed, or for a regular file before allowing it to be opened) + * and before any operation that involves linking an inode into an encrypted + * directory, including link, rename, and cross rename. It enforces the + * constraint that within a given encrypted directory tree, all files use the + * same encryption policy. The pre-access check is needed to detect potentially + * malicious offline violations of this constraint, while the link and rename + * checks are needed to prevent online violations of this constraint. + * + * Return: 1 if permitted, 0 if forbidden. If forbidden, the caller must fail + * the filesystem operation with EPERM. + */ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { - struct fscrypt_info *parent_ci, *child_ci; + const struct fscrypt_operations *cops = parent->i_sb->s_cop; + const struct fscrypt_info *parent_ci, *child_ci; + struct fscrypt_context parent_ctx, child_ctx; int res; - if ((parent == NULL) || (child == NULL)) { - printk(KERN_ERR "parent %p child %p\n", parent, child); - BUG_ON(1); - } - /* No restrictions on file types which are never encrypted */ if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && !S_ISLNK(child->i_mode)) return 1; - /* no restrictions if the parent directory is not encrypted */ - if (!parent->i_sb->s_cop->is_encrypted(parent)) + /* No restrictions if the parent directory is unencrypted */ + if (!cops->is_encrypted(parent)) return 1; - /* if the child directory is not encrypted, this is always a problem */ - if (!parent->i_sb->s_cop->is_encrypted(child)) + + /* Encrypted directories must not contain unencrypted files */ + if (!cops->is_encrypted(child)) return 0; + + /* + * Both parent and child are encrypted, so verify they use the same + * encryption policy. Compare the fscrypt_info structs if the keys are + * available, otherwise retrieve and compare the fscrypt_contexts. + * + * Note that the fscrypt_context retrieval will be required frequently + * when accessing an encrypted directory tree without the key. + * Performance-wise this is not a big deal because we already don't + * really optimize for file access without the key (to the extent that + * such access is even possible), given that any attempted access + * already causes a fscrypt_context retrieval and keyring search. + * + * In any case, if an unexpected error occurs, fall back to "forbidden". 
+ */ + res = fscrypt_get_encryption_info(parent); if (res) return 0; @@ -172,17 +206,32 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) return 0; parent_ci = parent->i_crypt_info; child_ci = child->i_crypt_info; - if (!parent_ci && !child_ci) - return 1; - if (!parent_ci || !child_ci) + + if (parent_ci && child_ci) { + return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, + FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == + child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags); + } + + res = cops->get_context(parent, &parent_ctx, sizeof(parent_ctx)); + if (res != sizeof(parent_ctx)) + return 0; + + res = cops->get_context(child, &child_ctx, sizeof(child_ctx)); + if (res != sizeof(child_ctx)) return 0; - return (memcmp(parent_ci->ci_master_key, - child_ci->ci_master_key, - FS_KEY_DESCRIPTOR_SIZE) == 0 && - (parent_ci->ci_data_mode == child_ci->ci_data_mode) && - (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && - (parent_ci->ci_flags == child_ci->ci_flags)); + return memcmp(parent_ctx.master_key_descriptor, + child_ctx.master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ctx.contents_encryption_mode == + child_ctx.contents_encryption_mode) && + (parent_ctx.filenames_encryption_mode == + child_ctx.filenames_encryption_mode) && + (parent_ctx.flags == child_ctx.flags); } EXPORT_SYMBOL(fscrypt_has_permitted_context); From ab62f4118ba1d010a994d79f55ea8bd0b48f5b98 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Apr 2017 10:00:09 -0700 Subject: [PATCH 123/465] fscrypt: avoid collisions when presenting long encrypted filenames commit 6b06cdee81d68a8a829ad8e8d0f31d6836744af9 upstream. When accessing an encrypted directory without the key, userspace must operate on filenames derived from the ciphertext names, which contain arbitrary bytes. Since we must support filenames as long as NAME_MAX, we can't always just base64-encode the ciphertext, since that may make it too long. Currently, this is solved by presenting long names in an abbreviated form containing any needed filesystem-specific hashes (e.g. to identify a directory block), then the last 16 bytes of ciphertext. This needs to be sufficient to identify the actual name on lookup. However, there is a bug. It seems to have been assumed that due to the use of a CBC (ciphertext block chaining)-based encryption mode, the last 16 bytes (i.e. the AES block size) of ciphertext would depend on the full plaintext, preventing collisions. However, we actually use CBC with ciphertext stealing (CTS), which handles the last two blocks specially, causing them to appear "flipped". Thus, it's actually the second-to-last block which depends on the full plaintext. This caused long filenames that differ only near the end of their plaintexts to, when observed without the key, point to the wrong inode and be undeletable. For example, with ext4: # echo pass | e4crypt add_key -p 16 edir/ # seq -f "edir/abcdefghijklmnopqrstuvwxyz012345%.0f" 100000 | xargs touch # find edir/ -type f | xargs stat -c %i | sort | uniq | wc -l 100000 # sync # echo 3 > /proc/sys/vm/drop_caches # keyctl new_session # find edir/ -type f | xargs stat -c %i | sort | uniq | wc -l 2004 # rm -rf edir/ rm: cannot remove 'edir/_A7nNFi3rhkEQlJ6P,hdzluhODKOeWx5V': Structure needs cleaning ... To fix this, when presenting long encrypted filenames, encode the second-to-last block of ciphertext rather than the last 16 bytes. 
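A quick worked check of the index expression used in the hunks below (editorial arithmetic, not extra patch content): (len - 17) & ~15 rounds len - 17 down to a multiple of 16, which is the byte offset of the second-to-last 16-byte block of a len-byte ciphertext.

	/* worked examples, illustrative only:
	 *   len = 32: (32 - 17) & ~15 = 15 & ~15 =  0  -> bytes  0..15
	 *   len = 33: (33 - 17) & ~15 = 16 & ~15 = 16  -> bytes 16..31
	 *   len = 48: (48 - 17) & ~15 = 31 & ~15 = 16  -> bytes 16..31
	 *   len = 64: (64 - 17) & ~15 = 47 & ~15 = 32  -> bytes 32..47
	 * the old code always took the final 16 bytes at offset len - 16.
	 */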
Although it would be nice to solve this without depending on a specific encryption mode, that would mean doing a cryptographic hash like SHA-256 which would be much less efficient. This way is sufficient for now, and it's still compatible with encryption modes like HEH which are strong pseudorandom permutations. Also, changing the presented names is still allowed at any time because they are only provided to allow applications to do things like delete encrypted directories. They're not designed to be used to persistently identify files --- which would be hard to do anyway, given that they're encrypted after all. For ease of backports, this patch only makes the minimal fix to both ext4 and f2fs. It leaves ubifs as-is, since ubifs doesn't compare the ciphertext block yet. Follow-on patches will clean things up properly and make the filesystems use a shared helper function. Fixes: 5de0b4d0cd15 ("ext4 crypto: simplify and speed up filename encryption") Reported-by: Gwendal Grignou Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/crypto/fname.c | 2 +- fs/ext4/namei.c | 4 ++-- fs/f2fs/dir.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 37b49894c762..15bf9c31a34d 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -300,7 +300,7 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, } else { memset(buf, 0, 8); } - memcpy(buf + 8, iname->name + iname->len - 16, 16); + memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16); oname->name[0] = '_'; oname->len = 1 + digest_encode(buf, 24, oname->name + 1); return 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 07e5e1405771..eb2dea8287c1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1255,9 +1255,9 @@ static inline int ext4_match(struct ext4_filename *fname, if (unlikely(!name)) { if (fname->usr_fname->name[0] == '_') { int ret; - if (de->name_len < 16) + if (de->name_len <= 32) return 0; - ret = memcmp(de->name + de->name_len - 16, + ret = memcmp(de->name + ((de->name_len - 17) & ~15), fname->crypto_buf.name + 8, 16); return (ret == 0) ? 1 : 0; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 589fe7c59fd1..96e9e7fa64dd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -139,8 +139,8 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, #ifdef CONFIG_F2FS_FS_ENCRYPTION if (unlikely(!name->name)) { if (fname->usr_fname->name[0] == '_') { - if (de_name.len >= 16 && - !memcmp(de_name.name + de_name.len - 16, + if (de_name.len > 32 && + !memcmp(de_name.name + ((de_name.len - 17) & ~15), fname->crypto_buf.name + 8, 16)) goto found; goto not_match; From 43a1c1ff12c0662769b051ea06a19bdba165fccf Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 3 Apr 2017 08:20:59 +0200 Subject: [PATCH 124/465] serial: samsung: Use right device for DMA-mapping calls commit 768d64f491a530062ddad50e016fb27125f8bd7c upstream. Driver should provide its own struct device for all DMA-mapping calls instead of extracting device pointer from DMA engine channel. Although this is harmless from the driver operation perspective on ARM architecture, it is always good to use the DMA mapping API in a proper way. 
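The warning quoted below comes from the DMA-API debug code, which keeps its bookkeeping per struct device: a buffer mapped through the DMA engine channel's device but later synced or unmapped through the UART's own device looks like an operation on memory that was never mapped. The toy program below models that bookkeeping with stand-in types (struct device, the toy map/sync calls and the device names are placeholders, not the real DMA API) to show why map, sync and unmap must all go through the same owning device:

#include <stdio.h>

/* Toy stand-ins for struct device and the dma-debug bookkeeping. */
struct device { const char *name; };

struct mapping {
    const struct device *dev;
    const void *cpu_addr;
    int used;
};
static struct mapping table[16];

static void toy_dma_map(const struct device *dev, const void *buf)
{
    for (int i = 0; i < 16; i++) {
        if (!table[i].used) {
            table[i].dev = dev;
            table[i].cpu_addr = buf;
            table[i].used = 1;
            return;
        }
    }
}

static void toy_dma_sync(const struct device *dev, const void *buf)
{
    for (int i = 0; i < 16; i++)
        if (table[i].used && table[i].dev == dev && table[i].cpu_addr == buf) {
            printf("%s: sync ok\n", dev->name);
            return;
        }
    printf("%s: WARNING: tries to sync DMA memory it has not mapped\n", dev->name);
}

int main(void)
{
    struct device uart = { "uart" }, dma_chan = { "dma-engine" };
    char buf[64];

    toy_dma_map(&dma_chan, buf);  /* mapped via the DMA channel's device */
    toy_dma_sync(&uart, buf);     /* synced via the UART device -> warning */

    toy_dma_map(&uart, buf);      /* map and sync via the same device */
    toy_dma_sync(&uart, buf);     /* -> ok */
    return 0;
}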
This patch fixes following DMA API debug warning: WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1241 check_sync+0x520/0x9f4 samsung-uart 12c20000.serial: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x000000006df0f580] [size=64 bytes] Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.11.0-rc1-00137-g07ca963 #51 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x84/0xa0) [] (dump_stack) from [] (__warn+0x14c/0x180) [] (__warn) from [] (warn_slowpath_fmt+0x48/0x50) [] (warn_slowpath_fmt) from [] (check_sync+0x520/0x9f4) [] (check_sync) from [] (debug_dma_sync_single_for_device+0x88/0xc8) [] (debug_dma_sync_single_for_device) from [] (s3c24xx_serial_start_tx_dma+0x100/0x2f8) [] (s3c24xx_serial_start_tx_dma) from [] (s3c24xx_serial_tx_chars+0x198/0x33c) Reported-by: Seung-Woo Kim Fixes: 62c37eedb74c8 ("serial: samsung: add dma reqest/release functions") Signed-off-by: Marek Szyprowski Reviewed-by: Bartlomiej Zolnierkiewicz Reviewed-by: Krzysztof Kozlowski Reviewed-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index 7a17aedbf902..9f3759bdb44f 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -901,14 +901,13 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) return -ENOMEM; } - dma->rx_addr = dma_map_single(dma->rx_chan->device->dev, dma->rx_buf, + dma->rx_addr = dma_map_single(p->port.dev, dma->rx_buf, dma->rx_size, DMA_FROM_DEVICE); spin_lock_irqsave(&p->port.lock, flags); /* TX buffer */ - dma->tx_addr = dma_map_single(dma->tx_chan->device->dev, - p->port.state->xmit.buf, + dma->tx_addr = dma_map_single(p->port.dev, p->port.state->xmit.buf, UART_XMIT_SIZE, DMA_TO_DEVICE); spin_unlock_irqrestore(&p->port.lock, flags); @@ -922,7 +921,7 @@ static void s3c24xx_serial_release_dma(struct s3c24xx_uart_port *p) if (dma->rx_chan) { dmaengine_terminate_all(dma->rx_chan); - dma_unmap_single(dma->rx_chan->device->dev, dma->rx_addr, + dma_unmap_single(p->port.dev, dma->rx_addr, dma->rx_size, DMA_FROM_DEVICE); kfree(dma->rx_buf); dma_release_channel(dma->rx_chan); @@ -931,7 +930,7 @@ static void s3c24xx_serial_release_dma(struct s3c24xx_uart_port *p) if (dma->tx_chan) { dmaengine_terminate_all(dma->tx_chan); - dma_unmap_single(dma->tx_chan->device->dev, dma->tx_addr, + dma_unmap_single(p->port.dev, dma->tx_addr, UART_XMIT_SIZE, DMA_TO_DEVICE); dma_release_channel(dma->tx_chan); dma->tx_chan = NULL; From cd823aa0bc50184ae104bac88904ab843763c27c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 3 Apr 2017 08:21:00 +0200 Subject: [PATCH 125/465] serial: samsung: Add missing checks for dma_map_single failure commit 500fcc08a32bfd54f11951ba81530775df15c474 upstream. This patch adds missing checks for dma_map_single() failure and proper error reporting. Although this issue was harmless on ARM architecture, it is always good to use the DMA mapping API in a proper way. 
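The fix follows the usual kernel unwinding idiom: each acquired resource gets its own err_* label, failures of dma_map_single() are detected with dma_mapping_error() rather than by inspecting the returned handle, and the labels release everything acquired so far in reverse order. A standalone sketch of that control flow, with stub acquire()/release() steps standing in for the channel requests, the buffer allocation and the two mappings (the names and the failure injection are illustrative only):

#include <stdio.h>
#include <stdlib.h>

/*
 * Stub resources: in the driver these are the rx/tx DMA channels, the rx
 * buffer, and the rx/tx DMA mappings, whose failure is detected with
 * dma_mapping_error().
 */
static int acquire(const char *what, int fail)
{
    if (fail) {
        printf("acquire %s: failed\n", what);
        return -1;
    }
    printf("acquire %s\n", what);
    return 0;
}

static void release(const char *what)
{
    printf("release %s\n", what);
}

static int request_dma(int fail_at)
{
    int ret = -1;

    if (acquire("rx channel", fail_at == 1))
        return -1;
    if (acquire("tx channel", fail_at == 2))
        goto err_release_rx;
    if (acquire("rx buffer", fail_at == 3))
        goto err_release_tx;
    if (acquire("rx mapping", fail_at == 4))
        goto err_free_rx;
    if (acquire("tx mapping", fail_at == 5))
        goto err_unmap_rx;
    return 0;

err_unmap_rx:
    release("rx mapping");
err_free_rx:
    release("rx buffer");
err_release_tx:
    release("tx channel");
err_release_rx:
    release("rx channel");
    return ret;
}

int main(int argc, char **argv)
{
    /* Pass 1..5 to make that step fail and watch the unwind order. */
    return request_dma(argc > 1 ? atoi(argv[1]) : 0) ? 1 : 0;
}

Making any one step fail prints the releases in reverse order of acquisition, which is the shape of the err_unmap_rx/err_free_rx/err_release_tx/err_release_rx ladder added below.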
This patch fixes the following DMA API debug warning: WARNING: CPU: 1 PID: 3785 at lib/dma-debug.c:1171 check_unmap+0x8a0/0xf28 dma-pl330 121a0000.pdma: DMA-API: device driver failed to check map error[device address=0x000000006e0f9000] [size=4096 bytes] [mapped as single] Modules linked in: CPU: 1 PID: 3785 Comm: (agetty) Tainted: G W 4.11.0-rc1-00137-g07ca963-dirty #59 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x84/0xa0) [] (dump_stack) from [] (__warn+0x14c/0x180) [] (__warn) from [] (warn_slowpath_fmt+0x48/0x50) [] (warn_slowpath_fmt) from [] (check_unmap+0x8a0/0xf28) [] (check_unmap) from [] (debug_dma_unmap_page+0x98/0xc8) [] (debug_dma_unmap_page) from [] (s3c24xx_serial_shutdown+0x314/0x52c) [] (s3c24xx_serial_shutdown) from [] (uart_port_shutdown+0x54/0x88) [] (uart_port_shutdown) from [] (uart_shutdown+0xd4/0x110) [] (uart_shutdown) from [] (uart_hangup+0x9c/0x208) [] (uart_hangup) from [] (__tty_hangup+0x49c/0x634) [] (__tty_hangup) from [] (tty_ioctl+0xc88/0x16e4) [] (tty_ioctl) from [] (do_vfs_ioctl+0xc4/0xd10) [] (do_vfs_ioctl) from [] (SyS_ioctl+0x7c/0x8c) [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x3c) Reported-by: Seung-Woo Kim Fixes: 62c37eedb74c8 ("serial: samsung: add dma reqest/release functions") Signed-off-by: Marek Szyprowski Reviewed-by: Bartlomiej Zolnierkiewicz Reviewed-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index 9f3759bdb44f..43c84c9bb904 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -860,6 +860,7 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) { struct s3c24xx_uart_dma *dma = p->dma; unsigned long flags; + int ret; /* Default slave configuration parameters */ dma->rx_conf.direction = DMA_DEV_TO_MEM; @@ -884,8 +885,8 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) dma->tx_chan = dma_request_chan(p->port.dev, "tx"); if (IS_ERR(dma->tx_chan)) { - dma_release_channel(dma->rx_chan); - return PTR_ERR(dma->tx_chan); + ret = PTR_ERR(dma->tx_chan); + goto err_release_rx; } dmaengine_slave_config(dma->tx_chan, &dma->tx_conf); @@ -894,15 +895,17 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) dma->rx_size = PAGE_SIZE; dma->rx_buf = kmalloc(dma->rx_size, GFP_KERNEL); - if (!dma->rx_buf) { - dma_release_channel(dma->rx_chan); - dma_release_channel(dma->tx_chan); - return -ENOMEM; + ret = -ENOMEM; + goto err_release_tx; } dma->rx_addr = dma_map_single(p->port.dev, dma->rx_buf, dma->rx_size, DMA_FROM_DEVICE); + if (dma_mapping_error(p->port.dev, dma->rx_addr)) { + ret = -EIO; + goto err_free_rx; + } spin_lock_irqsave(&p->port.lock, flags); @@ -911,8 +914,23 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) UART_XMIT_SIZE, DMA_TO_DEVICE); spin_unlock_irqrestore(&p->port.lock, flags); + if (dma_mapping_error(p->port.dev, dma->tx_addr)) { + ret = -EIO; + goto err_unmap_rx; + } return 0; + +err_unmap_rx: + dma_unmap_single(p->port.dev, dma->rx_addr, dma->rx_size, + DMA_FROM_DEVICE); +err_free_rx: + kfree(dma->rx_buf); +err_release_tx: + dma_release_channel(dma->tx_chan); +err_release_rx: + dma_release_channel(dma->rx_chan); + return ret; } static void s3c24xx_serial_release_dma(struct s3c24xx_uart_port *p) From 9e85b5c73a64d0b4ac54afcdad690b66efa5c52f Mon Sep 17 
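The usage-count imbalance is easier to see with plain integers standing in for the runtime-PM core (a toy model only; the real code also involves autosuspend delays and the power state machine): every get must be paired with exactly one put, and a put without a matching get, as in the old remove path, leaves the count negative.

#include <stdio.h>

static int usage_count;  /* toy stand-in for dev->power.usage_count */

static void pm_get(void) { usage_count++; }
static void pm_put(void) { usage_count--; }

int main(void)
{
    /* Balanced teardown: resume around the work, then drop the reference. */
    pm_get();   /* resume before removing the port */
    /* ... remove the port, stop autosuspend ... */
    pm_put();
    printf("balanced teardown:   usage_count = %d\n", usage_count);

    /* Unbalanced teardown: a put with no matching get. */
    pm_put();
    printf("unbalanced teardown: usage_count = %d\n", usage_count);
    return 0;
}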
00:00:00 2001 From: Johan Hovold Date: Mon, 10 Apr 2017 11:21:38 +0200 Subject: [PATCH 126/465] serial: omap: fix runtime-pm handling on unbind commit 099bd73dc17ed77aa8c98323e043613b6e8f54fc upstream. An unbalanced and misplaced synchronous put was used to suspend the device on driver unbind, something which with a likewise misplaced pm_runtime_disable leads to external aborts when an open port is being removed. Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa024010 ... [] (serial_omap_set_mctrl) from [] (uart_update_mctrl+0x50/0x60) [] (uart_update_mctrl) from [] (uart_shutdown+0xbc/0x138) [] (uart_shutdown) from [] (uart_hangup+0x94/0x190) [] (uart_hangup) from [] (__tty_hangup+0x404/0x41c) [] (__tty_hangup) from [] (tty_vhangup+0x1c/0x20) [] (tty_vhangup) from [] (uart_remove_one_port+0xec/0x260) [] (uart_remove_one_port) from [] (serial_omap_remove+0x40/0x60) [] (serial_omap_remove) from [] (platform_drv_remove+0x34/0x4c) Fix this up by resuming the device before deregistering the port and by suspending and disabling runtime pm only after the port has been removed. Also make sure to disable autosuspend before disabling runtime pm so that the usage count is balanced and device actually suspended before returning. Note that due to a negative autosuspend delay being set in probe, the unbalanced put would actually suspend the device on first driver unbind, while rebinding and again unbinding would result in a negative power.usage_count. Fixes: 7e9c8e7dbf3b ("serial: omap: make sure to suspend device before remove") Cc: Felipe Balbi Cc: Santosh Shilimkar Signed-off-by: Johan Hovold Acked-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/omap-serial.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 6c6f82ad8d5c..8c2086e14b51 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1780,9 +1780,13 @@ static int serial_omap_remove(struct platform_device *dev) { struct uart_omap_port *up = platform_get_drvdata(dev); + pm_runtime_get_sync(up->dev); + + uart_remove_one_port(&serial_omap_reg, &up->port); + + pm_runtime_dont_use_autosuspend(up->dev); pm_runtime_put_sync(up->dev); pm_runtime_disable(up->dev); - uart_remove_one_port(&serial_omap_reg, &up->port); pm_qos_remove_request(&up->pm_qos_request); device_init_wakeup(&dev->dev, false); From aad32798dc881faec3f64c6b7d6c19f3329546a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 10 Apr 2017 11:21:39 +0200 Subject: [PATCH 127/465] serial: omap: suspend device on probe errors commit 77e6fe7fd2b7cba0bf2f2dc8cde51d7b9a35bf74 upstream. Make sure to actually suspend the device before returning after a failed (or deferred) probe. Note that autosuspend must be disabled before runtime pm is disabled in order to balance the usage count due to a negative autosuspend delay as well as to make the final put suspend the device synchronously. 
Fixes: 388bc2622680 ("omap-serial: Fix the error handling in the omap_serial probe") Cc: Shubhrajyoti D Signed-off-by: Johan Hovold Acked-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/omap-serial.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 8c2086e14b51..e4210b9ad0d3 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1767,7 +1767,8 @@ static int serial_omap_probe(struct platform_device *pdev) return 0; err_add_port: - pm_runtime_put(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_qos_remove_request(&up->pm_qos_request); device_init_wakeup(up->dev, false); From 34e01f920739beb9e5c2161d874cdd018343aa3a Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Wed, 22 Feb 2017 19:37:08 +0800 Subject: [PATCH 128/465] tty: pty: Fix ldisc flush after userspace become aware of the data already commit 77dae6134440420bac334581a3ccee94cee1c054 upstream. While using emacs, cat, or other commands in konsole with recent kernels, I have often seen CTRL-C freeze konsole. After konsole freezes I cannot type anything and have to open a new one, which is very annoying. See bug report: https://bugs.kde.org/show_bug.cgi?id=175283 The platform in that bug report is Solaris, but the pty code in Linux now shows the same behavior.

The problem is easy to trigger with the steps below. Note: in my test, BigFile is a text file larger than 1G.

1: open konsole
2: cat BigFile
3: CTRL-C

After some digging I found that the reason is that commit 1d1d14da12e7 ("pty: Fix buffer flush deadlock") changed the behavior of pty_flush_buffer:

Thread A                                   Thread B
--------                                   --------
1: n_tty_poll returns POLLIN
                                           2: CTRL-C triggers pty_flush_buffer
                                              -> tty_buffer_flush
                                              -> n_tty_flush_buffer
3: attempts to check the count of chars:
   ioctl(fd, TIOCINQ, &available);
   available is equal to 0
4: read(fd, buffer, available) returns 0
5: konsole closes fd

We could use the same patch included in the bug report as a workaround on Linux too, but the data in the ldisc belongs to the application on the other side; we should not clear it when we only want to flush the write buffer of this side in pty_flush_buffer. So it is better to drop the ldisc flush from pty_flush_buffer: the new behavior brings no benefit and only makes POLLIN inconsistent with TIOCINQ and FIONREAD. No flush_buffer function in any other tty driver behaves the way the current pty_flush_buffer does.
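The userspace contract that broke is the one the reader loop relies on: after poll() reports POLLIN, TIOCINQ/FIONREAD should report at least one byte and read() should not return 0, because applications treat a zero-length read as EOF or hangup. A small standalone sketch of such a loop, shown on a pipe for simplicity rather than on a pty master:

#include <poll.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
    int fds[2];
    char buf[64];
    struct pollfd pfd;

    if (pipe(fds) || write(fds[1], "hello\n", 6) != 6)
        return 1;

    pfd.fd = fds[0];
    pfd.events = POLLIN;
    if (poll(&pfd, 1, 1000) == 1 && (pfd.revents & POLLIN)) {
        int avail = 0;
        ssize_t n;

        ioctl(fds[0], FIONREAD, &avail);  /* same check as TIOCINQ on a tty */
        n = read(fds[0], buf, sizeof(buf));
        printf("POLLIN, FIONREAD=%d, read=%zd\n", avail, n);
        if (n == 0)
            printf("a terminal would treat this as EOF/hangup\n");
    }
    close(fds[0]);
    close(fds[1]);
    return 0;
}

On an affected kernel, the flush running between steps 1 and 3 of the interleaving above empties the ldisc, so the same loop sees POLLIN, then FIONREAD == 0, then read() == 0, and wrongly concludes the other end hung up.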
Fixes: 1d1d14da12e7 ("pty: Fix buffer flush deadlock") Signed-off-by: Wang YanQing Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 66b59a15780d..65799575c666 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -216,16 +216,11 @@ static int pty_signal(struct tty_struct *tty, int sig) static void pty_flush_buffer(struct tty_struct *tty) { struct tty_struct *to = tty->link; - struct tty_ldisc *ld; if (!to) return; - ld = tty_ldisc_ref(to); - tty_buffer_flush(to, ld); - if (ld) - tty_ldisc_deref(ld); - + tty_buffer_flush(to, NULL); if (to->packet) { spin_lock_irq(&tty->ctrl_lock); tty->ctrl_status |= TIOCPKT_FLUSHWRITE; From 01f9853326b0ffd56ad25fec21064eef637e1ec0 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 13 Apr 2017 08:55:08 -0500 Subject: [PATCH 129/465] tty: pl011: use "qdf2400_e44" as the earlycon name for QDF2400 E44 commit 5a0722b898f851b9ef108ea7babc529e4efc773d upstream. Define a new early console name for Qualcomm Datacenter Technologies QDF2400 SOCs affected by erratum 44, instead of piggy-backing on "pl011". Previously, to enable traditional (non-SPCR) earlycon, the documentation said to specify "earlycon=pl011,
<address>,qdf2400_e44", but the code was broken and this didn't actually work. So instead, the method for specifying the E44 work-around with traditional earlycon is "earlycon=qdf2400_e44,<address>
". Both methods of earlycon are now enabled with the same function. Fixes: e53e597fd4c4 ("tty: pl011: fix earlycon work-around for QDF2400 erratum 44") Signed-off-by: Timur Tabi Tested-by: Shanker Donthineni Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index b0a377725d63..f2503d862f3a 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2470,19 +2470,34 @@ static int __init pl011_early_console_setup(struct earlycon_device *device, if (!device->port.membase) return -ENODEV; - /* On QDF2400 SOCs affected by Erratum 44, the "qdf2400_e44" must - * also be specified, e.g. "earlycon=pl011,
<address>,qdf2400_e44". - */ - if (!strcmp(device->options, "qdf2400_e44")) device->con->write = qdf2400_e44_early_write; - else - device->con->write = pl011_early_write; + device->con->write = pl011_early_write; return 0; } OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup); -EARLYCON_DECLARE(qdf2400_e44, pl011_early_console_setup); + +/* + * On Qualcomm Datacenter Technologies QDF2400 SOCs affected by + * Erratum 44, traditional earlycon can be enabled by specifying + * "earlycon=qdf2400_e44,<address>
". Any options are ignored. + * + * Alternatively, you can just specify "earlycon", and the early console + * will be enabled with the information from the SPCR table. In this + * case, the SPCR code will detect the need for the E44 work-around, + * and set the console name to "qdf2400_e44". + */ +static int __init +qdf2400_e44_early_console_setup(struct earlycon_device *device, + const char *opt) +{ + if (!device->port.membase) + return -ENODEV; + + device->con->write = qdf2400_e44_early_write; + return 0; +} +EARLYCON_DECLARE(qdf2400_e44, qdf2400_e44_early_console_setup); #else #define AMBA_CONSOLE NULL From ded39dd9f00affd2170cf7eeb737f681c0f88b4f Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 24 Apr 2017 18:25:04 -0700 Subject: [PATCH 130/465] Bluetooth: Fix user channel for 32bit userspace on 64bit kernel commit ab89f0bdd63a3721f7cd3f064f39fc4ac7ca14d4 upstream. Running 32bit userspace on 64bit kernel results in MSG_CMSG_COMPAT being defined as 0x80000000. This results in sendmsg failure if used from 32bit userspace running on 64bit kernel. Fix this by accounting for MSG_CMSG_COMPAT in flags check in hci_sock_sendmsg. Signed-off-by: Szymon Janc Signed-off-by: Marko Kiiskila Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index f64d6566021f..638bf0e1a2e3 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1680,7 +1680,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE)) + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE| + MSG_CMSG_COMPAT)) return -EINVAL; if (len < 4 || len > HCI_MAX_FRAME_SIZE) From a86af7983d81ad4ff2de3bc1a2d05da8ce960d8c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 29 Mar 2017 18:15:27 +0200 Subject: [PATCH 131/465] Bluetooth: hci_bcm: add missing tty-device sanity check commit 95065a61e9bf25fb85295127fba893200c2bbbd8 upstream. Make sure to check the tty-device pointer before looking up the sibling platform device to avoid dereferencing a NULL-pointer when the tty is one end of a Unix98 pty. Fixes: 0395ffc1ee05 ("Bluetooth: hci_bcm: Add PM for BCM devices") Cc: Frederic Danis Signed-off-by: Johan Hovold Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_bcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 5262a2077d7a..11f30e5cec2c 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -287,6 +287,9 @@ static int bcm_open(struct hci_uart *hu) hu->priv = bcm; + if (!hu->tty->dev) + goto out; + mutex_lock(&bcm_device_lock); list_for_each(p, &bcm_device_list) { struct bcm_device *dev = list_entry(p, struct bcm_device, list); @@ -307,7 +310,7 @@ static int bcm_open(struct hci_uart *hu) } mutex_unlock(&bcm_device_lock); - +out: return 0; } From 740f485dae696753d0c776abd1d5f6f58b4fb738 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 29 Mar 2017 18:15:28 +0200 Subject: [PATCH 132/465] Bluetooth: hci_intel: add missing tty-device sanity check commit dcb9cfaa5ea9aa0ec08aeb92582ccfe3e4c719a9 upstream. Make sure to check the tty-device pointer before looking up the sibling platform device to avoid dereferencing a NULL-pointer when the tty is one end of a Unix98 pty. 
Fixes: 74cdad37cd24 ("Bluetooth: hci_intel: Add runtime PM support") Fixes: 1ab1f239bf17 ("Bluetooth: hci_intel: Add support for platform driver") Cc: Loic Poulain Signed-off-by: Johan Hovold Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_intel.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 9e271286c5e5..73306384af6c 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -307,6 +307,9 @@ static int intel_set_power(struct hci_uart *hu, bool powered) struct list_head *p; int err = -ENODEV; + if (!hu->tty->dev) + return err; + mutex_lock(&intel_device_list_lock); list_for_each(p, &intel_device_list) { @@ -379,6 +382,9 @@ static void intel_busy_work(struct work_struct *work) struct intel_data *intel = container_of(work, struct intel_data, busy_work); + if (!intel->hu->tty->dev) + return; + /* Link is busy, delay the suspend */ mutex_lock(&intel_device_list_lock); list_for_each(p, &intel_device_list) { @@ -889,6 +895,8 @@ static int intel_setup(struct hci_uart *hu) list_for_each(p, &intel_device_list) { struct intel_device *dev = list_entry(p, struct intel_device, list); + if (!hu->tty->dev) + break; if (hu->tty->dev->parent == dev->pdev->dev.parent) { if (device_may_wakeup(&dev->pdev->dev)) { set_bit(STATE_LPM_ENABLED, &intel->flags); @@ -1056,6 +1064,9 @@ static int intel_enqueue(struct hci_uart *hu, struct sk_buff *skb) BT_DBG("hu %p skb %p", hu, skb); + if (!hu->tty->dev) + goto out_enqueue; + /* Be sure our controller is resumed and potential LPM transaction * completed before enqueuing any packet. */ @@ -1072,7 +1083,7 @@ static int intel_enqueue(struct hci_uart *hu, struct sk_buff *skb) } } mutex_unlock(&intel_device_list_lock); - +out_enqueue: skb_queue_tail(&intel->txq, skb); return 0; From a5938c093a1827a7262afc925a6a1032842af096 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Apr 2017 15:14:55 -0400 Subject: [PATCH 133/465] cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc() commit a590b90d472f2c176c140576ee3ab44df7f67839 upstream. cgroup_get() expected to be called only on live cgroups and triggers warning on a dead cgroup; however, cgroup_sk_alloc() may be called while cloning a socket which is left in an empty and removed cgroup and thus may legitimately duplicate its reference on a dead cgroup. This currently triggers the following warning spuriously. WARNING: CPU: 14 PID: 0 at kernel/cgroup.c:490 cgroup_get+0x55/0x60 ... 
[] __warn+0xd3/0xf0 [] warn_slowpath_null+0x1e/0x20 [] cgroup_get+0x55/0x60 [] cgroup_sk_alloc+0x51/0xe0 [] sk_clone_lock+0x2db/0x390 [] inet_csk_clone_lock+0x16/0xc0 [] tcp_create_openreq_child+0x23/0x4b0 [] tcp_v6_syn_recv_sock+0x91/0x670 [] tcp_check_req+0x3a6/0x4e0 [] tcp_v6_rcv+0x693/0xa00 [] ip6_input_finish+0x59/0x3e0 [] ip6_input+0x32/0xb0 [] ip6_rcv_finish+0x57/0xa0 [] ipv6_rcv+0x318/0x4d0 [] __netif_receive_skb_core+0x2d7/0x9a0 [] __netif_receive_skb+0x16/0x70 [] netif_receive_skb_internal+0x23/0x80 [] napi_gro_frags+0x208/0x270 [] mlx4_en_process_rx_cq+0x74c/0xf40 [] mlx4_en_poll_rx_cq+0x30/0x90 [] net_rx_action+0x210/0x350 [] __do_softirq+0x106/0x2c7 [] irq_exit+0x9d/0xa0 [] do_IRQ+0x54/0xd0 [] common_interrupt+0x7f/0x7f [] cpuidle_enter+0x17/0x20 [] cpu_startup_entry+0x2a9/0x2f0 [] start_secondary+0xf1/0x100 This patch renames the existing cgroup_get() with the dead cgroup warning to cgroup_get_live() after cgroup_kn_lock_live() and introduces the new cgroup_get() which doesn't check whether the cgroup is live or dead. All existing cgroup_get() users except for cgroup_sk_alloc() are converted to use cgroup_get_live(). Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") Cc: Johannes Weiner Reported-by: Chris Mason Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 687f5e0194ef..b507f1889a72 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -437,6 +437,11 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp, } static void cgroup_get(struct cgroup *cgrp) +{ + css_get(&cgrp->self); +} + +static void cgroup_get_live(struct cgroup *cgrp) { WARN_ON_ONCE(cgroup_is_dead(cgrp)); css_get(&cgrp->self); @@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, list_add_tail(&link->cgrp_link, &cset->cgrp_links); if (cgroup_parent(cgrp)) - cgroup_get(cgrp); + cgroup_get_live(cgrp); } /** @@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, return ERR_PTR(-EINVAL); } cgrp_dfl_visible = true; - cgroup_get(&cgrp_dfl_root.cgrp); + cgroup_get_live(&cgrp_dfl_root.cgrp); dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, CGROUP2_SUPER_MAGIC, ns); @@ -2576,7 +2581,7 @@ void cgroup_lock_and_drain_offline(struct cgroup *cgrp) if (!css || !percpu_ref_is_dying(&css->refcnt)) continue; - cgroup_get(dsct); + cgroup_get_live(dsct); prepare_to_wait(&dsct->offline_waitq, &wait, TASK_UNINTERRUPTIBLE); @@ -3947,7 +3952,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, { lockdep_assert_held(&cgroup_mutex); - cgroup_get(cgrp); + cgroup_get_live(cgrp); memset(css, 0, sizeof(*css)); css->cgroup = cgrp; @@ -4123,7 +4128,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent) /* allocation complete, commit to creation */ list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); atomic_inc(&root->nr_cgrps); - cgroup_get(parent); + cgroup_get_live(parent); /* * @cgrp is now fully operational. 
If something fails after this @@ -4947,7 +4952,7 @@ struct cgroup *cgroup_get_from_path(const char *path) if (kn) { if (kernfs_type(kn) == KERNFS_DIR) { cgrp = kn->priv; - cgroup_get(cgrp); + cgroup_get_live(cgrp); } else { cgrp = ERR_PTR(-ENOTDIR); } @@ -5027,6 +5032,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) /* Socket clone path */ if (skcd->val) { + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ cgroup_get(sock_cgroup_ptr(skcd)); return; } From 8f0fde5bc9a58d5b0a0d8a3857fbc19b6d859eda Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Apr 2017 13:39:54 +0200 Subject: [PATCH 134/465] libata: reject passthrough WRITE SAME requests commit c6ade20f5e50e188d20b711a618b20dd1d50457e upstream. The WRITE SAME to TRIM translation rewrites the DATA OUT buffer. While the SCSI code accomodates for this by passing a read-writable buffer userspace applications don't cater for this behavior. In fact it can be used to rewrite e.g. a readonly file through mmap and should be considered as a security fix. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1ac70744ae7b..50f56d066936 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3462,6 +3462,14 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) if (unlikely(!dev->dma_mode)) goto invalid_opcode; + /* + * We only allow sending this command through the block layer, + * as it modifies the DATA OUT buffer, which would corrupt user + * memory for SG_IO commands. + */ + if (unlikely(blk_rq_is_passthrough(scmd->request))) + goto invalid_opcode; + if (unlikely(scmd->cmd_len < 16)) { fp = 15; goto invalid_fld; From d517be513311fd5800ed05d1e5f47b2337fbab9c Mon Sep 17 00:00:00 2001 From: Joeseph Chang Date: Mon, 27 Mar 2017 20:22:09 -0600 Subject: [PATCH 135/465] ipmi: Fix kernel panic at ipmi_ssif_thread() commit 6de65fcfdb51835789b245203d1bfc8d14cb1e06 upstream. msg_written_handler() may set ssif_info->multi_data to NULL when using ipmitool to write fru. Before setting ssif_info->multi_data to NULL, add new local pointer "data_to_send" and store correct i2c data pointer to it to fix NULL pointer kernel panic and incorrect ssif_info->multi_pos. Signed-off-by: Joeseph Chang Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index cca6e5bc1cea..51ba67de862e 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -891,6 +891,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, * for details on the intricacies of this. */ int left; + unsigned char *data_to_send; ssif_inc_stat(ssif_info, sent_messages_parts); @@ -899,6 +900,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, left = 32; /* Length byte. 
*/ ssif_info->multi_data[ssif_info->multi_pos] = left; + data_to_send = ssif_info->multi_data + ssif_info->multi_pos; ssif_info->multi_pos += left; if (left < 32) /* @@ -912,7 +914,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, rv = ssif_i2c_send(ssif_info, msg_written_handler, I2C_SMBUS_WRITE, SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE, - ssif_info->multi_data + ssif_info->multi_pos, + data_to_send, I2C_SMBUS_BLOCK_DATA); if (rv < 0) { /* request failed, just return the error. */ From d2572f5b0104a07040e086a46c0e431bd3b06b9f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 24 Apr 2017 15:43:05 -0700 Subject: [PATCH 136/465] libnvdimm, region: fix flush hint detection crash commit bc042fdfbb92b5b13421316b4548e2d6e98eed37 upstream. In the case where a dimm does not have any associated flush hints the ndrd->flush_wpq array may be uninitialized leading to crashes with the following signature: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: region_visible+0x10f/0x160 [libnvdimm] Call Trace: internal_create_group+0xbe/0x2f0 sysfs_create_groups+0x40/0x80 device_add+0x2d8/0x650 nd_async_device_register+0x12/0x40 [libnvdimm] async_run_entry_fn+0x39/0x170 process_one_work+0x212/0x6c0 ? process_one_work+0x197/0x6c0 worker_thread+0x4e/0x4a0 kthread+0x10c/0x140 ? process_one_work+0x6c0/0x6c0 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x31/0x40 Reviewed-by: Jeff Moyer Fixes: f284a4f23752 ("libnvdimm: introduce nvdimm_flush() and nvdimm_has_flush()") Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/region_devs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index b7cb5066d961..378885f4050b 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -968,17 +968,20 @@ EXPORT_SYMBOL_GPL(nvdimm_flush); */ int nvdimm_has_flush(struct nd_region *nd_region) { - struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); int i; /* no nvdimm == flushing capability unknown */ if (nd_region->ndr_mappings == 0) return -ENXIO; - for (i = 0; i < nd_region->ndr_mappings; i++) - /* flush hints present, flushing required */ - if (ndrd_get_flush_wpq(ndrd, i, 0)) + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + /* flush hints present / available */ + if (nvdimm->num_flush) return 1; + } /* * The platform defines dimm devices without hints, assume From de21b800a6455b6745158facdf5417e3670e2cee Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 25 Apr 2017 17:04:13 -0600 Subject: [PATCH 137/465] libnvdimm, pmem: fix a NULL pointer BUG in nd_pmem_notify commit b2518c78ce76896f0f8f7940bf02104b227e1709 upstream. The following BUG was observed when nd_pmem_notify() was called for a BTT device. The use of a pmem_device pointer is not valid with BTT. BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 IP: nd_pmem_notify+0x30/0xf0 [nd_pmem] Call Trace: nd_device_notify+0x40/0x50 child_notify+0x10/0x20 device_for_each_child+0x50/0x90 nd_region_notify+0x20/0x30 nd_device_notify+0x40/0x50 nvdimm_region_notify+0x27/0x30 acpi_nfit_scrub+0x341/0x590 [nfit] process_one_work+0x197/0x450 worker_thread+0x4e/0x4a0 kthread+0x109/0x140 Fix nd_pmem_notify() by setting nd_region and badblocks pointers properly for BTT. 
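The shape of the fix is: decide what kind of namespace device the notification is for first, and only then pick the nd_region and badblocks pointers, instead of assuming drvdata is always a pmem_device. A standalone sketch of that pattern with simplified placeholder types (these are not the libnvdimm structures, just an illustration of branching before the cast):

#include <stdio.h>

/* Simplified placeholders for the BTT versus raw/pfn pmem cases. */
enum nd_kind { ND_BTT, ND_PFN, ND_RAW };

struct badblocks { int count; };

struct ndev {
    enum nd_kind kind;
    struct badblocks btt_bb;   /* valid only when kind == ND_BTT */
    struct badblocks pmem_bb;  /* valid for the other cases */
};

/* Branch on the device type first, then pick the pointer to update. */
static struct badblocks *pick_badblocks(struct ndev *dev)
{
    if (dev->kind == ND_BTT)
        return &dev->btt_bb;
    return &dev->pmem_bb;
}

int main(void)
{
    struct ndev btt = { .kind = ND_BTT }, raw = { .kind = ND_RAW };

    pick_badblocks(&btt)->count = 1;
    pick_badblocks(&raw)->count = 2;
    printf("btt=%d raw=%d\n", btt.btt_bb.count, raw.pmem_bb.count);
    return 0;
}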
Cc: Vishal Verma Fixes: 719994660c24 ("libnvdimm: async notification support") Signed-off-by: Toshi Kani Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/pmem.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 5b536be5a12e..0fc18262a2bc 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -388,12 +388,12 @@ static void nd_pmem_shutdown(struct device *dev) static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) { - struct pmem_device *pmem = dev_get_drvdata(dev); - struct nd_region *nd_region = to_region(pmem); + struct nd_region *nd_region; resource_size_t offset = 0, end_trunc = 0; struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; struct resource res; + struct badblocks *bb; if (event != NVDIMM_REVALIDATE_POISON) return; @@ -402,20 +402,33 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) struct nd_btt *nd_btt = to_nd_btt(dev); ndns = nd_btt->ndns; - } else if (is_nd_pfn(dev)) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); - struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + nd_region = to_nd_region(ndns->dev.parent); + nsio = to_nd_namespace_io(&ndns->dev); + bb = &nsio->bb; + } else { + struct pmem_device *pmem = dev_get_drvdata(dev); - ndns = nd_pfn->ndns; - offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad); - end_trunc = __le32_to_cpu(pfn_sb->end_trunc); - } else - ndns = to_ndns(dev); + nd_region = to_region(pmem); + bb = &pmem->bb; + + if (is_nd_pfn(dev)) { + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + + ndns = nd_pfn->ndns; + offset = pmem->data_offset + + __le32_to_cpu(pfn_sb->start_pad); + end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + } else { + ndns = to_ndns(dev); + } + + nsio = to_nd_namespace_io(&ndns->dev); + } - nsio = to_nd_namespace_io(&ndns->dev); res.start = nsio->res.start + offset; res.end = nsio->res.end - end_trunc; - nvdimm_badblocks_populate(nd_region, &pmem->bb, &res); + nvdimm_badblocks_populate(nd_region, bb, &res); } MODULE_ALIAS("pmem"); From a3ff3ebdf32a96dbb5873b72971e1148c8e260c5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Apr 2017 22:05:14 -0700 Subject: [PATCH 138/465] libnvdimm: fix nvdimm_bus_lock() vs device_lock() ordering commit 452bae0aede774f87bf56c28b6dd50b72c78986c upstream. A debug patch to turn the standard device_lock() into something that lockdep can analyze yielded the following: ====================================================== [ INFO: possible circular locking dependency detected ] 4.11.0-rc4+ #106 Tainted: G O ------------------------------------------------------- lt-libndctl/1898 is trying to acquire lock: (&dev->nvdimm_mutex/3){+.+.+.}, at: [] nd_attach_ndns+0x178/0x1b0 [libnvdimm] but task is already holding lock: (&nvdimm_bus->reconfig_mutex){+.+.+.}, at: [] nvdimm_bus_lock+0x21/0x30 [libnvdimm] which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&nvdimm_bus->reconfig_mutex){+.+.+.}: lock_acquire+0xf6/0x1f0 __mutex_lock+0x88/0x980 mutex_lock_nested+0x1b/0x20 nvdimm_bus_lock+0x21/0x30 [libnvdimm] nvdimm_namespace_capacity+0x1b/0x40 [libnvdimm] nvdimm_namespace_common_probe+0x230/0x510 [libnvdimm] nd_pmem_probe+0x14/0x180 [nd_pmem] nvdimm_bus_probe+0xa9/0x260 [libnvdimm] -> #0 (&dev->nvdimm_mutex/3){+.+.+.}: __lock_acquire+0x1107/0x1280 lock_acquire+0xf6/0x1f0 __mutex_lock+0x88/0x980 mutex_lock_nested+0x1b/0x20 nd_attach_ndns+0x178/0x1b0 [libnvdimm] nd_namespace_store+0x308/0x3c0 [libnvdimm] namespace_store+0x87/0x220 [libnvdimm] In this case '&dev->nvdimm_mutex/3' mirrors '&dev->mutex'. Fix this by replacing the use of device_lock() with nvdimm_bus_lock() to protect nd_{attach,detach}_ndns() operations. Fixes: 8c2f7e8658df ("libnvdimm: infrastructure for btt devices") Reported-by: Yi Zhang Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/btt_devs.c | 2 +- drivers/nvdimm/claim.c | 23 +++++++++++++++-------- drivers/nvdimm/dax_devs.c | 2 +- drivers/nvdimm/pfn_devs.c | 2 +- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 97dd2925ed6e..4b76af2b8715 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -314,7 +314,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) if (rc < 0) { struct nd_btt *nd_btt = to_nd_btt(btt_dev); - __nd_detach_ndns(btt_dev, &nd_btt->ndns); + nd_detach_ndns(btt_dev, &nd_btt->ndns); put_device(btt_dev); } diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index ca6d572c48fc..8513c8ac963b 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -21,8 +21,13 @@ void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns) { struct nd_namespace_common *ndns = *_ndns; + struct nvdimm_bus *nvdimm_bus; - lockdep_assert_held(&ndns->dev.mutex); + if (!ndns) + return; + + nvdimm_bus = walk_to_nvdimm_bus(&ndns->dev); + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); dev_WARN_ONCE(dev, ndns->claim != dev, "%s: invalid claim\n", __func__); ndns->claim = NULL; *_ndns = NULL; @@ -37,18 +42,20 @@ void nd_detach_ndns(struct device *dev, if (!ndns) return; get_device(&ndns->dev); - device_lock(&ndns->dev); + nvdimm_bus_lock(&ndns->dev); __nd_detach_ndns(dev, _ndns); - device_unlock(&ndns->dev); + nvdimm_bus_unlock(&ndns->dev); put_device(&ndns->dev); } bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, struct nd_namespace_common **_ndns) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&attach->dev); + if (attach->claim) return false; - lockdep_assert_held(&attach->dev.mutex); + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); dev_WARN_ONCE(dev, *_ndns, "%s: invalid claim\n", __func__); attach->claim = dev; *_ndns = attach; @@ -61,9 +68,9 @@ bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, { bool claimed; - device_lock(&attach->dev); + nvdimm_bus_lock(&attach->dev); claimed = __nd_attach_ndns(dev, attach, _ndns); - device_unlock(&attach->dev); + nvdimm_bus_unlock(&attach->dev); return claimed; } @@ -114,7 +121,7 @@ static void nd_detach_and_reset(struct device *dev, struct nd_namespace_common **_ndns) { /* detach the namespace and destroy / reset the device */ - nd_detach_ndns(dev, _ndns); + __nd_detach_ndns(dev, _ndns); if (is_idle(dev, *_ndns)) { nd_device_unregister(dev, ND_ASYNC); } else if (is_nd_btt(dev)) { @@ -184,7 
+191,7 @@ ssize_t nd_namespace_store(struct device *dev, } WARN_ON_ONCE(!is_nvdimm_bus_locked(dev)); - if (!nd_attach_ndns(dev, ndns, _ndns)) { + if (!__nd_attach_ndns(dev, ndns, _ndns)) { dev_dbg(dev, "%s already claimed\n", dev_name(&ndns->dev)); len = -EBUSY; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 45fa82cae87c..c1b6556aea6e 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -124,7 +124,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) dev_dbg(dev, "%s: dax: %s\n", __func__, rc == 0 ? dev_name(dax_dev) : ""); if (rc < 0) { - __nd_detach_ndns(dax_dev, &nd_pfn->ndns); + nd_detach_ndns(dax_dev, &nd_pfn->ndns); put_device(dax_dev); } else __nd_device_register(dax_dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 6c033c9a2f06..c38566f4da7d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -484,7 +484,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) dev_dbg(dev, "%s: pfn: %s\n", __func__, rc == 0 ? dev_name(pfn_dev) : ""); if (rc < 0) { - __nd_detach_ndns(pfn_dev, &nd_pfn->ndns); + nd_detach_ndns(pfn_dev, &nd_pfn->ndns); put_device(pfn_dev); } else __nd_device_register(pfn_dev); From af0eb80e8eff79a6c70a1b2322e41141bed13a80 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 4 May 2017 19:54:42 -0700 Subject: [PATCH 139/465] libnvdimm, pfn: fix 'npfns' vs section alignment commit d5483feda85a8f39ee2e940e279547c686aac30c upstream. Fix failures to create namespaces due to the vmem_altmap not advertising enough free space to store the memmap. WARNING: CPU: 15 PID: 8022 at arch/x86/mm/init_64.c:656 arch_add_memory+0xde/0xf0 [..] Call Trace: dump_stack+0x63/0x83 __warn+0xcb/0xf0 warn_slowpath_null+0x1d/0x20 arch_add_memory+0xde/0xf0 devm_memremap_pages+0x244/0x440 pmem_attach_disk+0x37e/0x490 [nd_pmem] nd_pmem_probe+0x7e/0xa0 [nd_pmem] nvdimm_bus_probe+0x71/0x120 [libnvdimm] driver_probe_device+0x2bb/0x460 bind_store+0x114/0x160 drv_attr_store+0x25/0x30 In commit 658922e57b84 "libnvdimm, pfn: fix memmap reservation sizing" we arranged for the capacity to be allocated, but failed to also update the 'npfns' parameter. This leads to cases where there is enough capacity reserved to hold all the allocated sections, but vmemmap_populate_hugepages() still encounters -ENOMEM from altmap_alloc_block_buf(). This fix is a stop-gap until we can teach the core memory hotplug implementation to permit sub-section hotplug. 
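The arithmetic of the fix is just an align-up of the page count to a whole memory section, so the number of pfns advertised in the superblock matches the section-granular capacity actually reserved. A standalone sketch of that rounding, assuming for illustration 4 KiB pages and 128 MiB sections (32768 pages per section, the common x86-64 values); the kernel uses PFN_SECTION_ALIGN_UP with the architecture's real constants:

#include <stdio.h>

#define PAGES_PER_SECTION 32768UL  /* assumption: 128 MiB sections / 4 KiB pages */

/* Round a page count up to a whole number of memory sections. */
static unsigned long section_align_up(unsigned long npfns)
{
    return (npfns + PAGES_PER_SECTION - 1) & ~(PAGES_PER_SECTION - 1);
}

int main(void)
{
    unsigned long samples[] = { 1, 32768, 32769, 1000000 };
    unsigned int i;

    for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
        printf("npfns %8lu -> aligned %8lu\n",
               samples[i], section_align_up(samples[i]));
    return 0;
}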
Fixes: 658922e57b84 ("libnvdimm, pfn: fix memmap reservation sizing") Reported-by: Anisha Allada Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/pfn_devs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index c38566f4da7d..335c8175410b 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -538,7 +538,8 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE; + nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) + - offset) / PAGE_SIZE); if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, "number of pfns truncated from %lld to %ld\n", @@ -625,7 +626,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) */ start += start_pad; size = resource_size(&nsio->res); - npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K; + npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) + / PAGE_SIZE); if (nd_pfn->mode == PFN_MODE_PMEM) { /* * vmemmap_populate_hugepages() allocates the memmap array in From f25c78c879fac44e549918f3b7201c757201cc9d Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Thu, 27 Apr 2017 17:03:13 +0530 Subject: [PATCH 140/465] pstore: Fix flags to enable dumps on powerpc commit 041939c1ec54208b42f5cd819209173d52a29d34 upstream. After commit c950fd6f201a kernel registers pstore write based on flag set. Pstore write for powerpc is broken as flags(PSTORE_FLAGS_DMESG) is not set for powerpc architecture. On panic, kernel doesn't write message to /fs/pstore/dmesg*(Entry doesn't gets created at all). This patch enables pstore write for powerpc architecture by setting PSTORE_FLAGS_DMESG flag. Fixes: c950fd6f201a ("pstore: Split pstore fragile flags") Signed-off-by: Ankit Kumar Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/nvram_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index d5e2b8309939..021db31b40ba 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -561,6 +561,7 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, static struct pstore_info nvram_pstore_info = { .owner = THIS_MODULE, .name = "nvram", + .flags = PSTORE_FLAGS_DMESG, .open = nvram_pstore_open, .read = nvram_pstore_read, .write = nvram_pstore_write, From ed8834ea4f43cf7854de62f8985b01c088ba8a75 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 5 Mar 2017 22:08:58 -0800 Subject: [PATCH 141/465] pstore: Use dynamic spinlock initializer commit e9a330c4289f2ba1ca4bf98c2b430ab165a8931b upstream. The per-prz spinlock should be using the dynamic initializer so that lockdep can correctly track it. Without this, under lockdep, we get a warning at boot that the lock is in non-static memory. 
Fixes: 109704492ef6 ("pstore: Make spinlock per zone instead of global") Fixes: 76d5692a5803 ("pstore: Correctly initialize spinlock and flags") Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index bc927e30bdcc..e11672aa4575 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -532,7 +532,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, } /* Initialize general buffer state. */ - prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); + raw_spin_lock_init(&prz->buffer_lock); prz->flags = flags; ret = persistent_ram_buffer_map(start, size, prz, memtype); From bbc105f3878b9e69479bfc0a758b4fec64c71cd0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 6 Mar 2017 12:42:12 -0800 Subject: [PATCH 142/465] pstore: Shut down worker when unregistering commit 6330d5534786d5315d56d558aa6d20740f97d80a upstream. When built as a module and running with update_ms >= 0, pstore will Oops during module unload since the work timer is still running. This makes sure the worker is stopped before unloading. Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/platform.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index efab7b64925b..b81ce8ddf14a 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -709,6 +709,7 @@ int pstore_register(struct pstore_info *psi) if (psi->flags & PSTORE_FLAGS_PMSG) pstore_register_pmsg(); + /* Start watching for new records, if desired. */ if (pstore_update_ms >= 0) { pstore_timer.expires = jiffies + msecs_to_jiffies(pstore_update_ms); @@ -731,6 +732,11 @@ EXPORT_SYMBOL_GPL(pstore_register); void pstore_unregister(struct pstore_info *psi) { + /* Stop timer and make sure all work has finished. */ + pstore_update_ms = -1; + del_timer_sync(&pstore_timer); + flush_work(&pstore_work); + if (psi->flags & PSTORE_FLAGS_PMSG) pstore_unregister_pmsg(); if (psi->flags & PSTORE_FLAGS_FTRACE) @@ -830,7 +836,9 @@ static void pstore_timefunc(unsigned long dummy) schedule_work(&pstore_work); } - mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); + if (pstore_update_ms >= 0) + mod_timer(&pstore_timer, + jiffies + msecs_to_jiffies(pstore_update_ms)); } module_param(backend, charp, 0444); From 02d86837352952eb7ca4e9370fef944a16f69206 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 20 May 2017 14:50:04 +0200 Subject: [PATCH 143/465] Linux 4.11.2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9dc2aec1c2e5..d7b64830a7b7 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 11 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Fearless Coyote From cfd3c22c36127ed6a8f8a5272e69a25c907159de Mon Sep 17 00:00:00 2001 From: Maksim Salau Date: Tue, 25 Apr 2017 22:49:21 +0300 Subject: [PATCH 144/465] usb: misc: legousbtower: Fix buffers on stack commit 942a48730faf149ccbf3e12ac718aee120bb3529 upstream. Allocate buffers on HEAP instead of STACK for local structures that are to be received using usb_control_msg(). 
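The pattern being applied is the usual one for USB control transfers: the data buffer handed to usb_control_msg() must be heap memory that the host controller can DMA to, never a local variable on a (possibly vmalloc'ed) stack, and it has to be released on every exit path. A condensed kernel-style sketch of that shape (illustrative only; struct example_reply and EXAMPLE_REQUEST are placeholders, not identifiers from the driver):

static int example_read_reply(struct usb_device *udev)
{
        struct example_reply *reply;    /* placeholder reply structure */
        int ret;

        reply = kmalloc(sizeof(*reply), GFP_KERNEL);    /* heap, never on stack */
        if (!reply)
                return -ENOMEM;

        ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
                              EXAMPLE_REQUEST,          /* placeholder request */
                              USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE,
                              0, 0, reply, sizeof(*reply), 1000);
        if (ret < 0)
                goto out;

        /* ... consume *reply ... */
        ret = 0;
out:
        kfree(reply);   /* freed on success and on error alike */
        return ret;
}

Keeping a single exit label that always frees the buffer also avoids the leak addressed by the follow-up legousbtower patch below, where the reply was freed only on the error path.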
Signed-off-by: Maksim Salau Tested-by: Alfredo Rafael Vicente Boix Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/legousbtower.c | 37 ++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 322a042d6e59..f75a0c28c2ab 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -317,9 +317,16 @@ static int tower_open (struct inode *inode, struct file *file) int subminor; int retval = 0; struct usb_interface *interface; - struct tower_reset_reply reset_reply; + struct tower_reset_reply *reset_reply; int result; + reset_reply = kmalloc(sizeof(*reset_reply), GFP_KERNEL); + + if (!reset_reply) { + retval = -ENOMEM; + goto exit; + } + nonseekable_open(inode, file); subminor = iminor(inode); @@ -364,8 +371,8 @@ static int tower_open (struct inode *inode, struct file *file) USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, - &reset_reply, - sizeof(reset_reply), + reset_reply, + sizeof(*reset_reply), 1000); if (result < 0) { dev_err(&dev->udev->dev, @@ -406,6 +413,7 @@ static int tower_open (struct inode *inode, struct file *file) mutex_unlock(&dev->lock); exit: + kfree(reset_reply); return retval; } @@ -808,7 +816,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device struct lego_usb_tower *dev = NULL; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor* endpoint; - struct tower_get_version_reply get_version_reply; + struct tower_get_version_reply *get_version_reply = NULL; int i; int retval = -ENOMEM; int result; @@ -886,6 +894,13 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device dev->interrupt_in_interval = interrupt_in_interval ? interrupt_in_interval : dev->interrupt_in_endpoint->bInterval; dev->interrupt_out_interval = interrupt_out_interval ? interrupt_out_interval : dev->interrupt_out_endpoint->bInterval; + get_version_reply = kmalloc(sizeof(*get_version_reply), GFP_KERNEL); + + if (!get_version_reply) { + retval = -ENOMEM; + goto error; + } + /* get the firmware version and log it */ result = usb_control_msg (udev, usb_rcvctrlpipe(udev, 0), @@ -893,18 +908,19 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, - &get_version_reply, - sizeof(get_version_reply), + get_version_reply, + sizeof(*get_version_reply), 1000); if (result < 0) { dev_err(idev, "LEGO USB Tower get version control request failed\n"); retval = result; goto error; } - dev_info(&interface->dev, "LEGO USB Tower firmware version is %d.%d " - "build %d\n", get_version_reply.major, - get_version_reply.minor, - le16_to_cpu(get_version_reply.build_no)); + dev_info(&interface->dev, + "LEGO USB Tower firmware version is %d.%d build %d\n", + get_version_reply->major, + get_version_reply->minor, + le16_to_cpu(get_version_reply->build_no)); /* we can register the device now, as it is ready */ usb_set_intfdata (interface, dev); @@ -928,6 +944,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device return retval; error: + kfree(get_version_reply); tower_delete(dev); return retval; } From dff0ad3b9ce6c3a6832d9dc2c4f5e7892de35722 Mon Sep 17 00:00:00 2001 From: Maksim Salau Date: Sat, 13 May 2017 23:49:26 +0300 Subject: [PATCH 145/465] usb: misc: legousbtower: Fix memory leak commit 0bd193d62b4270a2a7a09da43ad1034c7ca5b3d3 upstream. 
get_version_reply is not freed if function returns with success. Fixes: 942a48730faf ("usb: misc: legousbtower: Fix buffers on stack") Reported-by: Heikki Krogerus Signed-off-by: Maksim Salau Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/legousbtower.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index f75a0c28c2ab..c74f0a6499b0 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -941,6 +941,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device USB_MAJOR, dev->minor); exit: + kfree(get_version_reply); return retval; error: From a646b2974afc2033684d75257679a544aa4cdf65 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 16 May 2017 11:47:29 -0400 Subject: [PATCH 146/465] USB: ene_usb6250: fix DMA to the stack commit 628c2893d44876ddd11602400c70606ade62e129 upstream. The ene_usb6250 sub-driver in usb-storage does USB I/O to buffers on the stack, which doesn't work with vmapped stacks. This patch fixes the problem by allocating a separate 512-byte buffer at probe time and using it for all of the offending I/O operations. Signed-off-by: Alan Stern Reported-and-tested-by: Andreas Hartmann Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/ene_ub6250.c | 90 +++++++++++++++++++------------- 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 369f3c24815a..44af719194b2 100644 --- a/drivers/usb/storage/ene_ub6250.c +++ b/drivers/usb/storage/ene_ub6250.c @@ -446,6 +446,10 @@ struct ms_lib_ctrl { #define SD_BLOCK_LEN 9 struct ene_ub6250_info { + + /* I/O bounce buffer */ + u8 *bbuf; + /* for 6250 code */ struct SD_STATUS SD_Status; struct MS_STATUS MS_Status; @@ -493,8 +497,11 @@ static int ene_load_bincode(struct us_data *us, unsigned char flag); static void ene_ub6250_info_destructor(void *extra) { + struct ene_ub6250_info *info = (struct ene_ub6250_info *) extra; + if (!extra) return; + kfree(info->bbuf); } static int ene_send_scsi_cmd(struct us_data *us, u8 fDir, void *buf, int use_sg) @@ -860,8 +867,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, u8 PageNum, u32 *PageBuf, struct ms_lib_type_extdat *ExtraDat) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; + struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; int result; - u8 ExtBuf[4]; u32 bn = PhyBlockAddr * 0x20 + PageNum; result = ene_load_bincode(us, MS_RW_PATTERN); @@ -901,7 +909,7 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, bcb->CDB[2] = (unsigned char)(PhyBlockAddr>>16); bcb->CDB[6] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -910,9 +918,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, ExtraDat->status0 = 0x10; /* Not yet,fireware support */ ExtraDat->status1 = 0x00; /* Not yet,fireware support */ - ExtraDat->ovrflg = ExtBuf[0]; - ExtraDat->mngflg = ExtBuf[1]; - ExtraDat->logadr = memstick_logaddr(ExtBuf[2], ExtBuf[3]); + ExtraDat->ovrflg = bbuf[0]; + ExtraDat->mngflg = bbuf[1]; + ExtraDat->logadr = memstick_logaddr(bbuf[2], bbuf[3]); return USB_STOR_TRANSPORT_GOOD; } @@ -1332,8 +1340,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, u8 PageNum, struct ms_lib_type_extdat *ExtraDat) { struct bulk_cb_wrap *bcb = (struct 
bulk_cb_wrap *) us->iobuf; + struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; int result; - u8 ExtBuf[4]; memset(bcb, 0, sizeof(struct bulk_cb_wrap)); bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); @@ -1347,7 +1356,7 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, bcb->CDB[2] = (unsigned char)(PhyBlock>>16); bcb->CDB[6] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -1355,9 +1364,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, ExtraDat->intr = 0x80; /* Not yet, waiting for fireware support */ ExtraDat->status0 = 0x10; /* Not yet, waiting for fireware support */ ExtraDat->status1 = 0x00; /* Not yet, waiting for fireware support */ - ExtraDat->ovrflg = ExtBuf[0]; - ExtraDat->mngflg = ExtBuf[1]; - ExtraDat->logadr = memstick_logaddr(ExtBuf[2], ExtBuf[3]); + ExtraDat->ovrflg = bbuf[0]; + ExtraDat->mngflg = bbuf[1]; + ExtraDat->logadr = memstick_logaddr(bbuf[2], bbuf[3]); return USB_STOR_TRANSPORT_GOOD; } @@ -1556,9 +1565,9 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st) u16 PhyBlock, newblk, i; u16 LogStart, LogEnde; struct ms_lib_type_extdat extdat; - u8 buf[0x200]; u32 count = 0, index = 0; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; for (PhyBlock = 0; PhyBlock < info->MS_Lib.NumberOfPhyBlock;) { ms_lib_phy_to_log_range(PhyBlock, &LogStart, &LogEnde); @@ -1572,14 +1581,16 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st) } if (count == PhyBlock) { - ms_lib_read_extrablock(us, PhyBlock, 0, 0x80, &buf); + ms_lib_read_extrablock(us, PhyBlock, 0, 0x80, + bbuf); count += 0x80; } index = (PhyBlock % 0x80) * 4; - extdat.ovrflg = buf[index]; - extdat.mngflg = buf[index+1]; - extdat.logadr = memstick_logaddr(buf[index+2], buf[index+3]); + extdat.ovrflg = bbuf[index]; + extdat.mngflg = bbuf[index+1]; + extdat.logadr = memstick_logaddr(bbuf[index+2], + bbuf[index+3]); if ((extdat.ovrflg & MS_REG_OVR_BKST) != MS_REG_OVR_BKST_OK) { ms_lib_setacquired_errorblock(us, PhyBlock); @@ -2062,9 +2073,9 @@ static int ene_ms_init(struct us_data *us) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; int result; - u8 buf[0x200]; u16 MSP_BlockSize, MSP_UserAreaBlocks; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; printk(KERN_INFO "transport --- ENE_MSInit\n"); @@ -2083,13 +2094,13 @@ static int ene_ms_init(struct us_data *us) bcb->CDB[0] = 0xF1; bcb->CDB[1] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) { printk(KERN_ERR "Execution MS Init Code Fail !!\n"); return USB_STOR_TRANSPORT_ERROR; } /* the same part to test ENE */ - info->MS_Status = *(struct MS_STATUS *)&buf[0]; + info->MS_Status = *(struct MS_STATUS *) bbuf; if (info->MS_Status.Insert && info->MS_Status.Ready) { printk(KERN_INFO "Insert = %x\n", info->MS_Status.Insert); @@ -2098,15 +2109,15 @@ static int ene_ms_init(struct us_data *us) printk(KERN_INFO "IsMSPHG = %x\n", info->MS_Status.IsMSPHG); printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP); if (info->MS_Status.IsMSPro) { - MSP_BlockSize = (buf[6] << 8) | buf[7]; - MSP_UserAreaBlocks = (buf[10] << 8) | buf[11]; + MSP_BlockSize = (bbuf[6] << 8) | bbuf[7]; + MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11]; 
info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks; } else { ms_card_init(us); /* Card is MS (to ms.c)*/ } usb_stor_dbg(us, "MS Init Code OK !!\n"); } else { - usb_stor_dbg(us, "MS Card Not Ready --- %x\n", buf[0]); + usb_stor_dbg(us, "MS Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; } @@ -2116,9 +2127,9 @@ static int ene_ms_init(struct us_data *us) static int ene_sd_init(struct us_data *us) { int result; - u8 buf[0x200]; struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; usb_stor_dbg(us, "transport --- ENE_SDInit\n"); /* SD Init Part-1 */ @@ -2152,17 +2163,17 @@ static int ene_sd_init(struct us_data *us) bcb->Flags = US_BULK_FLAG_IN; bcb->CDB[0] = 0xF1; - result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Execution SD Init Code Fail !!\n"); return USB_STOR_TRANSPORT_ERROR; } - info->SD_Status = *(struct SD_STATUS *)&buf[0]; + info->SD_Status = *(struct SD_STATUS *) bbuf; if (info->SD_Status.Insert && info->SD_Status.Ready) { struct SD_STATUS *s = &info->SD_Status; - ene_get_card_status(us, (unsigned char *)&buf); + ene_get_card_status(us, bbuf); usb_stor_dbg(us, "Insert = %x\n", s->Insert); usb_stor_dbg(us, "Ready = %x\n", s->Ready); usb_stor_dbg(us, "IsMMC = %x\n", s->IsMMC); @@ -2170,7 +2181,7 @@ static int ene_sd_init(struct us_data *us) usb_stor_dbg(us, "HiSpeed = %x\n", s->HiSpeed); usb_stor_dbg(us, "WtP = %x\n", s->WtP); } else { - usb_stor_dbg(us, "SD Card Not Ready --- %x\n", buf[0]); + usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; } return USB_STOR_TRANSPORT_GOOD; @@ -2180,13 +2191,15 @@ static int ene_sd_init(struct us_data *us) static int ene_init(struct us_data *us) { int result; - u8 misc_reg03 = 0; + u8 misc_reg03; struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); + u8 *bbuf = info->bbuf; - result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03); + result = ene_get_card_type(us, REG_CARD_STATUS, bbuf); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; + misc_reg03 = bbuf[0]; if (misc_reg03 & 0x01) { if (!info->SD_Status.Ready) { result = ene_sd_init(us); @@ -2303,8 +2316,9 @@ static int ene_ub6250_probe(struct usb_interface *intf, const struct usb_device_id *id) { int result; - u8 misc_reg03 = 0; + u8 misc_reg03; struct us_data *us; + struct ene_ub6250_info *info; result = usb_stor_probe1(&us, intf, id, (id - ene_ub6250_usb_ids) + ene_ub6250_unusual_dev_list, @@ -2313,11 +2327,16 @@ static int ene_ub6250_probe(struct usb_interface *intf, return result; /* FIXME: where should the code alloc extra buf ? 
*/ - if (!us->extra) { - us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL); - if (!us->extra) - return -ENOMEM; - us->extra_destructor = ene_ub6250_info_destructor; + us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL); + if (!us->extra) + return -ENOMEM; + us->extra_destructor = ene_ub6250_info_destructor; + + info = (struct ene_ub6250_info *)(us->extra); + info->bbuf = kmalloc(512, GFP_KERNEL); + if (!info->bbuf) { + kfree(us->extra); + return -ENOMEM; } us->transport_name = "ene_ub6250"; @@ -2329,12 +2348,13 @@ static int ene_ub6250_probe(struct usb_interface *intf, return result; /* probe card type */ - result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03); + result = ene_get_card_type(us, REG_CARD_STATUS, info->bbuf); if (result != USB_STOR_XFER_GOOD) { usb_stor_disconnect(intf); return USB_STOR_TRANSPORT_ERROR; } + misc_reg03 = info->bbuf[0]; if (!(misc_reg03 & 0x01)) { pr_info("ums_eneub6250: This driver only supports SD/MS cards. " "It does not support SM cards.\n"); From 37f84f8b993036014c6d682fa9144b9cc337a7ab Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:49:45 +0100 Subject: [PATCH 147/465] watchdog: pcwd_usb: fix NULL-deref at probe commit 46c319b848268dab3f0e7c4a5b6e9146d3bca8a4 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/pcwd_usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c index 99ebf6ea3de6..5615f4013924 100644 --- a/drivers/watchdog/pcwd_usb.c +++ b/drivers/watchdog/pcwd_usb.c @@ -630,6 +630,9 @@ static int usb_pcwd_probe(struct usb_interface *interface, return -ENODEV; } + if (iface_desc->desc.bNumEndpoints < 1) + return -ENODEV; + /* check out the endpoint: it has to be Interrupt & IN */ endpoint = &iface_desc->endpoint[0].desc; From 28c7411cdbc41396dceff7e1b37dbb659f7bdfb2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 16 May 2017 19:18:55 +0200 Subject: [PATCH 148/465] char: lp: fix possible integer overflow in lp_setup() commit 3e21f4af170bebf47c187c1ff8bf155583c9f3b1 upstream. The lp_setup() code doesn't apply any bounds checking when passing "lp=none", and only in this case, resulting in an overflow of the parport_nr[] array. All versions in Git history are affected. Reported-By: Roee Hay Cc: Ben Hutchings Signed-off-by: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/char/lp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 565e4cf04a02..8249762192d5 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -859,7 +859,11 @@ static int __init lp_setup (char *str) } else if (!strcmp(str, "auto")) { parport_nr[0] = LP_PARPORT_AUTO; } else if (!strcmp(str, "none")) { - parport_nr[parport_ptr++] = LP_PARPORT_NONE; + if (parport_ptr < LP_NO) + parport_nr[parport_ptr++] = LP_PARPORT_NONE; + else + printk(KERN_INFO "lp: too many ports, %s ignored.\n", + str); } else if (!strcmp(str, "reset")) { reset = 1; } From b3bf0bbc1eda78d086b905c3eab7f814b32c3a7d Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Samavedam Date: Tue, 16 May 2017 14:38:08 +0200 Subject: [PATCH 149/465] USB: core: replace %p with %pK commit 2f964780c03b73de269b08d12aff96a9618d13f3 upstream. 
Format specifier %p can leak kernel addresses while not valuing the kptr_restrict system settings. When kptr_restrict is set to (1), kernel pointers printed using the %pK format specifier will be replaced with Zeros. Debugging Note : &pK prints only Zeros as address. If you need actual address information, write 0 to kptr_restrict. echo 0 > /proc/sys/kernel/kptr_restrict [Found by poking around in a random vendor kernel tree, it would be nice if someone would actually send these types of patches upstream - gkh] Signed-off-by: Vamsi Krishna Samavedam Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 14 +++++++------- drivers/usb/core/hcd.c | 4 ++-- drivers/usb/core/urb.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index cfc3cff6e8d5..8e6ef671be9b 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -475,11 +475,11 @@ static void snoop_urb(struct usb_device *udev, if (userurb) { /* Async */ if (when == SUBMIT) - dev_info(&udev->dev, "userurb %p, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " "length %u\n", userurb, ep, t, d, length); else - dev_info(&udev->dev, "userurb %p, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " "actual_length %u status %d\n", userurb, ep, t, d, length, timeout_or_status); @@ -1895,7 +1895,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); return retval; @@ -1912,7 +1912,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg) as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); } else { @@ -2043,7 +2043,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); return retval; @@ -2060,7 +2060,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); } else { @@ -2489,7 +2489,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, #endif case USBDEVFS_DISCARDURB: - snoop(&dev->dev, "%s: DISCARDURB %p\n", __func__, p); + snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p); ret = proc_unlinkurb(ps, p); break; diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 79bdca5cb9c7..6a857e875633 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1722,7 +1722,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status) if (retval == 0) retval = -EINPROGRESS; else if (retval != -EIDRM && retval != -EBUSY) - dev_dbg(&udev->dev, "hcd_unlink_urb %p fail %d\n", + dev_dbg(&udev->dev, "hcd_unlink_urb %pK fail %d\n", urb, retval); usb_put_dev(udev); } @@ -1889,7 +1889,7 @@ void usb_hcd_flush_endpoint(struct usb_device *udev, /* kick hcd */ unlink1(hcd, urb, -ESHUTDOWN); dev_dbg (hcd->self.controller, - "shutdown urb %p ep%d%s%s\n", + 
"shutdown urb %pK ep%d%s%s\n", urb, usb_endpoint_num(&ep->desc), is_in ? "in" : "out", ({ char *s; diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index d75cb8c0f7df..47903d510955 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -338,7 +338,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) if (!urb || !urb->complete) return -EINVAL; if (urb->hcpriv) { - WARN_ONCE(1, "URB %p submitted while active\n", urb); + WARN_ONCE(1, "URB %pK submitted while active\n", urb); return -EBUSY; } From d2b8a861a6437e31a874e277422338097455a718 Mon Sep 17 00:00:00 2001 From: Alexander Steffen Date: Thu, 16 Feb 2017 15:33:36 +0000 Subject: [PATCH 150/465] tpm_tis_core: Choose appropriate timeout for reading burstcount commit 302a6ad7fc77146191126a1f3e2c5d724fd72416 upstream. TIS v1.3 for TPM 1.2 and PTP for TPM 2.0 disagree about which timeout value applies to reading a valid burstcount. It is TIMEOUT_D according to TIS, but TIMEOUT_A according to PTP, so choose the appropriate value depending on whether we deal with a TPM 1.2 or a TPM 2.0. This is important since according to the PTP TIMEOUT_D is much smaller than TIMEOUT_A. So the previous implementation could run into timeouts with a TPM 2.0, even though the TPM was behaving perfectly fine. During tpm2_probe TIMEOUT_D will be used even with a TPM 2.0, because TPM_CHIP_FLAG_TPM2 is not yet set. This is fine, since the timeout values will only be changed afterwards by tpm_get_timeouts. Until then TIS_TIMEOUT_D_MAX applies, which is large enough. Fixes: aec04cbdf723 ("tpm: TPM 2.0 FIFO Interface") Signed-off-by: Alexander Steffen Signed-off-by: Peter Huewe Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index c0f296b5d413..fc0e9a2734ed 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -160,8 +160,10 @@ static int get_burstcount(struct tpm_chip *chip) u32 value; /* wait for burstcount */ - /* which timeout value, spec has 2 answers (c & d) */ - stop = jiffies + chip->timeout_d; + if (chip->flags & TPM_CHIP_FLAG_TPM2) + stop = jiffies + chip->timeout_a; + else + stop = jiffies + chip->timeout_d; do { rc = tpm_tis_read32(priv, TPM_STS(priv->locality), &value); if (rc < 0) From 826ca313fd1068d4128f97ac6893156b5e303e9c Mon Sep 17 00:00:00 2001 From: Jeeja KP Date: Wed, 10 May 2017 11:51:58 +0530 Subject: [PATCH 151/465] ALSA: hda: Fix cpu lockup when stopping the cmd dmas commit 960013762df0a214b57f2fce655422fb52bdfd2c upstream. Using jiffies in hdac_wait_for_cmd_dmas() to determine when to time out when interrupts are off (snd_hdac_bus_stop_cmd_io()/spin_lock_irq()) causes hard lockup so unlock while waiting using jiffies. ---<-snip->--- <0>[ 1211.603046] NMI watchdog: Watchdog detected hard LOCKUP on cpu 3 <4>[ 1211.603047] Modules linked in: snd_hda_intel i915 vgem <4>[ 1211.603053] irq event stamp: 13366 <4>[ 1211.603053] hardirqs last enabled at (13365): ... <4>[ 1211.603059] Call Trace: <4>[ 1211.603059] ? delay_tsc+0x3d/0xc0 <4>[ 1211.603059] __delay+0xa/0x10 <4>[ 1211.603060] __const_udelay+0x31/0x40 <4>[ 1211.603060] snd_hdac_bus_stop_cmd_io+0x96/0xe0 [snd_hda_core] <4>[ 1211.603060] ? 
azx_dev_disconnect+0x20/0x20 [snd_hda_intel] <4>[ 1211.603061] snd_hdac_bus_stop_chip+0xb1/0x100 [snd_hda_core] <4>[ 1211.603061] azx_stop_chip+0x9/0x10 [snd_hda_codec] <4>[ 1211.603061] azx_suspend+0x72/0x220 [snd_hda_intel] <4>[ 1211.603061] pci_pm_suspend+0x71/0x140 <4>[ 1211.603062] dpm_run_callback+0x6f/0x330 <4>[ 1211.603062] ? pci_pm_freeze+0xe0/0xe0 <4>[ 1211.603062] __device_suspend+0xf9/0x370 <4>[ 1211.603062] ? dpm_watchdog_set+0x60/0x60 <4>[ 1211.603063] async_suspend+0x1a/0x90 <4>[ 1211.603063] async_run_entry_fn+0x34/0x160 <4>[ 1211.603063] process_one_work+0x1f4/0x6d0 <4>[ 1211.603063] ? process_one_work+0x16e/0x6d0 <4>[ 1211.603064] worker_thread+0x49/0x4a0 <4>[ 1211.603064] kthread+0x107/0x140 <4>[ 1211.603064] ? process_one_work+0x6d0/0x6d0 <4>[ 1211.603065] ? kthread_create_on_node+0x40/0x40 <4>[ 1211.603065] ret_from_fork+0x2e/0x40 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100419 Fixes: 38b19ed7f81ec ("ALSA: hda: fix to wait for RIRB & CORB DMA to set") Reported-by: Marta Lofstedt Suggested-by: Takashi Iwai Signed-off-by: Jeeja KP Acked-by: Vinod Koul Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/hda/hdac_controller.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 043065867656..0f41257d339e 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -106,7 +106,11 @@ void snd_hdac_bus_stop_cmd_io(struct hdac_bus *bus) /* disable ringbuffer DMAs */ snd_hdac_chip_writeb(bus, RIRBCTL, 0); snd_hdac_chip_writeb(bus, CORBCTL, 0); + spin_unlock_irq(&bus->reg_lock); + hdac_wait_for_cmd_dmas(bus); + + spin_lock_irq(&bus->reg_lock); /* disable unsolicited responses */ snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, 0); spin_unlock_irq(&bus->reg_lock); From 580c656a955cb604aa3a02ffb8563dfbdd0214c7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 25 Apr 2017 14:29:35 +0300 Subject: [PATCH 152/465] fanotify: don't expose EOPENSTALE to userspace commit 4ff33aafd32e084f5ee7faa54ba06e95f8b1b8af upstream. When delivering an event to userspace for a file on an NFS share, if the file is deleted on server side before user reads the event, user will not get the event. If the event queue contained several events, the stale event is quietly dropped and read() returns to user with events read so far in the buffer. If the event queue contains a single stale event or if the stale event is a permission event, read() returns to user with the kernel internal error code 518 (EOPENSTALE), which is not a POSIX error code. Check the internal return value -EOPENSTALE in fanotify_read(), just the same as it is checked in path_openat() and drop the event in the cases that it is not already dropped. This is a reproducer from Marko Rauhamaa: Just take the example program listed under "man fanotify" ("fantest") and follow these steps: ============================================================== NFS Server NFS Client(1) NFS Client(2) ============================================================== # echo foo >/nfsshare/bar.txt # cat /nfsshare/bar.txt foo # ./fantest /nfsshare Press enter key to terminate. Listening for events. # rm -f /nfsshare/bar.txt # cat /nfsshare/bar.txt read: Unknown error 518 cat: /nfsshare/bar.txt: Operation not permitted ============================================================== where NFS Client (1) and (2) are two terminal sessions on a single NFS Client machine. 
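The core of the fix, which the diff below implements in fanotify_read(), is to treat the internal -EOPENSTALE as "drop this event and keep reading" rather than as an error handed back to userspace. A minimal sketch of that loop shape, where next_event() and copy_one() are illustrative stand-ins rather than real fanotify functions:

        while (count) {
                kevent = next_event(group);             /* illustrative stand-in */
                if (!kevent)
                        break;

                ret = copy_one(group, kevent, buf);     /* illustrative stand-in */
                if (unlikely(ret == -EOPENSTALE))
                        ret = 0;        /* stale fd: silently drop the event */
                if (ret < 0)
                        break;          /* genuine error: stop the loop */

                buf += ret;
                count -= ret;
        }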
Reported-by: Marko Rauhamaa Tested-by: Marko Rauhamaa Cc: Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/notify/fanotify/fanotify_user.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2b37f2785834..4b3437d70e7e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -295,27 +295,37 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, } ret = copy_event_to_user(group, kevent, buf); + if (unlikely(ret == -EOPENSTALE)) { + /* + * We cannot report events with stale fd so drop it. + * Setting ret to 0 will continue the event loop and + * do the right thing if there are no more events to + * read (i.e. return bytes read, -EAGAIN or wait). + */ + ret = 0; + } + /* * Permission events get queued to wait for response. Other * events can be destroyed now. */ if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { fsnotify_destroy_event(group, kevent); - if (ret < 0) - break; } else { #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (ret < 0) { + if (ret <= 0) { FANOTIFY_PE(kevent)->response = FAN_DENY; wake_up(&group->fanotify_data.access_waitq); - break; + } else { + spin_lock(&group->notification_lock); + list_add_tail(&kevent->list, + &group->fanotify_data.access_list); + spin_unlock(&group->notification_lock); } - spin_lock(&group->notification_lock); - list_add_tail(&kevent->list, - &group->fanotify_data.access_list); - spin_unlock(&group->notification_lock); #endif } + if (ret < 0) + break; buf += ret; count -= ret; } From b0e2a93c289b416077dd15b0e448a82eb95b341e Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Thu, 2 Mar 2017 13:03:11 +0000 Subject: [PATCH 153/465] tpm_tis_spi: Use single function to transfer data commit f848f2143ae42dc0918400039257a893835254d1 upstream. The algorithm for sending data to the TPM is mostly identical to the algorithm for receiving data from the TPM, so a single function is sufficient to handle both cases. This is a prequisite for all the other fixes, so we don't have to fix everything twice (send/receive) v2: u16 instead of u8 for the length. 
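One helper can serve both directions because the TIS-over-SPI command header differs only in bit 7 of its first byte. A sketch of the 4-byte header construction, mirroring the values in the diff below; build_spi_header() is an illustrative name, not a function in the driver:

        static void build_spi_header(u8 *tx, u32 addr, u16 len, bool do_read)
        {
                tx[0] = (do_read ? 0x80 : 0x00) | (len - 1);    /* transfer size - 1 */
                tx[1] = 0xd4;   /* high byte of the TPM register address (0xd4xxxx) */
                tx[2] = addr >> 8;
                tx[3] = addr;
        }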
Fixes: 0edbfea537d1 ("tpm/tpm_tis_spi: Add support for spi phy") Signed-off-by: Alexander Steffen Signed-off-by: Peter Huewe Reviewed-by: Jarkko Sakkinen Tested-by: Benoit Houyere Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_spi.c | 87 ++++++++++------------------------ 1 file changed, 24 insertions(+), 63 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c index 5292e5768a7e..062799e04f04 100644 --- a/drivers/char/tpm/tpm_tis_spi.c +++ b/drivers/char/tpm/tpm_tis_spi.c @@ -47,8 +47,8 @@ struct tpm_tis_spi_phy { struct tpm_tis_data priv; struct spi_device *spi_device; - u8 tx_buf[MAX_SPI_FRAMESIZE + 4]; - u8 rx_buf[MAX_SPI_FRAMESIZE + 4]; + u8 tx_buf[4]; + u8 rx_buf[4]; }; static inline struct tpm_tis_spi_phy *to_tpm_tis_spi_phy(struct tpm_tis_data *data) @@ -56,8 +56,8 @@ static inline struct tpm_tis_spi_phy *to_tpm_tis_spi_phy(struct tpm_tis_data *da return container_of(data, struct tpm_tis_spi_phy, priv); } -static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr, - u16 len, u8 *result) +static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len, + u8 *buffer, u8 direction) { struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data); int ret, i; @@ -66,17 +66,17 @@ static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr, .tx_buf = phy->tx_buf, .rx_buf = phy->rx_buf, .len = 4, + .cs_change = 1, }; if (len > MAX_SPI_FRAMESIZE) return -ENOMEM; - phy->tx_buf[0] = 0x80 | (len - 1); + phy->tx_buf[0] = direction | (len - 1); phy->tx_buf[1] = 0xd4; - phy->tx_buf[2] = (addr >> 8) & 0xFF; - phy->tx_buf[3] = addr & 0xFF; + phy->tx_buf[2] = addr >> 8; + phy->tx_buf[3] = addr; - spi_xfer.cs_change = 1; spi_message_init(&m); spi_message_add_tail(&spi_xfer, &m); @@ -85,7 +85,7 @@ static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr, if (ret < 0) goto exit; - memset(phy->tx_buf, 0, len); + phy->tx_buf[0] = 0; /* According to TCG PTP specification, if there is no TPM present at * all, then the design has a weak pull-up on MISO. 
If a TPM is not @@ -103,7 +103,14 @@ static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr, spi_xfer.cs_change = 0; spi_xfer.len = len; - spi_xfer.rx_buf = result; + + if (direction) { + spi_xfer.tx_buf = NULL; + spi_xfer.rx_buf = buffer; + } else { + spi_xfer.tx_buf = buffer; + spi_xfer.rx_buf = NULL; + } spi_message_init(&m); spi_message_add_tail(&spi_xfer, &m); @@ -114,62 +121,16 @@ static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr, return ret; } +static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr, + u16 len, u8 *result) +{ + return tpm_tis_spi_transfer(data, addr, len, result, 0x80); +} + static int tpm_tis_spi_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, u8 *value) { - struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data); - int ret, i; - struct spi_message m; - struct spi_transfer spi_xfer = { - .tx_buf = phy->tx_buf, - .rx_buf = phy->rx_buf, - .len = 4, - }; - - if (len > MAX_SPI_FRAMESIZE) - return -ENOMEM; - - phy->tx_buf[0] = len - 1; - phy->tx_buf[1] = 0xd4; - phy->tx_buf[2] = (addr >> 8) & 0xFF; - phy->tx_buf[3] = addr & 0xFF; - - spi_xfer.cs_change = 1; - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); - - spi_bus_lock(phy->spi_device->master); - ret = spi_sync_locked(phy->spi_device, &m); - if (ret < 0) - goto exit; - - memset(phy->tx_buf, 0, len); - - /* According to TCG PTP specification, if there is no TPM present at - * all, then the design has a weak pull-up on MISO. If a TPM is not - * present, a pull-up on MISO means that the SB controller sees a 1, - * and will latch in 0xFF on the read. - */ - for (i = 0; (phy->rx_buf[0] & 0x01) == 0 && i < TPM_RETRY; i++) { - spi_xfer.len = 1; - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); - ret = spi_sync_locked(phy->spi_device, &m); - if (ret < 0) - goto exit; - } - - spi_xfer.len = len; - spi_xfer.tx_buf = value; - spi_xfer.cs_change = 0; - spi_xfer.tx_buf = value; - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); - ret = spi_sync_locked(phy->spi_device, &m); - -exit: - spi_bus_unlock(phy->spi_device->master); - return ret; + return tpm_tis_spi_transfer(data, addr, len, value, 0); } static int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result) From 6b2a246ad05f22d07d197a0fb67e7f55820c1efb Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Thu, 2 Mar 2017 13:03:12 +0000 Subject: [PATCH 154/465] tpm_tis_spi: Abort transfer when too many wait states are signaled commit 975094ddc369a32f27210248bdd9bbd153061b00 upstream. Abort the transfer with ETIMEDOUT when the TPM signals more than TPM_RETRY wait states. Continuing with the transfer in this state will only lead to arbitrary failures in other parts of the code. 
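The fix uses the usual bounded-retry idiom: if the polling loop falls through because the counter reached TPM_RETRY, rather than because the wait state cleared, the transfer is aborted instead of continuing with a data phase the TPM never acknowledged. In isolation, with device_ready() as an illustrative stand-in for one wait-state poll:

        for (i = 0; i < TPM_RETRY; i++) {
                if (device_ready())     /* illustrative: poll the wait-state byte */
                        break;
        }
        if (i == TPM_RETRY) {
                ret = -ETIMEDOUT;       /* retries exhausted: abort the transfer */
                goto exit;
        }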
Fixes: 0edbfea537d1 ("tpm/tpm_tis_spi: Add support for spi phy") Signed-off-by: Alexander Steffen Signed-off-by: Peter Huewe Reviewed-by: Jarkko Sakkinen Tested-by: Benoit Houyere Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_spi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c index 062799e04f04..639614f2d415 100644 --- a/drivers/char/tpm/tpm_tis_spi.c +++ b/drivers/char/tpm/tpm_tis_spi.c @@ -101,6 +101,11 @@ static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len, goto exit; } + if (i == TPM_RETRY) { + ret = -ETIMEDOUT; + goto exit; + } + spi_xfer.cs_change = 0; spi_xfer.len = len; From e2fbfd47e366d309ed94949fabf040a2765419e7 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Thu, 2 Mar 2017 13:03:13 +0000 Subject: [PATCH 155/465] tpm_tis_spi: Check correct byte for wait state indicator commit e110cc69dc2ad679d6d478df636b99b14e6fbbc9 upstream. Wait states are signaled in the last byte received from the TPM in response to the header, not the first byte. Check rx_buf[3] instead of rx_buf[0]. Fixes: 0edbfea537d1 ("tpm/tpm_tis_spi: Add support for spi phy") Signed-off-by: Alexander Steffen Signed-off-by: Peter Huewe Reviewed-by: Jarkko Sakkinen Tested-by: Benoit Houyere Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_spi.c | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c index 639614f2d415..62f50b6c9ef6 100644 --- a/drivers/char/tpm/tpm_tis_spi.c +++ b/drivers/char/tpm/tpm_tis_spi.c @@ -85,25 +85,25 @@ static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len, if (ret < 0) goto exit; - phy->tx_buf[0] = 0; - - /* According to TCG PTP specification, if there is no TPM present at - * all, then the design has a weak pull-up on MISO. If a TPM is not - * present, a pull-up on MISO means that the SB controller sees a 1, - * and will latch in 0xFF on the read. - */ - for (i = 0; (phy->rx_buf[0] & 0x01) == 0 && i < TPM_RETRY; i++) { - spi_xfer.len = 1; - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); - ret = spi_sync_locked(phy->spi_device, &m); - if (ret < 0) + if ((phy->rx_buf[3] & 0x01) == 0) { + // handle SPI wait states + phy->tx_buf[0] = 0; + + for (i = 0; i < TPM_RETRY; i++) { + spi_xfer.len = 1; + spi_message_init(&m); + spi_message_add_tail(&spi_xfer, &m); + ret = spi_sync_locked(phy->spi_device, &m); + if (ret < 0) + goto exit; + if (phy->rx_buf[0] & 0x01) + break; + } + + if (i == TPM_RETRY) { + ret = -ETIMEDOUT; goto exit; - } - - if (i == TPM_RETRY) { - ret = -ETIMEDOUT; - goto exit; + } } spi_xfer.cs_change = 0; From 32a6b61947548bb1f2666171c5adc10cfd841ae7 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Thu, 2 Mar 2017 13:03:14 +0000 Subject: [PATCH 156/465] tpm_tis_spi: Remove limitation of transfers to MAX_SPI_FRAMESIZE bytes commit 591e48c26ced7c455751eef27fb5963e902c2137 upstream. Limiting transfers to MAX_SPI_FRAMESIZE was not expected by the upper layers, as tpm_tis has no such limitation. Add a loop to hide that limitation. 
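Reduced to its core, the change wraps the existing single-frame transfer in a loop that walks the buffer in MAX_SPI_FRAMESIZE pieces; the diff below shows the full version. Sketch, with do_one_frame() standing in for the per-frame header, wait-state and data sequence:

        while (len) {
                u8 transfer_len = min_t(u16, len, MAX_SPI_FRAMESIZE);

                ret = do_one_frame(data, addr, buffer, transfer_len, direction);
                if (ret < 0)
                        break;

                len -= transfer_len;
                buffer += transfer_len;
        }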
v2: Moved scope of spi_message to the top as requested by Jarkko Fixes: 0edbfea537d1 ("tpm/tpm_tis_spi: Add support for spi phy") Signed-off-by: Alexander Steffen Signed-off-by: Peter Huewe Reviewed-by: Jarkko Sakkinen Tested-by: Benoit Houyere Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_spi.c | 107 ++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c index 62f50b6c9ef6..3015c8b65f18 100644 --- a/drivers/char/tpm/tpm_tis_spi.c +++ b/drivers/char/tpm/tpm_tis_spi.c @@ -60,67 +60,76 @@ static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len, u8 *buffer, u8 direction) { struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data); - int ret, i; + int ret = 0; + int i; struct spi_message m; - struct spi_transfer spi_xfer = { - .tx_buf = phy->tx_buf, - .rx_buf = phy->rx_buf, - .len = 4, - .cs_change = 1, - }; - - if (len > MAX_SPI_FRAMESIZE) - return -ENOMEM; + struct spi_transfer spi_xfer; + u8 transfer_len; - phy->tx_buf[0] = direction | (len - 1); - phy->tx_buf[1] = 0xd4; - phy->tx_buf[2] = addr >> 8; - phy->tx_buf[3] = addr; + spi_bus_lock(phy->spi_device->master); - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); + while (len) { + transfer_len = min_t(u16, len, MAX_SPI_FRAMESIZE); - spi_bus_lock(phy->spi_device->master); - ret = spi_sync_locked(phy->spi_device, &m); - if (ret < 0) - goto exit; - - if ((phy->rx_buf[3] & 0x01) == 0) { - // handle SPI wait states - phy->tx_buf[0] = 0; - - for (i = 0; i < TPM_RETRY; i++) { - spi_xfer.len = 1; - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); - ret = spi_sync_locked(phy->spi_device, &m); - if (ret < 0) + phy->tx_buf[0] = direction | (transfer_len - 1); + phy->tx_buf[1] = 0xd4; + phy->tx_buf[2] = addr >> 8; + phy->tx_buf[3] = addr; + + memset(&spi_xfer, 0, sizeof(spi_xfer)); + spi_xfer.tx_buf = phy->tx_buf; + spi_xfer.rx_buf = phy->rx_buf; + spi_xfer.len = 4; + spi_xfer.cs_change = 1; + + spi_message_init(&m); + spi_message_add_tail(&spi_xfer, &m); + ret = spi_sync_locked(phy->spi_device, &m); + if (ret < 0) + goto exit; + + if ((phy->rx_buf[3] & 0x01) == 0) { + // handle SPI wait states + phy->tx_buf[0] = 0; + + for (i = 0; i < TPM_RETRY; i++) { + spi_xfer.len = 1; + spi_message_init(&m); + spi_message_add_tail(&spi_xfer, &m); + ret = spi_sync_locked(phy->spi_device, &m); + if (ret < 0) + goto exit; + if (phy->rx_buf[0] & 0x01) + break; + } + + if (i == TPM_RETRY) { + ret = -ETIMEDOUT; goto exit; - if (phy->rx_buf[0] & 0x01) - break; + } } - if (i == TPM_RETRY) { - ret = -ETIMEDOUT; - goto exit; + spi_xfer.cs_change = 0; + spi_xfer.len = transfer_len; + + if (direction) { + spi_xfer.tx_buf = NULL; + spi_xfer.rx_buf = buffer; + } else { + spi_xfer.tx_buf = buffer; + spi_xfer.rx_buf = NULL; } - } - spi_xfer.cs_change = 0; - spi_xfer.len = len; + spi_message_init(&m); + spi_message_add_tail(&spi_xfer, &m); + ret = spi_sync_locked(phy->spi_device, &m); + if (ret < 0) + goto exit; - if (direction) { - spi_xfer.tx_buf = NULL; - spi_xfer.rx_buf = buffer; - } else { - spi_xfer.tx_buf = buffer; - spi_xfer.rx_buf = NULL; + len -= transfer_len; + buffer += transfer_len; } - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); - ret = spi_sync_locked(phy->spi_device, &m); - exit: spi_bus_unlock(phy->spi_device->master); return ret; From 84eef50e8233f03dca895cc12f4295cf15bca40a Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Thu, 2 Mar 2017 13:03:15 
+0000 Subject: [PATCH 157/465] tpm_tis_spi: Add small delay after last transfer commit 5cc0101d1f88500f8901d01b035af743215d4c3a upstream. Testing the implementation with a Raspberry Pi 2 showed that under some circumstances its SPI master erroneously releases the CS line before the transfer is complete, i.e. before the end of the last clock. In this case the TPM ignores the transfer and misses for example the GO command. The driver is unable to detect this communication problem and will wait for a command response that is never going to arrive, timing out eventually. As a workaround, the small delay ensures that the CS line is held long enough, even with a faulty SPI master. Other SPI masters are not affected, except for a negligible performance penalty. Fixes: 0edbfea537d1 ("tpm/tpm_tis_spi: Add support for spi phy") Signed-off-by: Alexander Steffen Signed-off-by: Peter Huewe Reviewed-by: Jarkko Sakkinen Tested-by: Benoit Houyere Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c index 3015c8b65f18..88fe72ae967f 100644 --- a/drivers/char/tpm/tpm_tis_spi.c +++ b/drivers/char/tpm/tpm_tis_spi.c @@ -111,6 +111,7 @@ static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len, spi_xfer.cs_change = 0; spi_xfer.len = transfer_len; + spi_xfer.delay_usecs = 5; if (direction) { spi_xfer.tx_buf = NULL; From 6705f253bc2b6c51a1642f75a789ccb153a25cdb Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Fri, 10 Mar 2017 13:45:53 -0500 Subject: [PATCH 158/465] tpm: msleep() delays - replace with usleep_range() in i2c nuvoton driver commit a233a0289cf9a96ef9b42c730a7621ccbf9a6f98 upstream. Commit 500462a9de65 "timers: Switch to a non-cascading wheel" replaced the 'classic' timer wheel, which aimed for near 'exact' expiry of the timers. Their analysis was that the vast majority of timeout timers are used as safeguards, not as real timers, and are cancelled or rearmed before expiration. The only exception noted to this were networking timers with a small expiry time. Not included in the analysis was the TPM polling timer, which resulted in a longer normal delay and, every so often, a very long delay. The non-cascading wheel delay is based on CONFIG_HZ. For a description of the different rings and their delays, refer to the comments in kernel/time/timer.c. Below are the delays given for rings 0 - 2, which explains the longer "normal" delays and the very, long delays as seen on systems with CONFIG_HZ 250. * HZ 1000 steps * Level Offset Granularity Range * 0 0 1 ms 0 ms - 63 ms * 1 64 8 ms 64 ms - 511 ms * 2 128 64 ms 512 ms - 4095 ms (512ms - ~4s) * HZ 250 * Level Offset Granularity Range * 0 0 4 ms 0 ms - 255 ms * 1 64 32 ms 256 ms - 2047 ms (256ms - ~2s) * 2 128 256 ms 2048 ms - 16383 ms (~2s - ~16s) Below is a comparison of extending the TPM with 1000 measurements, using msleep() vs. usleep_delay() when configured for 1000 hz vs. 250 hz, before and after commit 500462a9de65. linux-4.7 | msleep() usleep_range() 1000 hz: 0m44.628s | 1m34.497s 29.243s 250 hz: 1m28.510s | 4m49.269s 32.386s linux-4.7 | min-max (msleep) min-max (usleep_range) 1000 hz: 0:017 - 2:760s | 0:015 - 3:967s 0:014 - 0:418s 250 hz: 0:028 - 1:954s | 0:040 - 4:096s 0:016 - 0:816s This patch replaces the msleep() with usleep_range() calls in the i2c nuvoton driver with a consistent max range value. 
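The substitution itself is mechanical; the point is the explicit upper bound, which keeps short waits from being rounded up by the coarse timer-wheel levels shown above. The pattern, using the constants this patch introduces:

        #define TPM_I2C_RETRY_DELAY_SHORT       (2 * 1000)      /* usec */
        #define TPM_I2C_DELAY_RANGE             300             /* usec */

        /* before: msleep(2) -- may sleep for 4 ms or more at HZ=250 */
        usleep_range(TPM_I2C_RETRY_DELAY_SHORT,
                     TPM_I2C_RETRY_DELAY_SHORT + TPM_I2C_DELAY_RANGE);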
Signed-of-by: Mimi Zohar Signed-off-by: Nayna Jain Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_i2c_nuvoton.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index e3a9155ee671..0c98c424d792 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -49,9 +49,10 @@ */ #define TPM_I2C_MAX_BUF_SIZE 32 #define TPM_I2C_RETRY_COUNT 32 -#define TPM_I2C_BUS_DELAY 1 /* msec */ -#define TPM_I2C_RETRY_DELAY_SHORT 2 /* msec */ -#define TPM_I2C_RETRY_DELAY_LONG 10 /* msec */ +#define TPM_I2C_BUS_DELAY 1000 /* usec */ +#define TPM_I2C_RETRY_DELAY_SHORT (2 * 1000) /* usec */ +#define TPM_I2C_RETRY_DELAY_LONG (10 * 1000) /* usec */ +#define TPM_I2C_DELAY_RANGE 300 /* usec */ #define OF_IS_TPM2 ((void *)1) #define I2C_IS_TPM2 1 @@ -123,7 +124,8 @@ static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data) /* this causes the current command to be aborted */ for (i = 0, status = -1; i < TPM_I2C_RETRY_COUNT && status < 0; i++) { status = i2c_nuvoton_write_buf(client, TPM_STS, 1, &data); - msleep(TPM_I2C_BUS_DELAY); + usleep_range(TPM_I2C_BUS_DELAY, TPM_I2C_BUS_DELAY + + TPM_I2C_DELAY_RANGE); } return status; } @@ -160,7 +162,8 @@ static int i2c_nuvoton_get_burstcount(struct i2c_client *client, burst_count = min_t(u8, TPM_I2C_MAX_BUF_SIZE, data); break; } - msleep(TPM_I2C_BUS_DELAY); + usleep_range(TPM_I2C_BUS_DELAY, TPM_I2C_BUS_DELAY + + TPM_I2C_DELAY_RANGE); } while (time_before(jiffies, stop)); return burst_count; @@ -203,13 +206,17 @@ static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value, return 0; /* use polling to wait for the event */ - ten_msec = jiffies + msecs_to_jiffies(TPM_I2C_RETRY_DELAY_LONG); + ten_msec = jiffies + usecs_to_jiffies(TPM_I2C_RETRY_DELAY_LONG); stop = jiffies + timeout; do { if (time_before(jiffies, ten_msec)) - msleep(TPM_I2C_RETRY_DELAY_SHORT); + usleep_range(TPM_I2C_RETRY_DELAY_SHORT, + TPM_I2C_RETRY_DELAY_SHORT + + TPM_I2C_DELAY_RANGE); else - msleep(TPM_I2C_RETRY_DELAY_LONG); + usleep_range(TPM_I2C_RETRY_DELAY_LONG, + TPM_I2C_RETRY_DELAY_LONG + + TPM_I2C_DELAY_RANGE); status_valid = i2c_nuvoton_check_status(chip, mask, value); if (status_valid) From 7cb54bfbd52c35ce1fc1480099e90ffc28d2d94d Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Fri, 10 Mar 2017 13:45:54 -0500 Subject: [PATCH 159/465] tpm: add sleep only for retry in i2c_nuvoton_write_status() commit 0afb7118ae021e80ecf70f5a3336e0935505518a upstream. Currently, there is an unnecessary 1 msec delay added in i2c_nuvoton_write_status() for the successful case. This function is called multiple times during send() and recv(), which implies adding multiple extra delays for every TPM operation. This patch calls usleep_range() only if retry is to be done. 
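After the change the back-off sits in the failure branch only, so a status write that succeeds on the first attempt pays no delay. Sketch of the loop shape, with try_write() as an illustrative stand-in for i2c_nuvoton_write_buf():

        for (i = 0, status = -1; i < TPM_I2C_RETRY_COUNT && status < 0; i++) {
                status = try_write();   /* illustrative stand-in */
                if (status < 0)         /* back off only before a retry */
                        usleep_range(TPM_I2C_BUS_DELAY,
                                     TPM_I2C_BUS_DELAY + TPM_I2C_DELAY_RANGE);
        }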
Signed-off-by: Nayna Jain Reviewed-by: Mimi Zohar Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_i2c_nuvoton.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index 0c98c424d792..c6428771841f 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -124,8 +124,9 @@ static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data) /* this causes the current command to be aborted */ for (i = 0, status = -1; i < TPM_I2C_RETRY_COUNT && status < 0; i++) { status = i2c_nuvoton_write_buf(client, TPM_STS, 1, &data); - usleep_range(TPM_I2C_BUS_DELAY, TPM_I2C_BUS_DELAY - + TPM_I2C_DELAY_RANGE); + if (status < 0) + usleep_range(TPM_I2C_BUS_DELAY, TPM_I2C_BUS_DELAY + + TPM_I2C_DELAY_RANGE); } return status; } From b06ad9f0bf81139d2b29493792a10ecf7ed19ad9 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Fri, 10 Mar 2017 17:46:04 -0700 Subject: [PATCH 160/465] tpm_crb: check for bad response size commit 8569defde8057258835c51ce01a33de82e14b148 upstream. Make sure size of response buffer is at least 6 bytes, or we will underflow and pass large size_t to memcpy_fromio(). This was encountered while testing earlier version of locality patchset. Fixes: 30fc8d138e912 ("tpm: TPM 2.0 CRB Interface") Signed-off-by: Jerry Snitselaar Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_crb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 86f355b6df1d..dc760117f5ad 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -176,8 +176,7 @@ static int crb_recv(struct tpm_chip *chip, u8 *buf, size_t count) memcpy_fromio(buf, priv->rsp, 6); expected = be32_to_cpup((__be32 *) &buf[2]); - - if (expected > count) + if (expected > count || expected < 6) return -EIO; memcpy_fromio(&buf[6], &priv->rsp[6], expected - 6); From 455edd3ceb773ee8d2c1f7f396eb190d934ab648 Mon Sep 17 00:00:00 2001 From: "Hon Ching \\(Vicky) Lo" Date: Wed, 15 Mar 2017 01:28:07 -0400 Subject: [PATCH 161/465] vTPM: Fix missing NULL check commit 31574d321c70f6d3b40fe98f9b2eafd9a903fef9 upstream. The current code passes the address of tpm_chip as the argument to dev_get_drvdata() without prior NULL check in tpm_ibmvtpm_get_desired_dma. This resulted an oops during kernel boot when vTPM is enabled in Power partition configured in active memory sharing mode. The vio_driver's get_desired_dma() is called before the probe(), which for vtpm is tpm_ibmvtpm_probe, and it's this latter function that initializes the driver and set data. Attempting to get data before the probe() caused the problem. This patch adds a NULL check to the tpm_ibmvtpm_get_desired_dma. 
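The defensive shape of the fix: treat missing driver data as "probe() has not run yet" and fall back to the conservative estimate instead of dereferencing a NULL chip. Sketch of the accessor logic, mirroring the diff below:

        struct tpm_chip *chip = dev_get_drvdata(&vdev->dev);
        struct ibmvtpm_dev *ibmvtpm;

        if (!chip)      /* called before probe(): no driver data yet */
                return CRQ_RES_BUF_SIZE + PAGE_SIZE;

        ibmvtpm = dev_get_drvdata(&chip->dev);
        return CRQ_RES_BUF_SIZE + ibmvtpm->rtce_size;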
fixes: 9e0d39d8a6a0 ("tpm: Remove useless priv field in struct tpm_vendor_specific") Signed-off-by: Hon Ching(Vicky) Lo Reviewed-by: Jarkko Sakkine Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_ibmvtpm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 1b9d61ffe991..f01d083eced2 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -299,6 +299,8 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev) } kfree(ibmvtpm); + /* For tpm_ibmvtpm_get_desired_dma */ + dev_set_drvdata(&vdev->dev, NULL); return 0; } @@ -313,14 +315,16 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev) static unsigned long tpm_ibmvtpm_get_desired_dma(struct vio_dev *vdev) { struct tpm_chip *chip = dev_get_drvdata(&vdev->dev); - struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); + struct ibmvtpm_dev *ibmvtpm; /* * ibmvtpm initializes at probe time, so the data we are * asking for may not be set yet. Estimate that 4K required * for TCE-mapped buffer in addition to CRQ. */ - if (!ibmvtpm) + if (chip) + ibmvtpm = dev_get_drvdata(&chip->dev); + else return CRQ_RES_BUF_SIZE + PAGE_SIZE; return CRQ_RES_BUF_SIZE + ibmvtpm->rtce_size; From a7caccf886ea8db59eccbda342ad65ec2f6b4ea1 Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Wed, 29 Mar 2017 00:43:30 -0700 Subject: [PATCH 162/465] tpm: fix handling of the TPM 2.0 event logs commit fd5c78694f3f1c875e293de7a641ba8a3d60d00d upstream. When TPM2 log has entries with more than 3 digests, or with digests not listed in the log header, log gets misparsed, eventually leading to kernel complaint that code tried to vmalloc 512MB of memory (I have no idea what would happen on bigger system). So code should not parse only first 3 digests: both event header and event itself are already in memory, so we can parse any number of digests, as long as we do not try to parse whole memory when given count of 0xFFFFFFFF. So this change: * Rejects event entry with more digests than log header describes. Digest types should be unique, and all should be described in log header, so there cannot be more digests in the event than in the header. * Reject event entry with digest that is not described in the log header. In theory code could hardcode information about digest IDs already assigned by TCG, but if firmware authors cannot get event log format right, why should anyone believe that they got event log content right. Fixes: 4d23cc323cdb ("tpm: add securityfs support for TPM 2.0 firmware event log") Signed-off-by: Petr Vandrovec Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm2_eventlog.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm2_eventlog.c b/drivers/char/tpm/tpm2_eventlog.c index 513897cf9c4b..34a8afa69138 100644 --- a/drivers/char/tpm/tpm2_eventlog.c +++ b/drivers/char/tpm/tpm2_eventlog.c @@ -56,18 +56,24 @@ static int calc_tpm2_event_size(struct tcg_pcr_event2 *event, efispecid = (struct tcg_efi_specid_event *)event_header->event; - for (i = 0; (i < event->count) && (i < TPM2_ACTIVE_PCR_BANKS); - i++) { + /* Check if event is malformed. 
*/ + if (event->count > efispecid->num_algs) + return 0; + + for (i = 0; i < event->count; i++) { halg_size = sizeof(event->digests[i].alg_id); memcpy(&halg, marker, halg_size); marker = marker + halg_size; - for (j = 0; (j < efispecid->num_algs); j++) { + for (j = 0; j < efispecid->num_algs; j++) { if (halg == efispecid->digest_sizes[j].alg_id) { - marker = marker + + marker += efispecid->digest_sizes[j].digest_size; break; } } + /* Algorithm without known length. Such event is unparseable. */ + if (j == efispecid->num_algs) + return 0; } event_field = (struct tcg_event_field *)marker; From cca9c04ad435bca5637d4b19c5378792d079e9bd Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Sat, 29 Apr 2017 12:19:33 +0200 Subject: [PATCH 163/465] ASoC: cs4271: configure reset GPIO as output commit 49b2e27ab9f66b0a22c21980ad8118a4038324ae upstream. During reset "refactoring" the output configuration was lost. This commit repairs sound on EDB93XX boards. Fixes: 9a397f4 ("ASoC: cs4271: add regulator consumer support") Signed-off-by: Alexander Sverdlin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/cs4271.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs4271.c b/sound/soc/codecs/cs4271.c index 8c0f3b89b5bc..e78b5f055f25 100644 --- a/sound/soc/codecs/cs4271.c +++ b/sound/soc/codecs/cs4271.c @@ -498,7 +498,7 @@ static int cs4271_reset(struct snd_soc_codec *codec) struct cs4271_private *cs4271 = snd_soc_codec_get_drvdata(codec); if (gpio_is_valid(cs4271->gpio_nreset)) { - gpio_set_value(cs4271->gpio_nreset, 0); + gpio_direction_output(cs4271->gpio_nreset, 0); mdelay(1); gpio_set_value(cs4271->gpio_nreset, 1); mdelay(1); From 3992c93b742c67dc11bcc47753a9f13fc3f4e6df Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Apr 2017 14:31:42 +0300 Subject: [PATCH 164/465] mlx5: Fix mlx5_ib_map_mr_sg mr length commit 0a49f2c31c3efbeb0de3e4b5598764887f629be2 upstream. In case we got an initial sg_offset, we need to account for it in the mr length. Fixes: ff2ba9936591 ("IB/core: Add passing an offset into the SG to ib_map_mr_sg") Signed-off-by: Sagi Grimberg Tested-by: Israel Rukshin Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b8f9382a8b7d..d9c6c0ea750b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1782,7 +1782,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); klms[i].key = cpu_to_be32(lkey); - mr->ibmr.length += sg_dma_len(sg); + mr->ibmr.length += sg_dma_len(sg) - sg_offset; sg_offset = 0; } From cd21f4af214fca46b8346bca8adcef5224b2a126 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 28 Apr 2017 11:20:01 +0200 Subject: [PATCH 165/465] infiniband: call ipv6 route lookup via the stub interface commit eea40b8f624f25cbc02d55f2d93203f60cee9341 upstream. The infiniband address handle can be triggered to resolve an ipv6 address in response to MAD packets, regardless of the ipv6 module being disabled via the kernel command line argument. That will cause a call into the ipv6 routing code, which is not initialized, and a conseguent oops. 
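For reference, the stub-based lookup used by the fix (shown in full in the diff below) has this shape at the call site; error handling is condensed here. With ipv6 disabled the stub fails the lookup gracefully instead of touching uninitialized routing state:

        struct dst_entry *dst;
        int ret;

        ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
        if (ret < 0)
                return ret;     /* e.g. -EAFNOSUPPORT when ipv6 is disabled */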
This commit addresses the above issue replacing the direct lookup call with an indirect one via the ipv6 stub, which is properly initialized according to the ipv6 status (e.g. if ipv6 is disabled, the routing lookup fails gracefully) Signed-off-by: Paolo Abeni Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/addr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 0f58f46dbad7..8fd108d89527 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -444,8 +444,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - dst = ip6_route_output(addr->net, NULL, &fl6); - if ((ret = dst->error)) + ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6); + if (ret < 0) goto put; rt = (struct rt6_info *)dst; From 11dfdb1a46b4fe98559ff8799256eef1ffebd09e Mon Sep 17 00:00:00 2001 From: Vinothkumar Raja Date: Thu, 6 Apr 2017 22:09:38 -0400 Subject: [PATCH 166/465] dm btree: fix for dm_btree_find_lowest_key() commit 7d1fedb6e96a960aa91e4ff70714c3fb09195a5a upstream. dm_btree_find_lowest_key() is giving incorrect results. find_key() traverses the btree correctly for finding the highest key, but there is an error in the way it traverses the btree for retrieving the lowest key. dm_btree_find_lowest_key() fetches the first key of the rightmost block of the btree instead of fetching the first key from the leftmost block. Fix this by conditionally passing the correct parameter to value64() based on the @find_highest flag. Signed-off-by: Erez Zadok Signed-off-by: Vinothkumar Raja Signed-off-by: Nidhi Panpalia Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 02e2ee0d8a00..f21ce6a3d4cf 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -902,8 +902,12 @@ static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest, else *result_key = le64_to_cpu(ro_node(s)->keys[0]); - if (next_block || flags & INTERNAL_NODE) - block = value64(ro_node(s), i); + if (next_block || flags & INTERNAL_NODE) { + if (find_highest) + block = value64(ro_node(s), i); + else + block = value64(ro_node(s), 0); + } } while (flags & INTERNAL_NODE); From 539ee786497fc7529a895954ccac220f8f10b42d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 28 Mar 2017 12:53:39 -0400 Subject: [PATCH 167/465] dm raid: select the Kconfig option CONFIG_MD_RAID0 commit 7b81ef8b14f80033e4a4168d199a0f5fd79b9426 upstream. Since the commit 0cf4503174c1 ("dm raid: add support for the MD RAID0 personality"), the dm-raid subsystem can activate a RAID-0 array. Therefore, add MD_RAID0 to the dependencies of DM_RAID, so that MD_RAID0 will be selected when DM_RAID is selected. 
Fixes: 0cf4503174c1 ("dm raid: add support for the MD RAID0 personality") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b7767da50c26..b5372fd7d441 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -365,6 +365,7 @@ config DM_LOG_USERSPACE config DM_RAID tristate "RAID 1/4/5/6/10 target" depends on BLK_DEV_DM + select MD_RAID0 select MD_RAID1 select MD_RAID10 select MD_RAID456 From 2aac21e0014d00a2d821581cc65f0e8354c7abec Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 30 Apr 2017 17:33:26 -0400 Subject: [PATCH 168/465] dm bufio: avoid a possible ABBA deadlock commit 1b0fb5a5b2dc0dddcfa575060441a7176ba7ac37 upstream. __get_memory_limit() tests if dm_bufio_cache_size changed and calls __cache_size_refresh() if it did. It takes dm_bufio_clients_lock while it already holds the client lock. However, lock ordering is violated because in cleanup_old_buffers() dm_bufio_clients_lock is taken before the client lock. This results in a possible deadlock and lockdep engine warning. Fix this deadlock by changing mutex_lock() to mutex_trylock(). If the lock can't be taken, it will be re-checked next time when a new buffer is allocated. Also add "unlikely" to the if condition, so that the optimizer assumes that the condition is false. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index df4859f6ac6a..644c5da11529 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -933,10 +933,11 @@ static void __get_memory_limit(struct dm_bufio_client *c, { unsigned long buffers; - if (ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch) { - mutex_lock(&dm_bufio_clients_lock); - __cache_size_refresh(); - mutex_unlock(&dm_bufio_clients_lock); + if (unlikely(ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) { + if (mutex_trylock(&dm_bufio_clients_lock)) { + __cache_size_refresh(); + mutex_unlock(&dm_bufio_clients_lock); + } } buffers = dm_bufio_cache_size_per_client >> From 7d67cd026df1d5a5c6b260af3f6bc23924464426 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 30 Apr 2017 17:34:53 -0400 Subject: [PATCH 169/465] dm bufio: check new buffer allocation watermark every 30 seconds commit 390020ad2af9ca04844c4f3b1f299ad8746d84c8 upstream. dm-bufio checks a watermark when it allocates a new buffer in __bufio_new(). However, it doesn't check the watermark when the user changes /sys/module/dm_bufio/parameters/max_cache_size_bytes. This may result in a problem - if the watermark is high enough so that all possible buffers are allocated and if the user lowers the value of "max_cache_size_bytes", the watermark will never be checked against the new value because no new buffer would be allocated. To fix this, change __evict_old_buffers() so that it checks the watermark. __evict_old_buffers() is called every 30 seconds, so if the user reduces "max_cache_size_bytes", dm-bufio will react to this change within 30 seconds and decrease memory consumption. 
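Reduced to its shape, the periodic pass now re-evaluates the watermark before evicting old buffers, temporarily dropping the client lock to issue any writeback it queued. Sketch using the driver's own helpers, as in the diff below:

        LIST_HEAD(write_list);

        dm_bufio_lock(c);

        __check_watermark(c, &write_list);      /* honours a lowered max_cache_size_bytes */
        if (!list_empty(&write_list)) {
                dm_bufio_unlock(c);
                __flush_write_list(&write_list);        /* write back outside the lock */
                dm_bufio_lock(c);
        }

        /* ... then evict clean buffers beyond the retain target, as before ... */

        dm_bufio_unlock(c);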
Depends-on: 1b0fb5a5b2 ("dm bufio: avoid a possible ABBA deadlock") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 644c5da11529..d83db5569b21 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1783,9 +1783,17 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) struct dm_buffer *b, *tmp; unsigned retain_target = get_retain_buffers(c); unsigned count; + LIST_HEAD(write_list); dm_bufio_lock(c); + __check_watermark(c, &write_list); + if (unlikely(!list_empty(&write_list))) { + dm_bufio_unlock(c); + __flush_write_list(&write_list); + dm_bufio_lock(c); + } + count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) { if (count <= retain_target) @@ -1810,6 +1818,8 @@ static void cleanup_old_buffers(void) mutex_lock(&dm_bufio_clients_lock); + __cache_size_refresh(); + list_for_each_entry(c, &dm_bufio_all_clients, client_list) __evict_old_buffers(c, max_age_hz); From 4ff00db718af4b4571f236668488bffcd67c5418 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 7 Apr 2017 16:50:44 -0700 Subject: [PATCH 170/465] dm mpath: requeue after a small delay if blk_get_request() fails commit 06eb061f48594aa369f6e852b352410298b317a8 upstream. If blk_get_request() returns ENODEV then multipath_clone_and_map() causes a request to be requeued immediately. This can cause a kworker thread to spend 100% of the CPU time of a single core in __blk_mq_run_hw_queue() and also can cause device removal to never finish. Avoid this by only requeuing after a delay if blk_get_request() fails. Additionally, reduce the requeue delay. Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-mpath.c | 5 ++--- drivers/md/dm-rq.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7f223dbed49f..a4cc4d42117b 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -484,7 +484,6 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, struct request **__clone) { struct multipath *m = ti->private; - int r = DM_MAPIO_REQUEUE; size_t nr_bytes = blk_rq_bytes(rq); struct pgpath *pgpath; struct block_device *bdev; @@ -503,7 +502,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { pg_init_all_paths(m); - return r; + return DM_MAPIO_REQUEUE; } memset(mpio, 0, sizeof(*mpio)); @@ -517,7 +516,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, GFP_ATOMIC); if (IS_ERR(clone)) { /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ - return r; + return DM_MAPIO_DELAY_REQUEUE; } clone->bio = clone->biotail = NULL; clone->rq_disk = bdev->bd_disk; diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 505b9f6b4a47..93310edb3e09 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -280,7 +280,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ if (!rq->q->mq_ops) dm_old_requeue_request(rq); else - dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0); + dm_mq_delay_requeue_request(rq, delay_requeue ? 
100/*ms*/ : 0); rq_completed(md, rw, false); } From 5a09414335f06d28002d43c7e0a3e29ea9ebd54c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:14 -0700 Subject: [PATCH 171/465] dm mpath: split and rename activate_path() to prepare for its expanded use commit 89bfce763e43fa4897e0d3af6b29ed909df64cfd upstream. activate_path() is renamed to activate_path_work() which now calls activate_or_offline_path(). activate_or_offline_path() will be used by the next commit. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-mpath.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a4cc4d42117b..4b891d9ff349 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -111,7 +111,8 @@ typedef int (*action_fn) (struct pgpath *pgpath); static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void trigger_event(struct work_struct *work); -static void activate_path(struct work_struct *work); +static void activate_or_offline_path(struct pgpath *pgpath); +static void activate_path_work(struct work_struct *work); static void process_queued_bios(struct work_struct *work); /*----------------------------------------------- @@ -136,7 +137,7 @@ static struct pgpath *alloc_pgpath(void) if (pgpath) { pgpath->is_active = true; - INIT_DELAYED_WORK(&pgpath->activate_path, activate_path); + INIT_DELAYED_WORK(&pgpath->activate_path, activate_path_work); } return pgpath; @@ -1436,10 +1437,8 @@ static void pg_init_done(void *data, int errors) spin_unlock_irqrestore(&m->lock, flags); } -static void activate_path(struct work_struct *work) +static void activate_or_offline_path(struct pgpath *pgpath) { - struct pgpath *pgpath = - container_of(work, struct pgpath, activate_path.work); struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); if (pgpath->is_active && !blk_queue_dying(q)) @@ -1448,6 +1447,14 @@ static void activate_path(struct work_struct *work) pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED); } +static void activate_path_work(struct work_struct *work) +{ + struct pgpath *pgpath = + container_of(work, struct pgpath, activate_path.work); + + activate_or_offline_path(pgpath); +} + static int noretry_error(int error) { switch (error) { From 5087ded2bbd48bf84a5840624bfc901f4bd75613 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:15 -0700 Subject: [PATCH 172/465] dm mpath: avoid that path removal can trigger an infinite loop commit 7083abbbfc4fa706ff72d27d33a5214881979336 upstream. If blk_get_request() fails, check whether the failure is due to a path being removed. If that is the case, fail the path by triggering a call to fail_path(). This avoids that the following scenario can be encountered while removing paths: * CPU usage of a kworker thread jumps to 100%. * Removing the DM device becomes impossible. Delay requeueing if blk_get_request() returns -EBUSY or -EWOULDBLOCK, and the queue is not dying, because in these cases immediate requeuing is inappropriate. 
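The resulting decision can be sketched as follows (illustrative C only; the enum values and helper are stand-ins, not the dm-mpath API):

#include <stdbool.h>

enum map_action { REQUEUE_NOW, REQUEUE_DELAYED };

/* What to do when allocating the clone request fails. */
static enum map_action on_clone_alloc_failure(bool queue_dying,
					      void (*fail_this_path)(void))
{
	if (queue_dying) {
		/* The path is going away: take it out of rotation and
		 * retry immediately on the remaining paths. */
		fail_this_path();
		return REQUEUE_NOW;
	}
	/* Transient -EBUSY/-EWOULDBLOCK: back off instead of spinning. */
	return REQUEUE_DELAYED;
}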
Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-mpath.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 4b891d9ff349..f3c79f188747 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -489,6 +489,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, struct pgpath *pgpath; struct block_device *bdev; struct dm_mpath_io *mpio = get_mpio(map_context); + struct request_queue *q; struct request *clone; /* Do we need to select a new pgpath? */ @@ -511,12 +512,18 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, mpio->nr_bytes = nr_bytes; bdev = pgpath->path.dev->bdev; - - clone = blk_get_request(bdev_get_queue(bdev), - rq->cmd_flags | REQ_NOMERGE, - GFP_ATOMIC); + q = bdev_get_queue(bdev); + clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC); if (IS_ERR(clone)) { /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ + bool queue_dying = blk_queue_dying(q); + DMERR_LIMIT("blk_get_request() returned %ld%s - requeuing", + PTR_ERR(clone), queue_dying ? " (path offline)" : ""); + if (queue_dying) { + atomic_inc(&m->pg_init_in_progress); + activate_or_offline_path(pgpath); + return DM_MAPIO_REQUEUE; + } return DM_MAPIO_DELAY_REQUEUE; } clone->bio = clone->biotail = NULL; From a5b9afd690029f7d730b3b79ddba2c4858005f12 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:16 -0700 Subject: [PATCH 173/465] dm mpath: delay requeuing while path initialization is in progress commit c1d7ecf7ca11d0edd3085262c8597203440d056c upstream. Requeuing a request immediately while path initialization is ongoing causes high CPU usage, something that is undesired. Hence delay requeuing while path initialization is in progress. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-mpath.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f3c79f188747..d85baffa3377 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -322,13 +322,16 @@ static int __pg_init_all_paths(struct multipath *m) return atomic_read(&m->pg_init_in_progress); } -static void pg_init_all_paths(struct multipath *m) +static int pg_init_all_paths(struct multipath *m) { + int ret; unsigned long flags; spin_lock_irqsave(&m->lock, flags); - __pg_init_all_paths(m); + ret = __pg_init_all_paths(m); spin_unlock_irqrestore(&m->lock, flags); + + return ret; } static void __switch_pg(struct multipath *m, struct priority_group *pg) @@ -503,7 +506,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, return -EIO; /* Failed */ } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { - pg_init_all_paths(m); + if (pg_init_all_paths(m)) + return DM_MAPIO_DELAY_REQUEUE; return DM_MAPIO_REQUEUE; } From 2e80638cee4fb7d423e3c9285f0353d9a9d51a20 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 5 May 2017 14:40:13 -0400 Subject: [PATCH 174/465] dm cache metadata: fail operations if fail_io mode has been established commit 10add84e276432d9dd8044679a1028dd4084117e upstream. 
Otherwise it is possible to trigger crashes due to the metadata being inaccessible yet these methods don't safely account for that possibility without these checks. Reported-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 6735c8d6a445..ee05d19f8bbc 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1627,17 +1627,19 @@ void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown) { - int r; + int r = -EINVAL; flags_mutator mutator = (clean_shutdown ? set_clean_shutdown : clear_clean_shutdown); WRITE_LOCK(cmd); + if (cmd->fail_io) + goto out; + r = __commit_transaction(cmd, mutator); if (r) goto out; r = __begin_transaction(cmd); - out: WRITE_UNLOCK(cmd); return r; @@ -1649,7 +1651,8 @@ int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, int r = -EINVAL; READ_LOCK(cmd); - r = dm_sm_get_nr_free(cmd->metadata_sm, result); + if (!cmd->fail_io) + r = dm_sm_get_nr_free(cmd->metadata_sm, result); READ_UNLOCK(cmd); return r; @@ -1661,7 +1664,8 @@ int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, int r = -EINVAL; READ_LOCK(cmd); - r = dm_sm_get_nr_blocks(cmd->metadata_sm, result); + if (!cmd->fail_io) + r = dm_sm_get_nr_blocks(cmd->metadata_sm, result); READ_UNLOCK(cmd); return r; From 17cedf3ba1e34cac1ca6b6dd8c7fb40c015729e4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 30 Apr 2017 17:32:28 -0400 Subject: [PATCH 175/465] dm bufio: make the parameter "retain_bytes" unsigned long commit 13840d38016203f0095cd547b90352812d24b787 upstream. Change the type of the parameter "retain_bytes" from unsigned to unsigned long, so that on 64-bit machines the user can set more than 4GiB of data to be retained. Also, change the type of the variable "count" in the function "__evict_old_buffers" to unsigned long. The assignment "count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];" could result in unsigned long to unsigned overflow and that could result in buffers not being freed when they should. While at it, avoid division in get_retain_buffers(). Division is slow, we can change it to shift because we have precalculated the log2 of block size. 
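A standalone sketch of that shift (not the dm-bufio code itself; SECTOR_SHIFT is 9 for 512-byte sectors, and sectors_per_block_bits is assumed to hold log2 of the block size in sectors):

#include <stdio.h>

#define SECTOR_SHIFT 9

static unsigned long retain_buffers(unsigned long retain_bytes,
				    unsigned int sectors_per_block_bits)
{
	/* Equivalent to retain_bytes / block_size when block_size is a
	 * power of two, but without a division. */
	return retain_bytes >> (sectors_per_block_bits + SECTOR_SHIFT);
}

int main(void)
{
	/* 256 MiB retained with 4 KiB blocks (8 sectors => 3 bits): prints 65536 */
	printf("%lu\n", retain_buffers(256UL << 20, 3));
	return 0;
}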
Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index d83db5569b21..2ac24180505f 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -216,7 +216,7 @@ static DEFINE_SPINLOCK(param_spinlock); * Buffers are freed after this timeout */ static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; -static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; +static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; static unsigned long dm_bufio_peak_allocated; static unsigned long dm_bufio_allocated_kmem_cache; @@ -1551,10 +1551,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) return true; } -static unsigned get_retain_buffers(struct dm_bufio_client *c) +static unsigned long get_retain_buffers(struct dm_bufio_client *c) { - unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); - return retain_bytes / c->block_size; + unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); + return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT); } static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, @@ -1564,7 +1564,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, struct dm_buffer *b, *tmp; unsigned long freed = 0; unsigned long count = nr_to_scan; - unsigned retain_target = get_retain_buffers(c); + unsigned long retain_target = get_retain_buffers(c); for (l = 0; l < LIST_SIZE; l++) { list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { @@ -1781,8 +1781,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz) static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) { struct dm_buffer *b, *tmp; - unsigned retain_target = get_retain_buffers(c); - unsigned count; + unsigned long retain_target = get_retain_buffers(c); + unsigned long count; LIST_HEAD(write_list); dm_bufio_lock(c); @@ -1942,7 +1942,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); -module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR); +module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory"); module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); From f568a10289c8596b60ed40d423e12b1121f5739a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 15 May 2017 09:43:05 -0400 Subject: [PATCH 176/465] dm thin metadata: call precommit before saving the roots commit 91bcdb92d39711d1adb40c26b653b7978d93eb98 upstream. These calls were the wrong way round in __write_initial_superblock. 
Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index a15091a0d40c..4477bf930cf4 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -485,11 +485,11 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) if (r < 0) return r; - r = save_sm_roots(pmd); + r = dm_tm_pre_commit(pmd->tm); if (r < 0) return r; - r = dm_tm_pre_commit(pmd->tm); + r = save_sm_roots(pmd); if (r < 0) return r; From d7252903e28c7ff86dd752f48641c594b6fbb149 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 15 May 2017 09:45:40 -0400 Subject: [PATCH 177/465] dm space map disk: fix some book keeping in the disk space map commit 0377a07c7a035e0d033cd8b29f0cb15244c0916a upstream. When decrementing the reference count for a block, the free count wasn't being updated if the reference count went to zero. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-space-map-disk.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index ebb280a14325..32adf6b4a9c7 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -142,10 +142,23 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) { + int r; + uint32_t old_count; enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - return sm_ll_dec(&smd->ll, b, &ev); + r = sm_ll_dec(&smd->ll, b, &ev); + if (!r && (ev == SM_FREE)) { + /* + * It's only free if it's also free in the last + * transaction. + */ + r = sm_ll_lookup(&smd->old_ll, b, &old_count); + if (!r && !old_count) + smd->nr_allocated_this_transaction--; + } + + return r; } static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) From 1800fde3093e035ab37a68597b850c23437ae964 Mon Sep 17 00:00:00 2001 From: Dennis Yang Date: Wed, 29 Mar 2017 15:46:13 +0800 Subject: [PATCH 178/465] md: update slab_cache before releasing new stripes when stripes resizing commit 583da48e388f472e8818d9bb60ef6a1d40ee9f9d upstream. When growing raid5 device on machine with small memory, there is chance that mdadm will be killed and the following bug report can be observed. The same bug could also be reproduced in linux-4.10.6. [57600.075774] BUG: unable to handle kernel NULL pointer dereference at (null) [57600.083796] IP: [] _raw_spin_lock+0x7/0x20 [57600.110378] PGD 421cf067 PUD 4442d067 PMD 0 [57600.114678] Oops: 0002 [#1] SMP [57600.180799] CPU: 1 PID: 25990 Comm: mdadm Tainted: P O 4.2.8 #1 [57600.187849] Hardware name: To be filled by O.E.M. 
To be filled by O.E.M./MAHOBAY, BIOS QV05AR66 03/06/2013 [57600.197490] task: ffff880044e47240 ti: ffff880043070000 task.ti: ffff880043070000 [57600.204963] RIP: 0010:[] [] _raw_spin_lock+0x7/0x20 [57600.213057] RSP: 0018:ffff880043073810 EFLAGS: 00010046 [57600.218359] RAX: 0000000000000000 RBX: 000000000000000c RCX: ffff88011e296dd0 [57600.225486] RDX: 0000000000000001 RSI: ffffe8ffffcb46c0 RDI: 0000000000000000 [57600.232613] RBP: ffff880043073878 R08: ffff88011e5f8170 R09: 0000000000000282 [57600.239739] R10: 0000000000000005 R11: 28f5c28f5c28f5c3 R12: ffff880043073838 [57600.246872] R13: ffffe8ffffcb46c0 R14: 0000000000000000 R15: ffff8800b9706a00 [57600.253999] FS: 00007f576106c700(0000) GS:ffff88011e280000(0000) knlGS:0000000000000000 [57600.262078] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [57600.267817] CR2: 0000000000000000 CR3: 00000000428fe000 CR4: 00000000001406e0 [57600.274942] Stack: [57600.276949] ffffffff8114ee35 ffff880043073868 0000000000000282 000000000000eb3f [57600.284383] ffffffff81119043 ffff880043073838 ffff880043073838 ffff88003e197b98 [57600.291820] ffffe8ffffcb46c0 ffff88003e197360 0000000000000286 ffff880043073968 [57600.299254] Call Trace: [57600.301698] [] ? cache_flusharray+0x35/0xe0 [57600.307523] [] ? __page_cache_release+0x23/0x110 [57600.313779] [] kmem_cache_free+0x63/0xc0 [57600.319344] [] drop_one_stripe+0x62/0x90 [57600.324915] [] raid5_cache_scan+0x8b/0xb0 [57600.330563] [] shrink_slab.part.36+0x19a/0x250 [57600.336650] [] shrink_zone+0x23c/0x250 [57600.342039] [] do_try_to_free_pages+0x153/0x420 [57600.348210] [] try_to_free_pages+0x91/0xa0 [57600.353959] [] __alloc_pages_nodemask+0x4d1/0x8b0 [57600.360303] [] check_reshape+0x62b/0x770 [57600.365866] [] raid5_check_reshape+0x55/0xa0 [57600.371778] [] update_raid_disks+0xc7/0x110 [57600.377604] [] md_ioctl+0xd83/0x1b10 [57600.382827] [] blkdev_ioctl+0x170/0x690 [57600.388307] [] block_ioctl+0x38/0x40 [57600.393525] [] do_vfs_ioctl+0x2b5/0x480 [57600.399010] [] ? vfs_write+0x14b/0x1f0 [57600.404400] [] SyS_ioctl+0x3c/0x70 [57600.409447] [] entry_SYSCALL_64_fastpath+0x12/0x6a [57600.415875] Code: 00 00 00 00 55 48 89 e5 8b 07 85 c0 74 04 31 c0 5d c3 ba 01 00 00 00 f0 0f b1 17 85 c0 75 ef b0 01 5d c3 90 31 c0 ba 01 00 00 00 0f b1 17 85 c0 75 01 c3 55 89 c6 48 89 e5 e8 85 d1 63 ff 5d [57600.435460] RIP [] _raw_spin_lock+0x7/0x20 [57600.441208] RSP [57600.444690] CR2: 0000000000000000 [57600.448000] ---[ end trace cbc6b5cc4bf9831d ]--- The problem is that resize_stripes() releases new stripe_heads before assigning new slab cache to conf->slab_cache. If the shrinker function raid5_cache_scan() gets called after resize_stripes() starting releasing new stripes but right before new slab cache being assigned, it is possible that these new stripe_heads will be freed with the old slab_cache which was already been destoryed and that triggers this bug. 
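The required ordering can be shown with a small sketch (purely illustrative; the structure and helper names are invented, this is not the raid5 code):

struct cache;				/* stand-in for struct kmem_cache */

struct config {
	struct cache *slab_cache;	/* the cache a concurrent shrinker frees into */
};

static void publish_new_stripes(struct config *cfg, struct cache *new_cache,
				void (*return_stripes_to_service)(void))
{
	/* 1. Switch the cache pointer first ... */
	cfg->slab_cache = new_cache;

	/* 2. ... and only then make the new objects visible, so a concurrent
	 * free can no longer pick the old, already destroyed cache. */
	return_stripes_to_service();
}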
Signed-off-by: Dennis Yang Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.") Reviewed-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ed5cd705b985..b095bd296da7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2323,6 +2323,10 @@ static int resize_stripes(struct r5conf *conf, int newsize) err = -ENOMEM; mutex_unlock(&conf->cache_size_mutex); + + conf->slab_cache = sc; + conf->active_name = 1-conf->active_name; + /* Step 4, return new stripes to service */ while(!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); @@ -2340,8 +2344,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) } /* critical section pass, GFP_NOIO no longer needed */ - conf->slab_cache = sc; - conf->active_name = 1-conf->active_name; if (!err) conf->pool_size = newsize; return err; From 699ae096347fb79420e12f0f82b14fa3a68e42f3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 6 Apr 2017 11:16:33 +0800 Subject: [PATCH 179/465] md: MD_CLOSING needs to be cleared after called md_set_readonly or do_md_stop commit 065e519e71b2c1f41936cce75b46b5ab34adb588 upstream. If md_set_readonly() is called and the MD_CLOSING bit is set, the mddev cannot be opened any more because the MD_CLOSING bit is never cleared. Thus it needs to be cleared in md_ioctl after any call to md_set_readonly() or do_md_stop(). Signed-off-by: NeilBrown Fixes: af8d8e6f0315 ("md: changes for MD_STILL_CLOSED flag") Signed-off-by: Zhilong Liu Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index f6ae1d67bcd0..906a4bf600ac 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6776,6 +6776,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; int ro; + bool did_set_md_closing = false; if (!md_ioctl_valid(cmd)) return -ENOTTY; @@ -6865,7 +6866,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } + WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags)); set_bit(MD_CLOSING, &mddev->flags); + did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7058,6 +7061,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, mddev->hold_active = 0; mddev_unlock(mddev); out: + if(did_set_md_closing) + clear_bit(MD_CLOSING, &mddev->flags); return err; } #ifdef CONFIG_COMPAT From cf6df2bcd4521b728044356dea246967dc1cf0a3 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 16 Apr 2017 19:32:07 -0500 Subject: [PATCH 180/465] rtlwifi: rtl8821ae: setup 8812ae RFE according to device type commit 46cfa2148e7371c537efff1a1c693e58f523089d upstream. The current channel switch implementation sets the 8812ae RFE register value assuming that the device always has type 2. Extend the set of possible RFE types and write the corresponding register values.
Source for new code is http://dlcdnet.asus.com/pub/ASUS/wireless/PCE-AC51/DR_PCE_AC51_20232801152016.zip Signed-off-by: Maxim Samoylov Signed-off-by: Larry Finger Cc: Yan-Hsuan Chuang Cc: Pkshih Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- .../wireless/realtek/rtlwifi/rtl8821ae/phy.c | 122 +++++++++++++++--- .../wireless/realtek/rtlwifi/rtl8821ae/reg.h | 1 + 2 files changed, 107 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c index 8da874cbec1a..d8254bca0b02 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c @@ -358,6 +358,107 @@ bool rtl8821ae_phy_rf_config(struct ieee80211_hw *hw) return rtl8821ae_phy_rf6052_config(hw); } +static void _rtl8812ae_phy_set_rfe_reg_24g(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); + u8 tmp; + + switch (rtlhal->rfe_type) { + case 3: + rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, 0x54337770); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, 0x54337770); + rtl_set_bbreg(hw, RA_RFE_INV, BMASKRFEINV, 0x010); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x010); + rtl_set_bbreg(hw, 0x900, 0x00000303, 0x1); + break; + case 4: + rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, 0x77777777); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, 0x77777777); + rtl_set_bbreg(hw, RA_RFE_INV, BMASKRFEINV, 0x001); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x001); + break; + case 5: + rtl_write_byte(rtlpriv, RA_RFE_PINMUX + 2, 0x77); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, 0x77777777); + tmp = rtl_read_byte(rtlpriv, RA_RFE_INV + 3); + rtl_write_byte(rtlpriv, RA_RFE_INV + 3, tmp & ~0x1); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x000); + break; + case 1: + if (rtlpriv->btcoexist.bt_coexistence) { + rtl_set_bbreg(hw, RA_RFE_PINMUX, 0xffffff, 0x777777); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, + 0x77777777); + rtl_set_bbreg(hw, RA_RFE_INV, 0x33f00000, 0x000); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x000); + break; + } + case 0: + case 2: + default: + rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, 0x77777777); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, 0x77777777); + rtl_set_bbreg(hw, RA_RFE_INV, BMASKRFEINV, 0x000); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x000); + break; + } +} + +static void _rtl8812ae_phy_set_rfe_reg_5g(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); + u8 tmp; + + switch (rtlhal->rfe_type) { + case 0: + rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, 0x77337717); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, 0x77337717); + rtl_set_bbreg(hw, RA_RFE_INV, BMASKRFEINV, 0x010); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x010); + break; + case 1: + if (rtlpriv->btcoexist.bt_coexistence) { + rtl_set_bbreg(hw, RA_RFE_PINMUX, 0xffffff, 0x337717); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, + 0x77337717); + rtl_set_bbreg(hw, RA_RFE_INV, 0x33f00000, 0x000); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x000); + } else { + rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, + 0x77337717); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, + 0x77337717); + rtl_set_bbreg(hw, RA_RFE_INV, BMASKRFEINV, 0x000); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x000); + } + break; + case 3: + rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, 0x54337717); + rtl_set_bbreg(hw, RB_RFE_PINMUX, 
BMASKDWORD, 0x54337717); + rtl_set_bbreg(hw, RA_RFE_INV, BMASKRFEINV, 0x010); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x010); + rtl_set_bbreg(hw, 0x900, 0x00000303, 0x1); + break; + case 5: + rtl_write_byte(rtlpriv, RA_RFE_PINMUX + 2, 0x33); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, 0x77337777); + tmp = rtl_read_byte(rtlpriv, RA_RFE_INV + 3); + rtl_write_byte(rtlpriv, RA_RFE_INV + 3, tmp | 0x1); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x010); + break; + case 2: + case 4: + default: + rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, 0x77337777); + rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, 0x77337777); + rtl_set_bbreg(hw, RA_RFE_INV, BMASKRFEINV, 0x010); + rtl_set_bbreg(hw, RB_RFE_INV, BMASKRFEINV, 0x010); + break; + } +} + u32 phy_get_tx_swing_8812A(struct ieee80211_hw *hw, u8 band, u8 rf_path) { @@ -552,14 +653,9 @@ void rtl8821ae_phy_switch_wirelessband(struct ieee80211_hw *hw, u8 band) /* 0x82C[1:0] = 2b'00 */ rtl_set_bbreg(hw, 0x82c, 0x3, 0); } - if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE) { - rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, - 0x77777777); - rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, - 0x77777777); - rtl_set_bbreg(hw, RA_RFE_INV, 0x3ff00000, 0x000); - rtl_set_bbreg(hw, RB_RFE_INV, 0x3ff00000, 0x000); - } + + if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE) + _rtl8812ae_phy_set_rfe_reg_24g(hw); rtl_set_bbreg(hw, RTXPATH, 0xf0, 0x1); rtl_set_bbreg(hw, RCCK_RX, 0x0f000000, 0x1); @@ -614,14 +710,8 @@ void rtl8821ae_phy_switch_wirelessband(struct ieee80211_hw *hw, u8 band) /* 0x82C[1:0] = 2'b00 */ rtl_set_bbreg(hw, 0x82c, 0x3, 1); - if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE) { - rtl_set_bbreg(hw, RA_RFE_PINMUX, BMASKDWORD, - 0x77337777); - rtl_set_bbreg(hw, RB_RFE_PINMUX, BMASKDWORD, - 0x77337777); - rtl_set_bbreg(hw, RA_RFE_INV, 0x3ff00000, 0x010); - rtl_set_bbreg(hw, RB_RFE_INV, 0x3ff00000, 0x010); - } + if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE) + _rtl8812ae_phy_set_rfe_reg_5g(hw); rtl_set_bbreg(hw, RTXPATH, 0xf0, 0); rtl_set_bbreg(hw, RCCK_RX, 0x0f000000, 0xf); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/reg.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/reg.h index 1d6110f9c1fb..ed69dbe178ff 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/reg.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/reg.h @@ -2424,6 +2424,7 @@ #define BMASKH4BITS 0xf0000000 #define BMASKOFDM_D 0xffc00000 #define BMASKCCK 0x3f3f3f3f +#define BMASKRFEINV 0x3ff00000 #define BRFREGOFFSETMASK 0xfffff From a32a0c8285a44a9d3ec94ee40a943c6506ac657e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 5 Apr 2017 15:26:40 -0700 Subject: [PATCH 181/465] mwifiex: MAC randomization should not be persistent commit 7e2f18f06408ff56d7f75e68de8064777137b319 upstream. nl80211 provides the NL80211_SCAN_FLAG_RANDOM_ADDR for every scan request that should be randomized; the absence of such a flag means we should not randomize. However, mwifiex was stashing the latest randomization request and *always* using it for future scans, even those that didn't set the flag. Let's zero out the randomization info whenever we get a scan request without NL80211_SCAN_FLAG_RANDOM_ADDR. I'd prefer to remove priv->random_mac entirely (and plumb the randomization MAC properly through the call sequence), but the spaghetti is a little difficult to unravel here for me. 
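The intended per-scan behaviour can be sketched in plain C (ETH_ALEN and both helpers are defined here only for the example; the real driver code differs):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define ETH_ALEN 6

/* Bits set in mask are taken from the template address, cleared bits are randomized. */
static void build_random_mac(uint8_t out[ETH_ALEN], const uint8_t addr[ETH_ALEN],
			     const uint8_t mask[ETH_ALEN])
{
	for (int i = 0; i < ETH_ALEN; i++)
		out[i] = (addr[i] & mask[i]) | ((uint8_t)rand() & ~mask[i]);
}

/* Scans without NL80211_SCAN_FLAG_RANDOM_ADDR must not reuse a stale address. */
static void clear_random_mac(uint8_t out[ETH_ALEN])
{
	memset(out, 0, ETH_ALEN);
}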
Fixes: c2a8f0ff9c6c ("mwifiex: support random MAC address for scanning") Signed-off-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 1e3bd435a694..2d7e8a372bf1 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2528,9 +2528,11 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->random_mac[i] |= get_random_int() & ~(request->mac_addr_mask[i]); } + ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); + } else { + eth_zero_addr(priv->random_mac); } - ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); user_scan_cfg->num_ssids = request->n_ssids; user_scan_cfg->ssid_list = request->ssids; From d5811285b5a8d43bbf677931667f2c505b96fa1b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 14 Apr 2017 14:51:17 -0700 Subject: [PATCH 182/465] mwifiex: pcie: fix cmd_buf use-after-free in remove/reset commit 3c8cb9ad032d737b874e402c59eb51e3c991a144 upstream. Command buffers (skb's) are allocated by the main driver, and freed upon the last use. That last use is often in mwifiex_free_cmd_buffer(). In the meantime, if the command buffer gets used by the PCI driver, we map it as DMA-able, and store the mapping information in the 'cb' memory. However, if a command was in-flight when resetting the device (and therefore was still mapped), we don't get a chance to unmap this memory until after the core has cleaned up its command handling. Let's keep a refcount within the PCI driver, so we ensure the memory only gets freed after we've finished unmapping it. Noticed by KASAN when forcing a reset via: echo 1 > /sys/bus/pci/.../reset The same code path can presumably be exercised in remove() and shutdown(). [ 205.390377] mwifiex_pcie 0000:01:00.0: info: shutdown mwifiex... 
[ 205.400393] ================================================================== [ 205.407719] BUG: KASAN: use-after-free in mwifiex_unmap_pci_memory.isra.14+0x4c/0x100 [mwifiex_pcie] at addr ffffffc0ad471b28 [ 205.419040] Read of size 16 by task bash/1913 [ 205.423421] ============================================================================= [ 205.431625] BUG skbuff_head_cache (Tainted: G B ): kasan: bad access detected [ 205.439815] ----------------------------------------------------------------------------- [ 205.439815] [ 205.449534] INFO: Allocated in __build_skb+0x48/0x114 age=1311 cpu=4 pid=1913 [ 205.456709] alloc_debug_processing+0x124/0x178 [ 205.461282] ___slab_alloc.constprop.58+0x528/0x608 [ 205.466196] __slab_alloc.isra.54.constprop.57+0x44/0x54 [ 205.471542] kmem_cache_alloc+0xcc/0x278 [ 205.475497] __build_skb+0x48/0x114 [ 205.479019] __netdev_alloc_skb+0xe0/0x170 [ 205.483244] mwifiex_alloc_cmd_buffer+0x68/0xdc [mwifiex] [ 205.488759] mwifiex_init_fw+0x40/0x6cc [mwifiex] [ 205.493584] _mwifiex_fw_dpc+0x158/0x520 [mwifiex] [ 205.498491] mwifiex_reinit_sw+0x2c4/0x398 [mwifiex] [ 205.503510] mwifiex_pcie_reset_notify+0x114/0x15c [mwifiex_pcie] [ 205.509643] pci_reset_notify+0x5c/0x6c [ 205.513519] pci_reset_function+0x6c/0x7c [ 205.517567] reset_store+0x68/0x98 [ 205.521003] dev_attr_store+0x54/0x60 [ 205.524705] sysfs_kf_write+0x9c/0xb0 [ 205.528413] INFO: Freed in __kfree_skb+0xb0/0xbc age=131 cpu=4 pid=1913 [ 205.535064] free_debug_processing+0x264/0x370 [ 205.539550] __slab_free+0x84/0x40c [ 205.543075] kmem_cache_free+0x1c8/0x2a0 [ 205.547030] __kfree_skb+0xb0/0xbc [ 205.550465] consume_skb+0x164/0x178 [ 205.554079] __dev_kfree_skb_any+0x58/0x64 [ 205.558304] mwifiex_free_cmd_buffer+0xa0/0x158 [mwifiex] [ 205.563817] mwifiex_shutdown_drv+0x578/0x5c4 [mwifiex] [ 205.569164] mwifiex_shutdown_sw+0x178/0x310 [mwifiex] [ 205.574353] mwifiex_pcie_reset_notify+0xd4/0x15c [mwifiex_pcie] [ 205.580398] pci_reset_notify+0x5c/0x6c [ 205.584274] pci_dev_save_and_disable+0x24/0x6c [ 205.588837] pci_reset_function+0x30/0x7c [ 205.592885] reset_store+0x68/0x98 [ 205.596324] dev_attr_store+0x54/0x60 [ 205.600017] sysfs_kf_write+0x9c/0xb0 ... [ 205.800488] Call trace: [ 205.802980] [] dump_backtrace+0x0/0x190 [ 205.808415] [] show_stack+0x20/0x28 [ 205.813506] [] dump_stack+0xa4/0xcc [ 205.818598] [] print_trailer+0x158/0x168 [ 205.824120] [] object_err+0x4c/0x5c [ 205.829210] [] kasan_report+0x334/0x500 [ 205.834641] [] check_memory_region+0x20/0x14c [ 205.840593] [] __asan_loadN+0x14/0x1c [ 205.845879] [] mwifiex_unmap_pci_memory.isra.14+0x4c/0x100 [mwifiex_pcie] [ 205.854282] [] mwifiex_pcie_delete_cmdrsp_buf+0x94/0xa8 [mwifiex_pcie] [ 205.862421] [] mwifiex_pcie_free_buffers+0x11c/0x158 [mwifiex_pcie] [ 205.870302] [] mwifiex_pcie_down_dev+0x70/0x80 [mwifiex_pcie] [ 205.877736] [] mwifiex_shutdown_sw+0x190/0x310 [mwifiex] [ 205.884658] [] mwifiex_pcie_reset_notify+0xd4/0x15c [mwifiex_pcie] [ 205.892446] [] pci_reset_notify+0x5c/0x6c [ 205.898048] [] pci_dev_save_and_disable+0x24/0x6c [ 205.904350] [] pci_reset_function+0x30/0x7c [ 205.910134] [] reset_store+0x68/0x98 [ 205.915312] [] dev_attr_store+0x54/0x60 [ 205.920750] [] sysfs_kf_write+0x9c/0xb0 [ 205.926182] [] kernfs_fop_write+0x184/0x1f8 [ 205.931963] [] __vfs_write+0x6c/0x17c [ 205.937221] [] vfs_write+0xf0/0x1c4 [ 205.942310] [] SyS_write+0x78/0xd8 [ 205.947312] [] el0_svc_naked+0x24/0x28 ... 
[ 205.998268] ================================================================== This bug has been around in different forms for a while. It was sort of noticed in commit 955ab095c51a ("mwifiex: Do not kfree cmd buf while unregistering PCIe"), but it just fixed the double-free, without acknowledging the potential for use-after-free. Fixes: fc3314609047 ("mwifiex: use pci_alloc/free_consistent APIs for PCIe") Signed-off-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/pcie.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index b8c990d10d6e..3045764372b0 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -1039,6 +1039,7 @@ static int mwifiex_pcie_delete_cmdrsp_buf(struct mwifiex_adapter *adapter) if (card && card->cmd_buf) { mwifiex_unmap_pci_memory(adapter, card->cmd_buf, PCI_DMA_TODEVICE); + dev_kfree_skb_any(card->cmd_buf); } return 0; } @@ -1608,6 +1609,11 @@ mwifiex_pcie_send_cmd(struct mwifiex_adapter *adapter, struct sk_buff *skb) return -1; card->cmd_buf = skb; + /* + * Need to keep a reference, since core driver might free up this + * buffer before we've unmapped it. + */ + skb_get(skb); /* To send a command, the driver will: 1. Write the 64bit physical address of the data buffer to @@ -1711,6 +1717,7 @@ static int mwifiex_pcie_process_cmd_complete(struct mwifiex_adapter *adapter) if (card->cmd_buf) { mwifiex_unmap_pci_memory(adapter, card->cmd_buf, PCI_DMA_TODEVICE); + dev_kfree_skb_any(card->cmd_buf); card->cmd_buf = NULL; } From efa8cd1e2f0ba01ae9477ccf4abe9fd9f7f1d0a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Fri, 24 Feb 2017 15:05:14 +0100 Subject: [PATCH 183/465] ima: accept previously set IMA_NEW_FILE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1ac202e978e18f045006d75bd549612620c6ec3a upstream. Modifying the attributes of a file makes ima_inode_post_setattr reset the IMA cache flags. So if the file, which has just been created, is opened a second time before the first file descriptor is closed, verification fails since the security.ima xattr has not been written yet. We therefore have to look at the IMA_NEW_FILE even if the file already existed. With this patch there should no longer be an error when cat tries to open testfile: $ rm -f testfile $ ( echo test >&3 ; touch testfile ; cat testfile ) 3>testfile A file being new is no reason to accept that it is missing a digital signature demanded by the policy. 
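The resulting appraisal decision can be sketched as follows (flag names here are simplified stand-ins for the iint flags, not the IMA code itself):

#include <stdbool.h>

#define FLAG_NEW_FILE        (1u << 0)
#define FLAG_DIGSIG_REQUIRED (1u << 1)

/* Called when the security.ima xattr is missing. */
static bool missing_xattr_may_pass(unsigned int *iint_flags, bool file_created)
{
	if (file_created)
		*iint_flags |= FLAG_NEW_FILE;	/* remember the file is new */

	/* A file that is (still) new may pass, unless the policy demands
	 * a digital signature. */
	return (*iint_flags & FLAG_NEW_FILE) &&
	       !(*iint_flags & FLAG_DIGSIG_REQUIRED);
}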
Signed-off-by: Daniel Glöckner Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_appraise.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 1fd9539a969d..5d0785cfe063 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -207,10 +207,11 @@ int ima_appraise_measurement(enum ima_hooks func, cause = "missing-hash"; status = INTEGRITY_NOLABEL; - if (opened & FILE_CREATED) { + if (opened & FILE_CREATED) iint->flags |= IMA_NEW_FILE; + if ((iint->flags & IMA_NEW_FILE) && + !(iint->flags & IMA_DIGSIG_REQUIRED)) status = INTEGRITY_PASS; - } goto out; } From 8223e8f994d2a06418eff5349c9532aa53ca4cb0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 11 May 2017 02:58:55 -0700 Subject: [PATCH 184/465] KVM: x86: Fix load damaged SSEx MXCSR register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a575813bfe4bc15aba511a5e91e61d242bff8b9d upstream. Reported by syzkaller: BUG: unable to handle kernel paging request at ffffffffc07f6a2e IP: report_bug+0x94/0x120 PGD 348e12067 P4D 348e12067 PUD 348e14067 PMD 3cbd84067 PTE 80000003f7e87161 Oops: 0003 [#1] SMP CPU: 2 PID: 7091 Comm: kvm_load_guest_ Tainted: G OE 4.11.0+ #8 task: ffff92fdfb525400 task.stack: ffffbda6c3d04000 RIP: 0010:report_bug+0x94/0x120 RSP: 0018:ffffbda6c3d07b20 EFLAGS: 00010202 do_trap+0x156/0x170 do_error_trap+0xa3/0x170 ? kvm_load_guest_fpu.part.175+0x12a/0x170 [kvm] ? mark_held_locks+0x79/0xa0 ? retint_kernel+0x10/0x10 ? trace_hardirqs_off_thunk+0x1a/0x1c do_invalid_op+0x20/0x30 invalid_op+0x1e/0x30 RIP: 0010:kvm_load_guest_fpu.part.175+0x12a/0x170 [kvm] ? kvm_load_guest_fpu.part.175+0x1c/0x170 [kvm] kvm_arch_vcpu_ioctl_run+0xed6/0x1b70 [kvm] kvm_vcpu_ioctl+0x384/0x780 [kvm] ? kvm_vcpu_ioctl+0x384/0x780 [kvm] ? sched_clock+0x13/0x20 ? __do_page_fault+0x2a0/0x550 do_vfs_ioctl+0xa4/0x700 ? up_read+0x1f/0x40 ? __do_page_fault+0x2a0/0x550 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc2 SDM mentioned that "The MXCSR has several reserved bits, and attempting to write a 1 to any of these bits will cause a general-protection exception(#GP) to be generated". The syzkaller forks' testcase overrides xsave area w/ random values and steps on the reserved bits of MXCSR register. The damaged MXCSR register values of guest will be restored to SSEx MXCSR register before vmentry. This patch fixes it by catching userspace override MXCSR register reserved bits w/ random values and bails out immediately. 
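The added validation amounts to a mask check along these lines (a sketch, not the KVM code; mxcsr_feature_mask stands for the CPU-reported mask of writable MXCSR bits):

#include <stdbool.h>
#include <stdint.h>

/* Reject values that set reserved MXCSR bits; loading them would raise #GP. */
static bool mxcsr_valid(uint32_t mxcsr, uint32_t mxcsr_feature_mask)
{
	return (mxcsr & ~mxcsr_feature_mask) == 0;
}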
Reported-by: Andrey Konovalov Reviewed-by: Paolo Bonzini Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/init.c | 1 + arch/x86/kvm/x86.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index c2f8dde3255c..d5d44c452624 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -90,6 +90,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) * Boot time FPU feature detection code: */ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +EXPORT_SYMBOL_GPL(mxcsr_feature_mask); static void __init fpu__init_system_mxcsr(void) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 421a069b5429..3a969ac83b58 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3307,11 +3307,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, } } +#define XSAVE_MXCSR_OFFSET 24 + static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { u64 xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; + u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; if (boot_cpu_has(X86_FEATURE_XSAVE)) { /* @@ -3319,11 +3322,13 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility * with old userspace. */ - if (xstate_bv & ~kvm_supported_xcr0()) + if (xstate_bv & ~kvm_supported_xcr0() || + mxcsr & ~mxcsr_feature_mask) return -EINVAL; load_xsave(vcpu, (u8 *)guest_xsave->region); } else { - if (xstate_bv & ~XFEATURE_MASK_FPSSE) + if (xstate_bv & ~XFEATURE_MASK_FPSSE || + mxcsr & ~mxcsr_feature_mask) return -EINVAL; memcpy(&vcpu->arch.guest_fpu.state.fxsave, guest_xsave->region, sizeof(struct fxregs_state)); From b67dcf8cf82c05e352321d5d60c9b8ef8acee93c Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 11 May 2017 18:12:05 -0700 Subject: [PATCH 185/465] KVM: x86: Fix potential preemption when get the current kvmclock timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e2c2206a18993bc9f62393d49c7b2066c3845b25 upstream. BUG: using __this_cpu_read() in preemptible [00000000] code: qemu-system-x86/2809 caller is __this_cpu_preempt_check+0x13/0x20 CPU: 2 PID: 2809 Comm: qemu-system-x86 Not tainted 4.11.0+ #13 Call Trace: dump_stack+0x99/0xce check_preemption_disabled+0xf5/0x100 __this_cpu_preempt_check+0x13/0x20 get_kvmclock_ns+0x6f/0x110 [kvm] get_time_ref_counter+0x5d/0x80 [kvm] kvm_hv_process_stimers+0x2a1/0x8a0 [kvm] ? kvm_hv_process_stimers+0x2a1/0x8a0 [kvm] ? kvm_arch_vcpu_ioctl_run+0xac9/0x1ce0 [kvm] kvm_arch_vcpu_ioctl_run+0x5bf/0x1ce0 [kvm] kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? __fget+0xf3/0x210 do_vfs_ioctl+0xa4/0x700 ? __fget+0x114/0x210 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc2 RIP: 0033:0x7f9d164ed357 ? __this_cpu_preempt_check+0x13/0x20 This can be reproduced by run kvm-unit-tests/hyperv_stimer.flat w/ CONFIG_PREEMPT and CONFIG_DEBUG_PREEMPT enabled. Safe access to per-CPU data requires a couple of constraints, though: the thread working with the data cannot be preempted and it cannot be migrated while it manipulates per-CPU variables. If the thread is preempted, the thread that replaces it could try to work with the same variables; migration to another CPU could also cause confusion. 
However there is no preemption disable when reads host per-CPU tsc rate to calculate the current kvmclock timestamp. This patch fixes it by utilizing get_cpu/put_cpu pair to guarantee both __this_cpu_read() and rdtsc() are not preempted. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3a969ac83b58..25c4d8c95436 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1779,6 +1779,7 @@ static u64 __get_kvmclock_ns(struct kvm *kvm) { struct kvm_arch *ka = &kvm->arch; struct pvclock_vcpu_time_info hv_clock; + u64 ret; spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { @@ -1790,10 +1791,17 @@ static u64 __get_kvmclock_ns(struct kvm *kvm) hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; spin_unlock(&ka->pvclock_gtod_sync_lock); + /* both __this_cpu_read() and rdtsc() should be on the same cpu */ + get_cpu(); + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, &hv_clock.tsc_shift, &hv_clock.tsc_to_system_mul); - return __pvclock_read_cycles(&hv_clock, rdtsc()); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + + put_cpu(); + + return ret; } u64 get_kvmclock_ns(struct kvm *kvm) From 5bbaf88d7953bd0845f54e834b330d4af30aecf9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 19 May 2017 02:46:56 -0700 Subject: [PATCH 186/465] KVM: X86: Fix read out-of-bounds vulnerability in kvm pio emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cbfc6c9184ce71b52df4b1d82af5afc81a709178 upstream. Huawei folks reported a read out-of-bounds vulnerability in kvm pio emulation. - "inb" instruction to access PIT Mod/Command register (ioport 0x43, write only, a read should be ignored) in guest can get a random number. - "rep insb" instruction to access PIT register port 0x43 can control memcpy() in emulator_pio_in_emulated() to copy max 0x400 bytes but only read 1 bytes, which will disclose the unimportant kernel memory in host but no crash. The similar test program below can reproduce the read out-of-bounds vulnerability: void hexdump(void *mem, unsigned int len) { unsigned int i, j; for(i = 0; i < len + ((len % HEXDUMP_COLS) ? 
(HEXDUMP_COLS - len % HEXDUMP_COLS) : 0); i++) { /* print offset */ if(i % HEXDUMP_COLS == 0) { printf("0x%06x: ", i); } /* print hex data */ if(i < len) { printf("%02x ", 0xFF & ((char*)mem)[i]); } else /* end of block, just aligning for ASCII dump */ { printf(" "); } /* print ASCII dump */ if(i % HEXDUMP_COLS == (HEXDUMP_COLS - 1)) { for(j = i - (HEXDUMP_COLS - 1); j <= i; j++) { if(j >= len) /* end of block, not really printing */ { putchar(' '); } else if(isprint(((char*)mem)[j])) /* printable char */ { putchar(0xFF & ((char*)mem)[j]); } else /* other char */ { putchar('.'); } } putchar('\n'); } } } int main(void) { int i; if (iopl(3)) { err(1, "set iopl unsuccessfully\n"); return -1; } static char buf[0x40]; /* test ioport 0x40,0x41,0x42,0x43,0x44,0x45 */ memset(buf, 0xab, sizeof(buf)); asm volatile("push %rdi;"); asm volatile("mov %0, %%rdi;"::"q"(buf)); asm volatile ("mov $0x40, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x41, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x42, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x43, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x44, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("mov $0x45, %rdx;"); asm volatile ("in %dx,%al;"); asm volatile ("stosb;"); asm volatile ("pop %rdi;"); hexdump(buf, 0x40); printf("\n"); /* ins port 0x40 */ memset(buf, 0xab, sizeof(buf)); asm volatile("push %rdi;"); asm volatile("mov %0, %%rdi;"::"q"(buf)); asm volatile ("mov $0x20, %rcx;"); asm volatile ("mov $0x40, %rdx;"); asm volatile ("rep insb;"); asm volatile ("pop %rdi;"); hexdump(buf, 0x40); printf("\n"); /* ins port 0x43 */ memset(buf, 0xab, sizeof(buf)); asm volatile("push %rdi;"); asm volatile("mov %0, %%rdi;"::"q"(buf)); asm volatile ("mov $0x20, %rcx;"); asm volatile ("mov $0x43, %rdx;"); asm volatile ("rep insb;"); asm volatile ("pop %rdi;"); hexdump(buf, 0x40); printf("\n"); return 0; } The vcpu->arch.pio_data buffer is used by both in/out instrutions emulation w/o clear after using which results in some random datas are left over in the buffer. Guest reads port 0x43 will be ignored since it is write only, however, the function kernel_pio() can't distigush this ignore from successfully reads data from device's ioport. There is no new data fill the buffer from port 0x43, however, emulator_pio_in_emulated() will copy the stale data in the buffer to the guest unconditionally. This patch fixes it by clearing the buffer before in instruction emulation to avoid to grant guest the stale data in the buffer. In addition, string I/O is not supported for in kernel device. So there is no iteration to read ioport %RCX times for string I/O. The function kernel_pio() just reads one round, and then copy the io size * %RCX to the guest unconditionally, actually it copies the one round ioport data w/ other random datas which are left over in the vcpu->arch.pio_data buffer to the guest. This patch fixes it by introducing the string I/O support for in kernel device in order to grant the right ioport datas to the guest. Before the patch: 0x000000: fe 38 93 93 ff ff ab ab .8...... 0x000008: ab ab ab ab ab ab ab ab ........ 0x000010: ab ab ab ab ab ab ab ab ........ 0x000018: ab ab ab ab ab ab ab ab ........ 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ 
0x000000: f6 00 00 00 00 00 00 00 ........ 0x000008: 00 00 00 00 00 00 00 00 ........ 0x000010: 00 00 00 00 4d 51 30 30 ....MQ00 0x000018: 30 30 20 33 20 20 20 20 00 3 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ 0x000000: f6 00 00 00 00 00 00 00 ........ 0x000008: 00 00 00 00 00 00 00 00 ........ 0x000010: 00 00 00 00 4d 51 30 30 ....MQ00 0x000018: 30 30 20 33 20 20 20 20 00 3 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ After the patch: 0x000000: 1e 02 f8 00 ff ff ab ab ........ 0x000008: ab ab ab ab ab ab ab ab ........ 0x000010: ab ab ab ab ab ab ab ab ........ 0x000018: ab ab ab ab ab ab ab ab ........ 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ 0x000000: d2 e2 d2 df d2 db d2 d7 ........ 0x000008: d2 d3 d2 cf d2 cb d2 c7 ........ 0x000010: d2 c4 d2 c0 d2 bc d2 b8 ........ 0x000018: d2 b4 d2 b0 d2 ac d2 a8 ........ 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ 0x000000: 00 00 00 00 00 00 00 00 ........ 0x000008: 00 00 00 00 00 00 00 00 ........ 0x000010: 00 00 00 00 00 00 00 00 ........ 0x000018: 00 00 00 00 00 00 00 00 ........ 0x000020: ab ab ab ab ab ab ab ab ........ 0x000028: ab ab ab ab ab ab ab ab ........ 0x000030: ab ab ab ab ab ab ab ab ........ 0x000038: ab ab ab ab ab ab ab ab ........ Reported-by: Moguofang Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Moguofang Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 25c4d8c95436..a4a2bae7c274 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4862,16 +4862,20 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) { - /* TODO: String I/O for in kernel device */ - int r; + int r = 0, i; - if (vcpu->arch.pio.in) - r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, - vcpu->arch.pio.size, pd); - else - r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, - vcpu->arch.pio.port, vcpu->arch.pio.size, - pd); + for (i = 0; i < vcpu->arch.pio.count; i++) { + if (vcpu->arch.pio.in) + r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, + vcpu->arch.pio.size, pd); + else + r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, + vcpu->arch.pio.port, vcpu->arch.pio.size, + pd); + if (r) + break; + pd += vcpu->arch.pio.size; + } return r; } @@ -4909,6 +4913,8 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (vcpu->arch.pio.count) goto data_avail; + memset(vcpu->arch.pio_data, 0, size * count); + ret = emulator_pio_in_out(vcpu, size, port, val, count, true); if (ret) { data_avail: From efb209c87ab2033e7575d29294711a53650a99d2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 May 2017 18:26:54 -0700 Subject: [PATCH 187/465] x86: fix 32-bit case of __get_user_asm_u64() commit 33c9e9729033387ef0521324c62e7eba529294af upstream. 
The code to fetch a 64-bit value from user space was entirely buggered, and has been since the code was merged in early 2016 in commit b2f680380ddf ("x86/mm/32: Add support for 64-bit __get_user() on 32-bit kernels"). Happily the buggered routine is almost certainly entirely unused, since the normal way to access user space memory is just with the non-inlined "get_user()", and the inlined version didn't even historically exist. The normal "get_user()" case is handled by external hand-written asm in arch/x86/lib/getuser.S that doesn't have either of these issues. There were two independent bugs in __get_user_asm_u64(): - it still did the STAC/CLAC user space access marking, even though that is now done by the wrapper macros, see commit 11f1a4b9755f ("x86: reorganize SMAP handling in user space accesses"). This didn't result in a semantic error, it just means that the inlined optimized version was hugely less efficient than the allegedly slower standard version, since the CLAC/STAC overhead is quite high on modern Intel CPU's. - the double register %eax/%edx was marked as an output, but the %eax part of it was touched early in the asm, and could thus clobber other inputs to the asm that gcc didn't expect it to touch. In particular, that meant that the generated code could look like this: mov (%eax),%eax mov 0x4(%eax),%edx where the load of %edx obviously was _supposed_ to be from the 32-bit word that followed the source of %eax, but because %eax was overwritten by the first instruction, the source of %edx was basically random garbage. The fixes are trivial: remove the extraneous STAC/CLAC entries, and mark the 64-bit output as early-clobber to let gcc know that no inputs should alias with the output register. Cc: Al Viro Cc: Benjamin LaHaise Cc: Ingo Molnar Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index ea148313570f..dead0f3921f3 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -324,10 +324,10 @@ do { \ #define __get_user_asm_u64(x, ptr, retval, errret) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: movl %2,%%eax\n" \ "2: movl %3,%%edx\n" \ - "3: " ASM_CLAC "\n" \ + "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: mov %4,%0\n" \ " xorl %%eax,%%eax\n" \ @@ -336,7 +336,7 @@ do { \ ".previous\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - : "=r" (retval), "=A"(x) \ + : "=r" (retval), "=&A"(x) \ : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ "i" (errret), "0" (retval)); \ }) From fde4f79074beee51ee0f73d73a8e7c8502a7c039 Mon Sep 17 00:00:00 2001 From: Wadim Egorov Date: Wed, 22 Mar 2017 16:50:50 +0100 Subject: [PATCH 188/465] regulator: rk808: Fix RK818 LDO2 commit 75f88115391156b3f0fecbbae76bf870c89bcab8 upstream. Set the correct voltage select register for LDO2. 
Signed-off-by: Wadim Egorov Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/rk808-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index fb44d5215e30..a16d81420612 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -519,7 +519,7 @@ static const struct regulator_desc rk818_reg[] = { RK818_LDO1_ON_VSEL_REG, RK818_LDO_VSEL_MASK, RK818_LDO_EN_REG, BIT(0), 400), RK8XX_DESC(RK818_ID_LDO2, "LDO_REG2", "vcc6", 1800, 3400, 100, - RK818_LDO1_ON_VSEL_REG, RK818_LDO_VSEL_MASK, RK818_LDO_EN_REG, + RK818_LDO2_ON_VSEL_REG, RK818_LDO_VSEL_MASK, RK818_LDO_EN_REG, BIT(1), 400), { .name = "LDO_REG3", From d2cc3a8cb9e2a1b84aaa4c1a68273e3d24677e06 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Mon, 17 Apr 2017 10:23:36 +0200 Subject: [PATCH 189/465] regulator: tps65023: Fix inverted core enable logic. commit c90722b54a4f5e21ac59301ed9a6dbaa439bdb16 upstream. Commit 43530b69d758328d3ffe6ab98fd640463e8e3667 ("regulator: Use regmap_read/write(), regmap_update_bits functions directly") intended to replace working inline helper functions with standard regmap calls. However, it also inverted the set/clear logic of the "CORE ADJ Allowed" bit. That patch was clearly never tested, since without that bit cleared, the core VDCDC1 voltage output does not react to I2C configuration changes. This patch fixes the issue by clearing the bit as in the original, correct implementation. Note for stable back porting that, due to subsequent driver churn, this patch will not apply on every kernel version. Fixes: 43530b69d758 ("regulator: Use regmap_read/write(), regmap_update_bits functions directly") Signed-off-by: Richard Cochran Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/tps65023-regulator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/regulator/tps65023-regulator.c b/drivers/regulator/tps65023-regulator.c index d2c3d7cc35f5..5ca6d2130593 100644 --- a/drivers/regulator/tps65023-regulator.c +++ b/drivers/regulator/tps65023-regulator.c @@ -311,8 +311,7 @@ static int tps_65023_probe(struct i2c_client *client, /* Enable setting output voltage by I2C */ regmap_update_bits(tps->regmap, TPS65023_REG_CON_CTRL2, - TPS65023_REG_CTRL2_CORE_ADJ, - TPS65023_REG_CTRL2_CORE_ADJ); + TPS65023_REG_CTRL2_CORE_ADJ, 0); return 0; } From 5d1adbaf362a5fe1eabde24229076b2c168809b0 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 23 Mar 2017 21:02:54 +0100 Subject: [PATCH 190/465] s390/kdump: Add final note commit dcc00b79fc3d076832f7240de8870f492629b171 upstream. Since linux v3.14 with commit 38dfac843cb6d7be1 ("vmcore: prevent PT_NOTE p_memsz overflow during header update") on s390 we get the following message in the kdump kernel: Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x6b6b6b6b, n_descsz=0x6b6b6b6b The reason for this is that we don't create a final zero note in the ELF header which the proc/vmcore code uses to find out the end of the notes section (see also kernel/kexec_core.c:final_note()). It still worked on s390 by chance because we (most of the time?) 
have the byte pattern 0x6b6b6b6b after the notes section which also makes the notes parsing code stop in update_note_header_size_elf64() because 0x6b6b6b6b is interpreted as note size: if ((real_sz + sz) > max_sz) { pr_warn("Warning: Exceeded p_memsz, dropping P ...); break; } So fix this and add the missing final note to the ELF header. We don't have to adjust the memory size for ELF header ("alloc_size") because the new ELF note still fits into the 0x1000 base memory. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/crash_dump.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd1d5c62c374..d628afc26708 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -428,6 +428,20 @@ static void *nt_vmcoreinfo(void *ptr) return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } +/* + * Initialize final note (needed for /proc/vmcore code) + */ +static void *nt_final(void *ptr) +{ + Elf64_Nhdr *note; + + note = (Elf64_Nhdr *) ptr; + note->n_namesz = 0; + note->n_descsz = 0; + note->n_type = 0; + return PTR_ADD(ptr, sizeof(Elf64_Nhdr)); +} + /* * Initialize ELF header (new kernel) */ @@ -515,6 +529,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) if (sa->prefix != 0) ptr = fill_cpu_elf_notes(ptr, cpu++, sa); ptr = nt_vmcoreinfo(ptr); + ptr = nt_final(ptr); memset(phdr, 0, sizeof(*phdr)); phdr->p_type = PT_NOTE; phdr->p_offset = notes_offset; From 82e31de01826459a8ea0829f3f520c95d3fe2b5e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 2 May 2017 13:36:00 +0200 Subject: [PATCH 191/465] s390/cputime: fix incorrect system time commit 07a63cbe8bcb6ba72fb989dcab1ec55ec6c36c7e upstream. git commit c5328901aa1db134 "[S390] entry[64].S improvements" removed the update of the exit_timer lowcore field from the critical section cleanup of the .Lsysc_restore/.Lsysc_done and .Lio_restore/.Lio_done blocks. If the PSW is updated by the critical section cleanup to point to user space again, the interrupt entry code will do a vtime calculation after the cleanup completed with an exit_timer value which has *not* been updated. Due to this, incorrect system time deltas are calculated. If an interrupt occurred with an old PSW between .Lsysc_restore/.Lsysc_done or .Lio_restore/.Lio_done, update __LC_EXIT_TIMER with the system entry time of the interrupt.
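To make the final-note requirement from the kdump fix above concrete, here is a hedged user-space sketch of generic ELF note walking (not the s390 code): a consumer of a PT_NOTE segment advances by the name and descriptor sizes found in each header, so without a terminating all-zero Elf64_Nhdr it keeps going until it trips over whatever trailing bytes follow, which on s390 happened to be the 0x6b poison pattern.

#include <elf.h>
#include <stdio.h>
#include <string.h>

#define ALIGN4(x) (((x) + 3U) & ~3U)

/* Walk a PT_NOTE-style buffer the way /proc/vmcore consumers do. */
static void walk_notes(const unsigned char *p, size_t len)
{
	size_t off = 0;

	while (off + sizeof(Elf64_Nhdr) <= len) {
		const Elf64_Nhdr *n = (const Elf64_Nhdr *)(p + off);

		if (n->n_namesz == 0 && n->n_descsz == 0 && n->n_type == 0) {
			printf("final note at offset %zu, clean stop\n", off);
			return;
		}
		off += sizeof(*n) + ALIGN4(n->n_namesz) + ALIGN4(n->n_descsz);
	}
	printf("no terminator, wandered into trailing junk\n");
}

int main(void)
{
	unsigned char seg[64];
	Elf64_Nhdr note = { .n_namesz = 5, .n_descsz = 4, .n_type = 1 };

	memset(seg, 0x6b, sizeof(seg));			/* poison, as on s390 */
	memcpy(seg, &note, sizeof(note));
	memcpy(seg + sizeof(note), "CORE", 5);		/* note name */
	walk_notes(seg, sizeof(seg));			/* bogus 0x6b6b6b6b sizes */

	/* Append the all-zero terminator the patch adds via nt_final(). */
	memset(seg + sizeof(note) + ALIGN4(5) + ALIGN4(4), 0, sizeof(Elf64_Nhdr));
	walk_notes(seg, sizeof(seg));
	return 0;
}
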
Tested-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/entry.S | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6a7d737d514c..f3920d684b0e 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -314,6 +314,7 @@ ENTRY(system_call) lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) +.Lsysc_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) @@ -601,6 +602,7 @@ ENTRY(io_int_handler) lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) +.Lio_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) @@ -1124,15 +1126,23 @@ cleanup_critical: br %r14 .Lcleanup_sysc_restore: + # check if stpt has been executed clg %r9,BASED(.Lcleanup_sysc_restore_insn) + jh 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC je 0f + mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER +0: clg %r9,BASED(.Lcleanup_sysc_restore_insn+8) + je 1f lg %r9,24(%r11) # get saved pointer to pt_regs mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) -0: lmg %r8,%r9,__LC_RETURN_PSW +1: lmg %r8,%r9,__LC_RETURN_PSW br %r14 .Lcleanup_sysc_restore_insn: + .quad .Lsysc_exit_timer .quad .Lsysc_done - 4 .Lcleanup_io_tif: @@ -1140,15 +1150,20 @@ cleanup_critical: br %r14 .Lcleanup_io_restore: + # check if stpt has been executed clg %r9,BASED(.Lcleanup_io_restore_insn) - je 0f + jh 0f + mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER +0: clg %r9,BASED(.Lcleanup_io_restore_insn+8) + je 1f lg %r9,24(%r11) # get saved r11 pointer to pt_regs mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) -0: lmg %r8,%r9,__LC_RETURN_PSW +1: lmg %r8,%r9,__LC_RETURN_PSW br %r14 .Lcleanup_io_restore_insn: + .quad .Lio_exit_timer .quad .Lio_done - 4 .Lcleanup_idle: From 43068b4585721924f83b93f3ea2986b4b3dfb896 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Wed, 8 Mar 2017 13:52:07 +0200 Subject: [PATCH 192/465] ath9k_htc: Add support of AirTies 1eda:2315 AR9271 device commit 16ff1fb0e32f76a5d285a6f23b82d21aa52813c6 upstream. T: Bus=01 Lev=02 Prnt=02 Port=02 Cnt=01 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ff(vend.) Sub=ff Prot=ff MxPS=64 #Cfgs= 1 P: Vendor=1eda ProdID=2315 Rev=01.08 S: Manufacturer=ATHEROS S: Product=USB2.0 WLAN S: SerialNumber=12345 C: #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 6 Cls=ff(vend.) 
Sub=00 Prot=00 Driver=(none) Signed-off-by: Dmitry Tunin Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hif_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index de2d212f39ec..05dd056cab6e 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -37,6 +37,7 @@ static struct usb_device_id ath9k_hif_usb_ids[] = { { USB_DEVICE(0x0cf3, 0xb002) }, /* Ubiquiti WifiStation */ { USB_DEVICE(0x057c, 0x8403) }, /* AVM FRITZ!WLAN 11N v2 USB */ { USB_DEVICE(0x0471, 0x209e) }, /* Philips (or NXP) PTA01 */ + { USB_DEVICE(0x1eda, 0x2315) }, /* AirTies */ { USB_DEVICE(0x0cf3, 0x7015), .driver_info = AR9287_USB }, /* Atheros */ From fab4402325ee8fa32805d4b1601b3ceef6575bed Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:44:20 +0100 Subject: [PATCH 193/465] ath9k_htc: fix NULL-deref at probe commit ebeb36670ecac36c179b5fb5d5c88ff03ba191ec upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. Fixes: 36bcce430657 ("ath9k_htc: Handle storage devices") Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hif_usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index 05dd056cab6e..12aa8abbcba4 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -1220,6 +1220,9 @@ static int send_eject_command(struct usb_interface *interface) u8 bulk_out_ep; int r; + if (iface_desc->desc.bNumEndpoints < 2) + return -ENODEV; + /* Find bulk out endpoint */ for (r = 1; r >= 0; r--) { endpoint = &iface_desc->endpoint[r].desc; From de2fa45aa9581905632bdd6af7941d100d134af7 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 29 Mar 2017 22:09:11 +0200 Subject: [PATCH 194/465] drm/amdgpu: Make display watermark calculations more accurate commit d63c277dc672e0c568481af043359420fa9d4736 upstream. Avoid big roundoff errors in scanline/hactive durations for high pixel clocks, especially for >= 500 Mhz, and thereby program more accurate display fifo watermarks. Implemented here for DCE 6,8,10,11. Successfully tested on DCE 10 with AMD R9 380 Tonga. 
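To put a number on the roundoff error the active_time/line_time change above avoids, here is a small stand-alone sketch of the arithmetic with a hypothetical 600 MHz mode (not driver code; it only relies on DRM's convention that mode->clock is in kHz). Truncating the per-pixel period to whole nanoseconds first loses roughly 40% of the line duration at such clocks, whereas multiplying first and dividing last keeps the error under a nanosecond.

#include <stdio.h>

int main(void)
{
	/* Hypothetical mode: 3840 active pixels, 4000 total, 600 MHz dot
	 * clock.  drm_display_mode.clock is in kHz, hence 600000. */
	unsigned long hdisplay = 3840, htotal = 4000, clock_khz = 600000;

	/* Old scheme: truncate the per-pixel period first, then scale. */
	unsigned long pixel_period  = 1000000 / clock_khz;	/* 1 ns, really 1.67 ns */
	unsigned long line_time_old = htotal * pixel_period;	/* 4000 ns */

	/* New scheme: multiply first, divide last, truncate only once. */
	unsigned long active_time   = 1000000UL * hdisplay / clock_khz;	/* 6400 ns */
	unsigned long line_time_new = 1000000UL * htotal / clock_khz;	/* 6666 ns */

	printf("line time: old %lu ns vs new %lu ns, active time %lu ns\n",
	       line_time_old, line_time_new, active_time);
	return 0;
}
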
Reviewed-by: Alex Deucher Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index d4452d8f76ca..d3db921fc771 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1214,14 +1214,14 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, { struct drm_display_mode *mode = &amdgpu_crtc->base.mode; struct dce10_wm_params wm_low, wm_high; - u32 pixel_period; + u32 active_time; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1236,7 +1236,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, wm_high.disp_clk = mode->clock; wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.active_time = active_time; wm_high.blank_time = line_time - wm_high.active_time; wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1275,7 +1275,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, wm_low.disp_clk = mode->clock; wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.active_time = active_time; wm_low.blank_time = line_time - wm_low.active_time; wm_low.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 5b24e89552ec..15ee8eb77656 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1183,14 +1183,14 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, { struct drm_display_mode *mode = &amdgpu_crtc->base.mode; struct dce10_wm_params wm_low, wm_high; - u32 pixel_period; + u32 active_time; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1205,7 +1205,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, wm_high.disp_clk = mode->clock; wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.active_time = active_time; wm_high.blank_time = line_time - wm_high.active_time; wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1244,7 +1244,7 @@ static void dce_v11_0_program_watermarks(struct 
amdgpu_device *adev, wm_low.disp_clk = mode->clock; wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.active_time = active_time; wm_low.blank_time = line_time - wm_low.active_time; wm_low.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 809aa94a0cc1..cb9158b0e52d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -986,7 +986,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, struct drm_display_mode *mode = &amdgpu_crtc->base.mode; struct dce6_wm_params wm_low, wm_high; u32 dram_channels; - u32 pixel_period; + u32 active_time; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; u32 priority_a_mark = 0, priority_b_mark = 0; @@ -996,8 +996,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, fixed20_12 a, b, c; if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); priority_a_cnt = 0; priority_b_cnt = 0; @@ -1016,7 +1016,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, wm_high.disp_clk = mode->clock; wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.active_time = active_time; wm_high.blank_time = line_time - wm_high.active_time; wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1043,7 +1043,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, wm_low.disp_clk = mode->clock; wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.active_time = active_time; wm_low.blank_time = line_time - wm_low.active_time; wm_low.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index d2590d75aa11..d547bcf0dbc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1098,14 +1098,14 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, { struct drm_display_mode *mode = &amdgpu_crtc->base.mode; struct dce8_wm_params wm_low, wm_high; - u32 pixel_period; + u32 active_time; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1120,7 +1120,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, wm_high.disp_clk = mode->clock; wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.active_time = active_time; wm_high.blank_time = line_time - wm_high.active_time; wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1159,7 +1159,7 @@ static void dce_v8_0_program_watermarks(struct 
amdgpu_device *adev, wm_low.disp_clk = mode->clock; wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.active_time = active_time; wm_low.blank_time = line_time - wm_low.active_time; wm_low.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) From 3f0db81c7d2f499320a358da0ae052edcc3baad9 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 29 Mar 2017 22:09:12 +0200 Subject: [PATCH 195/465] drm/amdgpu: Avoid overflows/divide-by-zero in latency_watermark calculations. commit e190ed1ea7458e446230de4113cc5d53b8dc4ec8 upstream. At dot clocks > approx. 250 Mhz, some of these calcs will overflow and cause miscalculation of latency watermarks, and for some overflows also divide-by-zero driver crash ("divide error: 0000 [#1] PREEMPT SMP" in "dce_v10_0_latency_watermark+0x12d/0x190"). This zero-divide happened, e.g., on AMD Tonga Pro under DCE-10, on a Displayport panel when trying to set a video mode of 2560x1440 at 165 Hz vrefresh with a dot clock of 635.540 Mhz. Refine calculations to avoid the overflows. Tested for DCE-10 with R9 380 Tonga + ASUS ROG PG279 panel. Reviewed-by: Alex Deucher Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 19 +++---------------- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 19 +++---------------- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 19 +++---------------- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 19 +++---------------- 4 files changed, 12 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index d3db921fc771..33541acdf329 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1090,23 +1090,10 @@ static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 15ee8eb77656..1388f8a44a2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1059,23 +1059,10 @@ static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = 
dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index cb9158b0e52d..bad52c04eebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -861,23 +861,10 @@ static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index d547bcf0dbc0..e52fc925b414 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -974,23 +974,10 @@ static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); From 21d3947bf73b5e2cbb5e8dbbc74f76925ab8be9b Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Mon, 24 Apr 2017 01:02:46 +0200 Subject: [PATCH 196/465] drm/amdgpu: Add missing lb_vblank_lead_lines setup to DCE-6 path. commit effaf848b957fbf72a3b6a1ad87f5e031eda0b75 upstream. This apparently got lost when implementing the new DCE-6 support and would cause failures in pageflip scheduling and timestamping. 
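A note on the overflow avoidance in the latency watermark patch above: the replacement expression dmif_size * disp_clk / (mc_latency + 512) needs a 64-bit intermediate at high dot clocks, which is exactly what the switch to div_u64() with u64 casts provides. The sketch below is stand-alone and uses made-up values of roughly the driver's magnitude to show how far off a plain 32-bit product would be.

#include <stdio.h>

int main(void)
{
	/* Made-up values of roughly the right magnitude: a 12 KiB DMIF
	 * buffer, a 635.54 MHz display clock in kHz, and a hypothetical
	 * (mc_latency + 512) of 2512 ns. */
	unsigned int dmif_size = 12288, disp_clk = 635540, latency = 2512;

	/* 32-bit product: 12288 * 635540 is ~7.8e9 and wraps modulo 2^32. */
	unsigned int bad  = dmif_size * disp_clk / latency;

	/* 64-bit product, which is what div_u64((u64)a * (u64)b, c) does. */
	unsigned int good = (unsigned int)((unsigned long long)dmif_size *
					   disp_clk / latency);

	printf("32-bit intermediate: %u, 64-bit intermediate: %u\n", bad, good);
	return 0;
}
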
Signed-off-by: Mario Kleiner Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index bad52c04eebd..ea5365580b2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -979,7 +979,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, u32 priority_a_mark = 0, priority_b_mark = 0; u32 priority_a_cnt = PRIORITY_OFF; u32 priority_b_cnt = PRIORITY_OFF; - u32 tmp, arb_control3; + u32 tmp, arb_control3, lb_vblank_lead_lines = 0; fixed20_12 a, b, c; if (amdgpu_crtc->base.enabled && num_heads && mode) { @@ -1091,6 +1091,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, c.full = dfixed_div(c, a); priority_b_mark = dfixed_trunc(c); priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK; + + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); } /* select wm A */ @@ -1120,6 +1122,9 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; amdgpu_crtc->wm_high = latency_watermark_a; + + /* Save number of lines the linebuffer leads before the scanout */ + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } /* watermark setup */ From 2138faf017517acf837816101f778d5c9cda37ef Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 May 2017 17:33:39 +1000 Subject: [PATCH 197/465] drm/nouveau/therm: remove ineffective workarounds for alarm bugs commit e4311ee51d1e2676001b2d8fcefd92bdd79aad85 upstream. These were ineffective due to touching the list without the alarm lock, but should no longer be required. 
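The "touching the list without the alarm lock" point above is the classic check-then-act race: the removed guards tested list_empty() without holding the timer lock and only then called into the alarm code, so the answer could be stale by the time anything acted on it. Below is a hedged user-space toy of the same shape (a boolean and pthreads standing in for the alarm list and tmr->lock; not nouveau code, and the race only manifests when the threads genuinely interleave). After the tmr fixes later in this series, nvkm_timer_alarm() itself takes tmr->lock and uses list_del_init(), so calling it unconditionally is safe and the unlocked guard can simply go.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for "if (list_empty(&alarm->head)) nvkm_timer_alarm(...)". */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool pending;
static int insertions;

static void *caller(void *arg)
{
	(void)arg;
	if (!pending) {				/* unlocked check: may be stale */
		pthread_mutex_lock(&lock);
		pending = true;
		insertions++;			/* two inserts = corrupted list */
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)					/* build with -pthread */
{
	pthread_t a, b;

	pthread_create(&a, NULL, caller, NULL);
	pthread_create(&b, NULL, caller, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("insertions: %d (anything above 1 means the check raced)\n",
	       insertions);
	return 0;
}
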
Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c index df949fa7d05d..be691a7b972f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c @@ -146,7 +146,7 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode) poll = false; } - if (list_empty(&therm->alarm.head) && poll) + if (poll) nvkm_timer_alarm(tmr, 1000000000ULL, &therm->alarm); spin_unlock_irqrestore(&therm->lock, flags); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c index 91198d79393a..e2feccec25f5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c @@ -83,7 +83,7 @@ nvkm_fan_update(struct nvkm_fan *fan, bool immediate, int target) spin_unlock_irqrestore(&fan->lock, flags); /* schedule next fan update, if not at target speed already */ - if (list_empty(&fan->alarm.head) && target != duty) { + if (target != duty) { u16 bump_period = fan->bios.bump_period; u16 slow_down_period = fan->bios.slow_down_period; u64 delay; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c index 59701b7a6597..ff9fbe7950e5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c @@ -53,7 +53,7 @@ nvkm_fantog_update(struct nvkm_fantog *fan, int percent) duty = !nvkm_gpio_get(gpio, 0, DCB_GPIO_FAN, 0xff); nvkm_gpio_set(gpio, 0, DCB_GPIO_FAN, 0xff, duty); - if (list_empty(&fan->alarm.head) && percent != (duty * 100)) { + if (percent != (duty * 100)) { u64 next_change = (percent * fan->period_us) / 100; if (!duty) next_change = fan->period_us - next_change; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c index b9703c02d8ca..9a79e91fdfdc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c @@ -185,7 +185,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm) spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags); /* schedule the next poll in one second */ - if (therm->func->temp_get(therm) >= 0 && list_empty(&alarm->head)) + if (therm->func->temp_get(therm) >= 0) nvkm_timer_alarm(tmr, 1000000000ULL, alarm); } From 4aecf3a73bf0991366e8efb9c8ab4799c327ade0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 1 May 2017 16:52:03 +1000 Subject: [PATCH 198/465] drm/nouveau/kms/nv50: fix source-rect-only plane updates commit 36601c2b36e27435d9be33cfa092120ff69914eb upstream. This "optimisation" (which was originally meant to skip updating cursor settings in the core channel on position-only updates) turned out to be pointless in the final design of the code before it was merged. Remove it completely, as it breaks other cases. 
Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nv50_display.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index a9182d5e6011..2f36be58059d 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -906,11 +906,9 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point))) asyw->set.point = true; - if (!varm || asym || armw->state.fb != asyw->state.fb) { - ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh); - if (ret) - return ret; - } + ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh); + if (ret) + return ret; } else if (varm) { nv50_wndw_atomic_check_release(wndw, asyw, harm); From a5e41c9e5a245fd4cd08012831c68e28ec9402cb Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 1 May 2017 16:53:40 +1000 Subject: [PATCH 199/465] drm/nouveau/kms/nv50: skip core channel cursor update on position-only changes commit e6db95799b1b870aae15682a6d0898df9e9dfb38 upstream. The DRM core used to only call prepare_fb/cleanup_fb() when a plane's framebuffer changed, which achieved the desired effect. It's apparently now up to the driver to decide on its own. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nv50_display.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 2f36be58059d..ce37cbd274da 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -1113,9 +1113,13 @@ static void nv50_curs_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh, struct nv50_wndw_atom *asyw) { - asyh->curs.handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle; - asyh->curs.offset = asyw->image.offset; - asyh->set.curs = asyh->curs.visible; + u32 handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle; + u32 offset = asyw->image.offset; + if (asyh->curs.handle != handle || asyh->curs.offset != offset) { + asyh->curs.handle = handle; + asyh->curs.offset = offset; + asyh->set.curs = asyh->curs.visible; + } } static void From 4c168033dd9a0ef770095461a664cbc860c625b7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 May 2017 16:53:42 +1000 Subject: [PATCH 200/465] drm/nouveau/tmr: ack interrupt before processing alarms commit 3733bd8b407211739e72d051e5f30ad82a52c4bc upstream. Fixes a race where we can miss an alarm that triggers while we're already processing previous alarms. 
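The ordering change above is the usual rule for latched interrupt status: acknowledge the bit before walking the alarm list, so an alarm that expires while the handler is still running re-latches the bit and raises the interrupt again rather than being wiped out by a late ack. A hedged user-space analogue (a plain flag standing in for NV04_PTIMER_INTR_0 bit 0, not nouveau code):

#include <stdbool.h>
#include <stdio.h>

static bool latched;			/* models the PTIMER status bit */

static void process_alarms(void)
{
	latched = true;			/* a new alarm expires mid-processing */
}

static void intr_ack_last(void)
{
	if (latched) {
		process_alarms();
		latched = false;	/* late ack wipes out the new alarm */
	}
}

static void intr_ack_first(void)
{
	if (latched) {
		latched = false;	/* ack first ... */
		process_alarms();	/* ... so the new alarm re-latches */
	}
}

int main(void)
{
	latched = true;
	intr_ack_last();
	printf("ack last : still pending = %d (alarm lost)\n", latched);

	latched = true;
	intr_ack_first();
	printf("ack first: still pending = %d (interrupt fires again)\n", latched);
	return 0;
}
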
Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c index 7b9ce87f0617..7f48249f41de 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c @@ -76,8 +76,8 @@ nv04_timer_intr(struct nvkm_timer *tmr) u32 stat = nvkm_rd32(device, NV04_PTIMER_INTR_0); if (stat & 0x00000001) { - nvkm_timer_alarm_trigger(tmr); nvkm_wr32(device, NV04_PTIMER_INTR_0, 0x00000001); + nvkm_timer_alarm_trigger(tmr); stat &= ~0x00000001; } From 952030b868e98659836dac836648ca815c1631ce Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 May 2017 17:03:05 +1000 Subject: [PATCH 201/465] drm/nouveau/tmr: fix corruption of the pending list when rescheduling an alarm commit 9fc64667ee48c9a25e7dca1a6bcb6906fec5bcc5 upstream. At least therm/fantog "attempts" to work around this issue, which could lead to corruption of the pending alarm list. Fix it properly by not updating the timestamp without the lock held, or trying to add an already pending alarm to the pending alarm list.... Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/nouveau/nvkm/subdev/timer/base.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c index 07dc82bfe346..182f27c73aeb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c @@ -65,14 +65,17 @@ nvkm_timer_alarm(struct nvkm_timer *tmr, u32 nsec, struct nvkm_alarm *alarm) struct nvkm_alarm *list; unsigned long flags; - alarm->timestamp = nvkm_timer_read(tmr) + nsec; - - /* append new alarm to list, in soonest-alarm-first order */ + /* Remove alarm from pending list. + * + * This both protects against the corruption of the list, + * and implements alarm rescheduling/cancellation. + */ spin_lock_irqsave(&tmr->lock, flags); - if (!nsec) { - if (!list_empty(&alarm->head)) - list_del(&alarm->head); - } else { + list_del_init(&alarm->head); + + if (nsec) { + /* Insert into pending list, ordered earliest to latest. */ + alarm->timestamp = nvkm_timer_read(tmr) + nsec; list_for_each_entry(list, &tmr->alarms, head) { if (list->timestamp > alarm->timestamp) break; From 53b8e320383bf7b4311be9ab188515538e712a18 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 May 2017 17:13:29 +1000 Subject: [PATCH 202/465] drm/nouveau/tmr: avoid processing completed alarms when adding a new one commit 330bdf62fe6a6c5b99a647f7bf7157107c9348b3 upstream. The idea here was to avoid having to "manually" program the HW if there's a new earliest alarm. This was lazy and bad, as it leads to loads of fun races between inter-related callers (ie. therm). Turns out, it's not so difficult after all. 
Go figure ;) Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c index 182f27c73aeb..934a98819b43 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c @@ -80,12 +80,22 @@ nvkm_timer_alarm(struct nvkm_timer *tmr, u32 nsec, struct nvkm_alarm *alarm) if (list->timestamp > alarm->timestamp) break; } + list_add_tail(&alarm->head, &list->head); + + /* Update HW if this is now the earliest alarm. */ + list = list_first_entry(&tmr->alarms, typeof(*list), head); + if (list == alarm) { + tmr->func->alarm_init(tmr, alarm->timestamp); + /* This shouldn't happen if callers aren't stupid. + * + * Worst case scenario is that it'll take roughly + * 4 seconds for the next alarm to trigger. + */ + WARN_ON(alarm->timestamp <= nvkm_timer_read(tmr)); + } } spin_unlock_irqrestore(&tmr->lock, flags); - - /* process pending alarms */ - nvkm_timer_alarm_trigger(tmr); } void From 33f379db4711fe71b9a107256fd07ffd040b9735 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 May 2017 17:19:48 +1000 Subject: [PATCH 203/465] drm/nouveau/tmr: handle races with hw when updating the next alarm time commit 1b0f84380b10ee97f7d2dd191294de9017e94d1d upstream. If the time to the next alarm is short enough, we could race with HW and end up with an ~4 second delay until it triggers. Fix this by checking again after we update HW. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/nouveau/nvkm/subdev/timer/base.c | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c index 934a98819b43..f2a86eae0a0d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c @@ -36,23 +36,29 @@ nvkm_timer_alarm_trigger(struct nvkm_timer *tmr) unsigned long flags; LIST_HEAD(exec); - /* move any due alarms off the pending list */ + /* Process pending alarms. */ spin_lock_irqsave(&tmr->lock, flags); list_for_each_entry_safe(alarm, atemp, &tmr->alarms, head) { - if (alarm->timestamp <= nvkm_timer_read(tmr)) - list_move_tail(&alarm->head, &exec); + /* Have we hit the earliest alarm that hasn't gone off? */ + if (alarm->timestamp > nvkm_timer_read(tmr)) { + /* Schedule it. If we didn't race, we're done. */ + tmr->func->alarm_init(tmr, alarm->timestamp); + if (alarm->timestamp > nvkm_timer_read(tmr)) + break; + } + + /* Move to completed list. We'll drop the lock before + * executing the callback so it can reschedule itself. + */ + list_move_tail(&alarm->head, &exec); } - /* reschedule interrupt for next alarm time */ - if (!list_empty(&tmr->alarms)) { - alarm = list_first_entry(&tmr->alarms, typeof(*alarm), head); - tmr->func->alarm_init(tmr, alarm->timestamp); - } else { + /* Shut down interrupt if no more pending alarms. */ + if (list_empty(&tmr->alarms)) tmr->func->alarm_fini(tmr); - } spin_unlock_irqrestore(&tmr->lock, flags); - /* execute any pending alarm handlers */ + /* Execute completed callbacks. 
*/ list_for_each_entry_safe(alarm, atemp, &exec, head) { list_del_init(&alarm->head); alarm->func(alarm); From 9541621a12a6a9b33c8341046fafd148de88cabd Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Mon, 24 Apr 2017 18:56:50 -0400 Subject: [PATCH 204/465] gpio: omap: return error if requested debounce time is not possible commit 83977443938122baeed28dc9f078db3da9855f7c upstream. omap_gpio_debounce() does not validate that the requested debounce is within a range it can handle. Instead it lets the register value wrap silently, and always returns success. This can lead to all sorts of unexpected behavior, such as gpio_keys asking for a too-long debounce, but getting a very short debounce in practice. Fix this by returning -EINVAL if the requested value does not fit into the register field. If there is no debounce clock available at all, return -ENOTSUPP. Fixes: e85ec6c3047b ("gpio: omap: fix omap2_set_gpio_debounce") Signed-off-by: David Rivshin Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-omap.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index efc85a279d54..fd9bce9889d0 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -208,9 +208,11 @@ static inline void omap_gpio_dbck_disable(struct gpio_bank *bank) * OMAP's debounce time is in 31us steps * = (GPIO_DEBOUNCINGTIME[7:0].DEBOUNCETIME + 1) x 31 * so we need to convert and round up to the closest unit. + * + * Return: 0 on success, negative error otherwise. */ -static void omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset, - unsigned debounce) +static int omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset, + unsigned debounce) { void __iomem *reg; u32 val; @@ -218,11 +220,12 @@ static void omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset, bool enable = !!debounce; if (!bank->dbck_flag) - return; + return -ENOTSUPP; if (enable) { debounce = DIV_ROUND_UP(debounce, 31) - 1; - debounce &= OMAP4_GPIO_DEBOUNCINGTIME_MASK; + if ((debounce & OMAP4_GPIO_DEBOUNCINGTIME_MASK) != debounce) + return -EINVAL; } l = BIT(offset); @@ -255,6 +258,8 @@ static void omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset, bank->context.debounce = debounce; bank->context.debounce_en = val; } + + return 0; } /** @@ -964,14 +969,20 @@ static int omap_gpio_debounce(struct gpio_chip *chip, unsigned offset, { struct gpio_bank *bank; unsigned long flags; + int ret; bank = gpiochip_get_data(chip); raw_spin_lock_irqsave(&bank->lock, flags); - omap2_set_gpio_debounce(bank, offset, debounce); + ret = omap2_set_gpio_debounce(bank, offset, debounce); raw_spin_unlock_irqrestore(&bank->lock, flags); - return 0; + if (ret) + dev_info(chip->parent, + "Could not set line %u debounce to %u microseconds (%d)", + offset, debounce, ret); + + return ret; } static int omap_gpio_set_config(struct gpio_chip *chip, unsigned offset, From a83f3099d0d2ef692200fd790d547356c0059709 Mon Sep 17 00:00:00 2001 From: Tobias Herzog Date: Thu, 30 Mar 2017 22:15:10 +0200 Subject: [PATCH 205/465] cdc-acm: fix possible invalid access when processing notification commit 1bb9914e1730417d530de9ed37e59efdc647146b upstream. Notifications may only be 8 bytes long. Accessing the 9th and 10th byte of unimplemented/unknown notifications may be insecure. Also check the length of known notifications before accessing anything behind the 8th byte. 
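The check described above boils down to: never trust more payload than wLength advertises, and for USB_CDC_NOTIFY_SERIAL_STATE expect exactly a 2-byte bitmap after the 8-byte header. Here is a hedged stand-alone sketch of that validation (a hand-rolled header struct and a little-endian host are assumed for brevity; this is not the cdc-acm code itself):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirror of the 8-byte CDC notification header, little-endian on the wire. */
struct cdc_notification {
	uint8_t  bmRequestType;
	uint8_t  bNotificationType;
	uint16_t wValue;
	uint16_t wIndex;
	uint16_t wLength;
} __attribute__((packed));

static int parse_serial_state(const uint8_t *buf, size_t len)
{
	struct cdc_notification dr;
	uint16_t bitmap;

	if (len < sizeof(dr))
		return -1;
	memcpy(&dr, buf, sizeof(dr));

	/* SERIAL_STATE carries exactly a 2-byte bitmap after the header. */
	if (dr.wLength != 2 || len < sizeof(dr) + 2)
		return -1;		/* malformed: never touch the payload */

	bitmap = buf[8] | (buf[9] << 8);	/* get_unaligned_le16(data) */
	printf("serial state bitmap 0x%04x\n", bitmap);
	return 0;
}

int main(void)
{
	const uint8_t ok[10] = { 0xa1, 0x20, 0, 0, 0, 0, 2, 0, 0x03, 0x00 };
	const uint8_t bad[8] = { 0xa1, 0x20, 0, 0, 0, 0, 0, 0 };

	printf("well-formed: %d\n", parse_serial_state(ok, sizeof(ok)));
	printf("truncated  : %d\n", parse_serial_state(bad, sizeof(bad)));
	return 0;
}
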
Signed-off-by: Tobias Herzog Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index d5388938bc7a..1c9533b74a55 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -323,6 +323,12 @@ static void acm_ctrl_irq(struct urb *urb) break; case USB_CDC_NOTIFY_SERIAL_STATE: + if (le16_to_cpu(dr->wLength) != 2) { + dev_dbg(&acm->control->dev, + "%s - malformed serial state\n", __func__); + break; + } + newctrl = get_unaligned_le16(data); if (!acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { @@ -359,11 +365,10 @@ static void acm_ctrl_irq(struct urb *urb) default: dev_dbg(&acm->control->dev, - "%s - unknown notification %d received: index %d " - "len %d data0 %d data1 %d\n", + "%s - unknown notification %d received: index %d len %d\n", __func__, - dr->bNotificationType, dr->wIndex, - dr->wLength, data[0], data[1]); + dr->bNotificationType, dr->wIndex, dr->wLength); + break; } exit: From 6c58e144d57d16d320d571007eaaccf83f35efce Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 20 Mar 2017 09:11:49 +0100 Subject: [PATCH 206/465] ohci-pci: add qemu quirk commit 21a60f6e65181cad64fd66ccc8080d413721ba27 upstream. On a loaded virtualization host (dozen guests booting at the same time) it may happen that the ohci controller emulation doesn't manage to do timely frame processing, with the result that the io watchdog fires and considers the controller being dead, even though it's only the emulation being unusual slow due to the load peak. So, add a quirk for qemu and don't use the watchdog in case we figure we are running on emulated ohci. The virtual ohci controller masquerades as apple ohci controller, but we can identify it by subsystem id. 
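The subsystem-ID trick above works because PCI quirk tables can match on all four IDs, not just vendor and device: QEMU's OHCI emulation reuses Apple's vendor/device pair but exposes a Red Hat/Qumranet subsystem. Below is a simplified stand-alone model of the matching; the wildcard logic mirrors how a pci_device_id table is walked, the numeric IDs are the commonly documented values behind the macros named in the quirk entry, and the "real Apple" subsystem values are invented for contrast. With the quirk applied, ohci_urb_enqueue() simply skips arming the io_watchdog, which is the only behavioural change.

#include <stdint.h>
#include <stdio.h>

#define ANY_ID 0xffffffffu

struct id { uint32_t ven, dev, subven, subdev; };

/* Simplified model of matching one quirk table entry against a device. */
static int matches(const struct id *q, const struct id *hw)
{
	return (q->ven    == ANY_ID || q->ven    == hw->ven)    &&
	       (q->dev    == ANY_ID || q->dev    == hw->dev)    &&
	       (q->subven == ANY_ID || q->subven == hw->subven) &&
	       (q->subdev == ANY_ID || q->subdev == hw->subdev);
}

int main(void)
{
	/* Apple OHCI IDs with the Red Hat/Qumranet QEMU subsystem. */
	const struct id quirk = { 0x106b, 0x003f, 0x1af4, 0x1100 };
	const struct id qemu  = { 0x106b, 0x003f, 0x1af4, 0x1100 };
	/* A real part would carry its own (here invented) subsystem IDs. */
	const struct id apple = { 0x106b, 0x003f, 0x106b, 0x0001 };

	printf("qemu ohci : quirk %s\n", matches(&quirk, &qemu)  ? "applies" : "skipped");
	printf("real part : quirk %s\n", matches(&quirk, &apple) ? "applies" : "skipped");
	return 0;
}
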
Signed-off-by: Gerd Hoffmann Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hcd.c | 3 ++- drivers/usb/host/ohci-pci.c | 16 ++++++++++++++++ drivers/usb/host/ohci.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index b6daf2e69989..5f81a2b69054 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -231,7 +231,8 @@ static int ohci_urb_enqueue ( /* Start up the I/O watchdog timer, if it's not running */ if (!timer_pending(&ohci->io_watchdog) && - list_empty(&ohci->eds_in_use)) { + list_empty(&ohci->eds_in_use) && + !(ohci->flags & OHCI_QUIRK_QEMU)) { ohci->prev_frame_no = ohci_frame_no(ohci); mod_timer(&ohci->io_watchdog, jiffies + IO_WATCHDOG_DELAY); diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index bb1509675727..a84aebe9b0a9 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -164,6 +164,15 @@ static int ohci_quirk_amd700(struct usb_hcd *hcd) return 0; } +static int ohci_quirk_qemu(struct usb_hcd *hcd) +{ + struct ohci_hcd *ohci = hcd_to_ohci(hcd); + + ohci->flags |= OHCI_QUIRK_QEMU; + ohci_dbg(ohci, "enabled qemu quirk\n"); + return 0; +} + /* List of quirks for OHCI */ static const struct pci_device_id ohci_pci_quirks[] = { { @@ -214,6 +223,13 @@ static const struct pci_device_id ohci_pci_quirks[] = { PCI_DEVICE(PCI_VENDOR_ID_ATI, 0x4399), .driver_data = (unsigned long)ohci_quirk_amd700, }, + { + .vendor = PCI_VENDOR_ID_APPLE, + .device = 0x003f, + .subvendor = PCI_SUBVENDOR_ID_REDHAT_QUMRANET, + .subdevice = PCI_SUBDEVICE_ID_QEMU, + .driver_data = (unsigned long)ohci_quirk_qemu, + }, /* FIXME for some of the early AMD 760 southbridges, OHCI * won't work at all. blacklist them. diff --git a/drivers/usb/host/ohci.h b/drivers/usb/host/ohci.h index 37f1725e7a46..a51b189bdbd8 100644 --- a/drivers/usb/host/ohci.h +++ b/drivers/usb/host/ohci.h @@ -418,6 +418,7 @@ struct ohci_hcd { #define OHCI_QUIRK_AMD_PLL 0x200 /* AMD PLL quirk*/ #define OHCI_QUIRK_AMD_PREFETCH 0x400 /* pre-fetch for ISO transfer */ #define OHCI_QUIRK_GLOBAL_SUSPEND 0x800 /* must suspend ports */ +#define OHCI_QUIRK_QEMU 0x1000 /* relax timing expectations */ // there are also chip quirks/bugs in init logic From 4a0e8757ad3e47c797a2cd12c71e6c81e6130134 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 27 Apr 2017 10:53:25 +0530 Subject: [PATCH 207/465] cxl: Force context lock during EEH flow commit ea9a26d117cf0637c71d3e0076f4a124bf5859df upstream. During an eeh event when the cxl card is fenced and card sysfs attr perst_reloads_same_image is set following warning message is seen in the kernel logs: Adapter context unlocked with 0 active contexts ------------[ cut here ]------------ WARNING: CPU: 12 PID: 627 at ../drivers/misc/cxl/main.c:325 cxl_adapter_context_unlock+0x60/0x80 [cxl] Even though this warning is harmless, it clutters the kernel log during an eeh event. This warning is triggered as the EEH callback cxl_pci_error_detected doesn't obtain a context-lock before forcibly detaching all active context and when context-lock is released during call to cxl_configure_adapter from cxl_pci_slot_reset, a warning in cxl_adapter_context_unlock is triggered. To fix this warning, we acquire the adapter context-lock via cxl_adapter_context_lock() in the eeh callback cxl_pci_error_detected() once all the virtual AFU PHBs are notified and their contexts detached. 
The context-lock is released in cxl_pci_slot_reset() after the adapter is successfully reconfigured and before we call the slot_reset callback on slice attached device-drivers. Fixes: 70b565bbdb91 ("cxl: Prevent adapter reset if an active context exists") Reported-by: Andrew Donnellan Signed-off-by: Vaibhav Jain Acked-by: Frederic Barrat Reviewed-by: Matthew R. Ochs Tested-by: Uma Krishnan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index b27ea98b781f..dd9a128a2375 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1496,8 +1496,6 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = cxl_native_register_psl_err_irq(adapter))) goto err; - /* Release the context lock as adapter is configured */ - cxl_adapter_context_unlock(adapter); return 0; err: @@ -1596,6 +1594,9 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) if ((rc = cxl_sysfs_adapter_add(adapter))) goto err_put1; + /* Release the context lock as adapter is configured */ + cxl_adapter_context_unlock(adapter); + return adapter; err_put1: @@ -1895,6 +1896,13 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, cxl_ops->afu_deactivate_mode(afu, afu->current_mode); pci_deconfigure_afu(afu); } + + /* should take the context lock here */ + if (cxl_adapter_context_lock(adapter) != 0) + dev_warn(&adapter->dev, + "Couldn't take context lock with %d active-contexts\n", + atomic_read(&adapter->contexts_num)); + cxl_deconfigure_adapter(adapter); return result; @@ -1913,6 +1921,13 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) if (cxl_configure_adapter(adapter, pdev)) goto err; + /* + * Unlock context activation for the adapter. Ideally this should be + * done in cxl_pci_resume but cxlflash module tries to activate the + * master context as part of slot_reset callback. + */ + cxl_adapter_context_unlock(adapter); + for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; From 525d5ef5ae91f4114a819b3e6a27e6e040328e29 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 27 Apr 2017 10:58:22 +0530 Subject: [PATCH 208/465] cxl: Route eeh events to all drivers in cxl_pci_error_detected() commit 4f58f0bf155e87dda31a3088b1e107fa9dd79f0e upstream. Fix a boundary condition where in some cases an eeh event that results in card reset isn't passed on to a driver attached to the virtual PCI device associated with a slice. This will happen when a slice attached device driver returns a value other than PCI_ERS_RESULT_NEED_RESET from the eeh error_detected() callback. This would result in an early return from cxl_pci_error_detected() and other drivers attached to other AFUs on the card won't be notified. The patch fixes this by making sure that all slice attached device-drivers are notified and the return values from error_detected() callback are aggregated in a scheme where request for 'disconnect' trumps all and 'none' trumps 'need_reset'.
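The "disconnect trumps all, none trumps need_reset" rule above is just a small precedence order over the recovery results; a stand-alone sketch (using a local enum rather than the kernel's pci_ers_result_t) makes the folding explicit. Starting from the need_reset default and folding every slice's answer this way lets one AFU's stronger result override the default without an early return, so every slice still gets notified.

#include <stdio.h>

enum ers { ERS_NONE, ERS_NEED_RESET, ERS_DISCONNECT };

/* Fold one slice's answer into the running result:
 * disconnect trumps all, none trumps need_reset. */
static enum ers fold(enum ers agg, enum ers afu)
{
	if (afu == ERS_DISCONNECT)
		return ERS_DISCONNECT;
	if (afu == ERS_NONE && agg == ERS_NEED_RESET)
		return ERS_NONE;
	return agg;
}

int main(void)
{
	const char *name[] = { "none", "need_reset", "disconnect" };
	enum ers slices[] = { ERS_NEED_RESET, ERS_NONE, ERS_NEED_RESET };
	enum ers result = ERS_NEED_RESET;	/* starting value, as in the driver */
	unsigned int i;

	for (i = 0; i < sizeof(slices) / sizeof(slices[0]); i++)
		result = fold(result, slices[i]);

	printf("aggregated result: %s\n", name[result]);	/* "none" */
	return 0;
}
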
Fixes: 9e8df8a21963 ("cxl: EEH support") Signed-off-by: Vaibhav Jain Reviewed-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index dd9a128a2375..f98d64963022 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1782,7 +1782,7 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, { struct cxl *adapter = pci_get_drvdata(pdev); struct cxl_afu *afu; - pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET, afu_result; int i; /* At this point, we could still have an interrupt pending. @@ -1886,15 +1886,18 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - result = cxl_vphb_error_detected(afu, state); - - /* Only continue if everyone agrees on NEED_RESET */ - if (result != PCI_ERS_RESULT_NEED_RESET) - return result; + afu_result = cxl_vphb_error_detected(afu, state); cxl_context_detach_all(afu); cxl_ops->afu_deactivate_mode(afu, afu->current_mode); pci_deconfigure_afu(afu); + + /* Disconnect trumps all, NONE trumps NEED_RESET */ + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + else if ((afu_result == PCI_ERS_RESULT_NONE) && + (result == PCI_ERS_RESULT_NEED_RESET)) + result = PCI_ERS_RESULT_NONE; } /* should take the context lock here */ From d5331e619aa3787ea7d55923a94c82abb409527f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 28 Apr 2017 15:00:15 +0200 Subject: [PATCH 209/465] proc: Fix unbalanced hard link numbers commit d66bb1607e2d8d384e53f3d93db5c18483c8c4f7 upstream. proc_create_mount_point() forgot to increase the parent's nlink, and it resulted in unbalanced hard link numbers, e.g. /proc/fs shows one less than expected. Fixes: eb6d38d5427b ("proc: Allow creating permanently empty directories...") Reported-by: Tristan Ye Signed-off-by: Takashi Iwai Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- fs/proc/generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index ee27feb34cf4..9425c0d97262 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -472,6 +472,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->data = NULL; ent->proc_fops = NULL; ent->proc_iops = NULL; + parent->nlink++; if (proc_register(parent, ent) < 0) { kfree(ent); parent->nlink--; From 854d0231ef1baa5294a86d62c43d4376b057b33b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 4 May 2017 12:34:30 -0500 Subject: [PATCH 210/465] of: fix sparse warning in of_pci_range_parser_one commit eb3100365791b06242b8bb5c3c2854ba41dabfbc upstream. sparse gives the following warning for 'pci_space': ../drivers/of/address.c:266:26: warning: incorrect type in assignment (different base types) ../drivers/of/address.c:266:26: expected unsigned int [unsigned] [usertype] pci_space ../drivers/of/address.c:266:26: got restricted __be32 const [usertype] It appears that pci_space is only ever accessed on powerpc, so the endian swap is often not needed. 
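The sparse warning above is the usual __be32-versus-cpu mix-up: flattened device tree cells are stored big-endian, so reading one into a plain u32 without a swap gives the wrong number on a little-endian CPU, and it went unnoticed only because pci_space is apparently consumed on powerpc. A hedged stand-alone sketch of what the added swap does (ntohl() standing in for be32_to_cpup(); 0x02000000 is the usual 32-bit memory space code in the first cell of a PCI ranges entry):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* First cell of a PCI "ranges" entry as stored in the flattened
	 * tree: big-endian bytes encoding 0x02000000. */
	const uint8_t cell[4] = { 0x02, 0x00, 0x00, 0x00 };
	uint32_t raw, swapped;

	memcpy(&raw, cell, sizeof(raw));
	swapped = ntohl(raw);	/* what be32_to_cpup() does in the kernel */

	printf("plain assignment: 0x%08x, byte-swapped: 0x%08x\n",
	       (unsigned int)raw, (unsigned int)swapped);
	return 0;
}

On a big-endian host the two values print the same, which is why the missing conversion was harmless there and only showed up as a sparse type warning.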
Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 02b2903fe9d2..72914cdfce2a 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -263,7 +263,7 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, if (!parser->range || parser->range + parser->np > parser->end) return NULL; - range->pci_space = parser->range[0]; + range->pci_space = be32_to_cpup(parser->range); range->flags = of_bus_pci_get_flags(parser->range); range->pci_addr = of_read_number(parser->range + 1, ns); range->cpu_addr = of_translate_address(parser->node, From 5bc2cfba8984b3757a479818ca5ce50b5c613007 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 17 Apr 2017 20:29:17 -0400 Subject: [PATCH 211/465] of: fix "/cpus" reference leak in of_numa_parse_cpu_nodes() commit b8475cbee5ab2eac05f9cd5dbcc94c453d3cbf10 upstream. The call to of_find_node_by_path("/cpus") returns the cpus device_node with its reference count incremented. There is no matching of_node_put() call in of_numa_parse_cpu_nodes() which results in a leaked reference to the "/cpus" node. This patch adds an of_node_put() to release the reference. fixes: 298535c00a2c ("of, numa: Add NUMA of binding implementation.") Signed-off-by: Tyrel Datwyler Acked-by: David Daney Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/of_numa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index a53982a330ea..2db1f7a04baf 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -57,6 +57,8 @@ static void __init of_numa_parse_cpu_nodes(void) else node_set(nid, numa_nodes_parsed); } + + of_node_put(cpus); } static int __init of_numa_parse_memory_nodes(void) From c3f0eff47d38a9acfeed4e927cf4ab88ddeab766 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 17 May 2017 17:29:09 +0200 Subject: [PATCH 212/465] of: fdt: add missing allocation-failure check commit 49e67dd17649b60b4d54966e18ec9c80198227f0 upstream. The memory allocator passed to __unflatten_device_tree() (e.g. a wrapped kzalloc) can fail so add the missing sanity check to avoid dereferencing a NULL pointer. Fixes: fe14042358fa ("of/flattree: Refactor unflatten_device_tree and add fdt_unflatten_tree") Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index e5ce4b59e162..2e09b8ed03ac 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -505,6 +505,9 @@ static void *__unflatten_device_tree(const void *blob, /* Allocate memory for the expanded device tree */ mem = dt_alloc(size + 4, __alignof__(struct device_node)); + if (!mem) + return NULL; + memset(mem, 0, size); *(__be32 *)(mem + size) = cpu_to_be32(0xdeadbeef); From 5d6d5f07f8be73bd752e47169fe2e17096d7ae10 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 May 2017 14:17:15 -0500 Subject: [PATCH 213/465] ibmvscsis: Do not send aborted task response commit 25e78531268e9240fc594ce76587601b873d37c9 upstream. The driver is sending a response to the actual scsi op that was aborted by an abort task TM, while LIO is sending a response to the abort task TM. ibmvscsis_tgt does not send the response to the client until release_cmd time. 
The reason for this was because if we did it at queue_status time, then the client would be free to reuse the tag for that command, but we're still using the tag until the command is released at release_cmd time, so we chose to delay sending the response until then. That then caused this issue, because release_cmd is always called, even if queue_status is not. SCSI spec says that the initiator that sends the abort task TM NEVER gets a response to the aborted op and with the current code it will send a response. Thus this fix will remove that response if the CMD_T_ABORTED && !CMD_T_TAS. Another case with a small timing window is the case where if LIO sends a TMR_DOES_NOT_EXIST, and the release_cmd callback is called for the TMR Abort cmd before the release_cmd for the (attemped) aborted cmd, then we need to ensure that we send the response for the (attempted) abort cmd to the client before we send the response for the TMR Abort cmd. Signed-off-by: Bryant G. Ly Signed-off-by: Michael Cyr Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 114 ++++++++++++++++++----- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h | 2 + 2 files changed, 91 insertions(+), 25 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 0f807798c624..d390325c99ec 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1170,6 +1170,7 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi) cmd = list_first_entry_or_null(&vscsi->free_cmd, struct ibmvscsis_cmd, list); if (cmd) { + cmd->flags &= ~(DELAY_SEND); list_del(&cmd->list); cmd->iue = iue; cmd->type = UNSET_TYPE; @@ -1749,45 +1750,79 @@ static void srp_snd_msg_failed(struct scsi_info *vscsi, long rc) static void ibmvscsis_send_messages(struct scsi_info *vscsi) { u64 msg_hi = 0; - /* note do not attmempt to access the IU_data_ptr with this pointer + /* note do not attempt to access the IU_data_ptr with this pointer * it is not valid */ struct viosrp_crq *crq = (struct viosrp_crq *)&msg_hi; struct ibmvscsis_cmd *cmd, *nxt; struct iu_entry *iue; long rc = ADAPT_SUCCESS; + bool retry = false; if (!(vscsi->flags & RESPONSE_Q_DOWN)) { - list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) { - iue = cmd->iue; + do { + retry = false; + list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, + list) { + /* + * Check to make sure abort cmd gets processed + * prior to the abort tmr cmd + */ + if (cmd->flags & DELAY_SEND) + continue; - crq->valid = VALID_CMD_RESP_EL; - crq->format = cmd->rsp.format; + if (cmd->abort_cmd) { + retry = true; + cmd->abort_cmd->flags &= ~(DELAY_SEND); + } - if (cmd->flags & CMD_FAST_FAIL) - crq->status = VIOSRP_ADAPTER_FAIL; + /* + * If CMD_T_ABORTED w/o CMD_T_TAS scenarios and + * the case where LIO issued a + * ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST + * case then we dont send a response, since it + * was already done. 
+ */ + if (cmd->se_cmd.transport_state & CMD_T_ABORTED && + !(cmd->se_cmd.transport_state & CMD_T_TAS)) { + list_del(&cmd->list); + ibmvscsis_free_cmd_resources(vscsi, + cmd); + } else { + iue = cmd->iue; - crq->IU_length = cpu_to_be16(cmd->rsp.len); + crq->valid = VALID_CMD_RESP_EL; + crq->format = cmd->rsp.format; - rc = h_send_crq(vscsi->dma_dev->unit_address, - be64_to_cpu(msg_hi), - be64_to_cpu(cmd->rsp.tag)); + if (cmd->flags & CMD_FAST_FAIL) + crq->status = VIOSRP_ADAPTER_FAIL; - pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n", - cmd, be64_to_cpu(cmd->rsp.tag), rc); + crq->IU_length = cpu_to_be16(cmd->rsp.len); - /* if all ok free up the command element resources */ - if (rc == H_SUCCESS) { - /* some movement has occurred */ - vscsi->rsp_q_timer.timer_pops = 0; - list_del(&cmd->list); + rc = h_send_crq(vscsi->dma_dev->unit_address, + be64_to_cpu(msg_hi), + be64_to_cpu(cmd->rsp.tag)); - ibmvscsis_free_cmd_resources(vscsi, cmd); - } else { - srp_snd_msg_failed(vscsi, rc); - break; + pr_debug("send_messages: cmd %p, tag 0x%llx, rc %ld\n", + cmd, be64_to_cpu(cmd->rsp.tag), rc); + + /* if all ok free up the command + * element resources + */ + if (rc == H_SUCCESS) { + /* some movement has occurred */ + vscsi->rsp_q_timer.timer_pops = 0; + list_del(&cmd->list); + + ibmvscsis_free_cmd_resources(vscsi, + cmd); + } else { + srp_snd_msg_failed(vscsi, rc); + break; + } + } } - } + } while (retry); if (!rc) { /* @@ -2708,6 +2743,7 @@ static int ibmvscsis_alloc_cmds(struct scsi_info *vscsi, int num) for (i = 0, cmd = (struct ibmvscsis_cmd *)vscsi->cmd_pool; i < num; i++, cmd++) { + cmd->abort_cmd = NULL; cmd->adapter = vscsi; INIT_WORK(&cmd->work, ibmvscsis_scheduler); list_add_tail(&cmd->list, &vscsi->free_cmd); @@ -3579,9 +3615,20 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd) { struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, se_cmd); + struct scsi_info *vscsi = cmd->adapter; struct iu_entry *iue = cmd->iue; int rc; + /* + * If CLIENT_FAILED OR RESPONSE_Q_DOWN, then just return success + * since LIO can't do anything about it, and we dont want to + * attempt an srp_transfer_data. + */ + if ((vscsi->flags & (CLIENT_FAILED | RESPONSE_Q_DOWN))) { + pr_err("write_pending failed since: %d\n", vscsi->flags); + return 0; + } + rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, 1, 1); if (rc) { @@ -3660,11 +3707,28 @@ static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd) struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, se_cmd); struct scsi_info *vscsi = cmd->adapter; + struct ibmvscsis_cmd *cmd_itr; + struct iu_entry *iue = iue = cmd->iue; + struct srp_tsk_mgmt *srp_tsk = &vio_iu(iue)->srp.tsk_mgmt; + u64 tag_to_abort = be64_to_cpu(srp_tsk->task_tag); uint len; pr_debug("queue_tm_rsp %p, status %d\n", se_cmd, (int)se_cmd->se_tmr_req->response); + if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK && + cmd->se_cmd.se_tmr_req->response == TMR_TASK_DOES_NOT_EXIST) { + spin_lock_bh(&vscsi->intr_lock); + list_for_each_entry(cmd_itr, &vscsi->active_q, list) { + if (tag_to_abort == cmd_itr->se_cmd.tag) { + cmd_itr->abort_cmd = cmd; + cmd->flags |= DELAY_SEND; + break; + } + } + spin_unlock_bh(&vscsi->intr_lock); + } + srp_build_response(vscsi, cmd, &len); cmd->rsp.format = SRP_FORMAT; cmd->rsp.len = len; @@ -3672,8 +3736,8 @@ static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd) static void ibmvscsis_aborted_task(struct se_cmd *se_cmd) { - /* TBD: What (if anything) should we do here? 
*/ - pr_debug("ibmvscsis_aborted_task %p\n", se_cmd); + pr_debug("ibmvscsis_aborted_task %p task_tag: %llu\n", + se_cmd, se_cmd->tag); } static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf, diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h index 65c6189885ab..b4391a8de456 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h @@ -168,10 +168,12 @@ struct ibmvscsis_cmd { struct iu_rsp rsp; struct work_struct work; struct scsi_info *adapter; + struct ibmvscsis_cmd *abort_cmd; /* Sense buffer that will be mapped into outgoing status */ unsigned char sense_buf[TRANSPORT_SENSE_BUFFER]; u64 init_time; #define CMD_FAST_FAIL BIT(0) +#define DELAY_SEND BIT(1) u32 flags; char type; }; From 6567967aad735de020bbcdee28e60c090f24bd95 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 27 Apr 2017 15:08:26 -0700 Subject: [PATCH 214/465] scsi: lpfc: Fix panic on BFS configuration commit 4492b739c9ccfaf828bd7c02dc779ec2a5e55ff4 upstream. To select the appropriate shost template, the driver is issuing a mailbox command to retrieve the wwn. Turns out the sending of the command precedes the reset of the function. On SLI-4 adapters, this is inconsequential as the mailbox command location is specified by dma via the BMBX register. However, on SLI-3 adapters, the location of the mailbox command submission area changes. When the function is first powered on or reset, the cmd is submitted via PCI bar memory. Later the driver changes the function config to use host memory and DMA. The request to start a mailbox command is the same, a simple doorbell write, regardless of submission area. So.. if there has not been a boot driver run against the adapter, the mailbox command works as defaults are ok. But, if the boot driver has configured the card and, and if no platform pci function/slot reset occurs as the os starts, the mailbox command will fail. The SLI-3 device will use the stale boot driver dma location. This can cause PCI eeh errors. Fix is to reset the sli-3 function before sending the mailbox command, thus synchronizing the function/driver on mailbox location. Note: The fix uses routines that are typically invoked later in the call flow to reset the sli-3 device. The issue in using those routines is that the normal (non-fix) flow does additional initialization, namely the allocation of the pport structure. So, rather than significantly reworking the initialization flow so that the pport is alloc'd first, pointer checks are added to work around it. Checks are limited to the routines invoked by a sli-3 adapter (s3 routines) as this fix/early call is only invoked on a sli3 adapter. Nothing changes post the fix. Subsequent initialization, and another adapter reset, still occur - both on sli-3 and sli-4 adapters. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Fixes: 96418b5e2c88 ("scsi: lpfc: Fix eh_deadline setting for sli3 adapters.") Reviewed-by: Ewan D. Milne Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_crtn.h | 1 + drivers/scsi/lpfc/lpfc_init.c | 7 +++++++ drivers/scsi/lpfc/lpfc_sli.c | 19 ++++++++++++------- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 54e6ac42fbcd..e1081c491edc 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -289,6 +289,7 @@ int lpfc_selective_reset(struct lpfc_hba *); void lpfc_reset_barrier(struct lpfc_hba *); int lpfc_sli_brdready(struct lpfc_hba *, uint32_t); int lpfc_sli_brdkill(struct lpfc_hba *); +int lpfc_sli_chipset_init(struct lpfc_hba *phba); int lpfc_sli_brdreset(struct lpfc_hba *); int lpfc_sli_brdrestart(struct lpfc_hba *); int lpfc_sli_hba_setup(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 6cc561b04211..5ec0f4f32c17 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3563,6 +3563,13 @@ lpfc_get_wwpn(struct lpfc_hba *phba) LPFC_MBOXQ_t *mboxq; MAILBOX_t *mb; + if (phba->sli_rev < LPFC_SLI_REV4) { + /* Reset the port first */ + lpfc_sli_brdrestart(phba); + rc = lpfc_sli_chipset_init(phba); + if (rc) + return (uint64_t)-1; + } mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1c9fa45df7eb..4e2e675a624f 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4204,13 +4204,16 @@ lpfc_sli_brdreset(struct lpfc_hba *phba) /* Reset HBA */ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "0325 Reset HBA Data: x%x x%x\n", - phba->pport->port_state, psli->sli_flag); + (phba->pport) ? phba->pport->port_state : 0, + psli->sli_flag); /* perform board reset */ phba->fc_eventTag = 0; phba->link_events = 0; - phba->pport->fc_myDID = 0; - phba->pport->fc_prevDID = 0; + if (phba->pport) { + phba->pport->fc_myDID = 0; + phba->pport->fc_prevDID = 0; + } /* Turn off parity checking and serr during the physical reset */ pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value); @@ -4336,7 +4339,8 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba) /* Restart HBA */ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "0337 Restart HBA Data: x%x x%x\n", - phba->pport->port_state, psli->sli_flag); + (phba->pport) ? phba->pport->port_state : 0, + psli->sli_flag); word0 = 0; mb = (MAILBOX_t *) &word0; @@ -4350,7 +4354,7 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba) readl(to_slim); /* flush */ /* Only skip post after fc_ffinit is completed */ - if (phba->pport->port_state) + if (phba->pport && phba->pport->port_state) word0 = 1; /* This is really setting up word1 */ else word0 = 0; /* This is really setting up word1 */ @@ -4359,7 +4363,8 @@ lpfc_sli_brdrestart_s3(struct lpfc_hba *phba) readl(to_slim); /* flush */ lpfc_sli_brdreset(phba); - phba->pport->stopped = 0; + if (phba->pport) + phba->pport->stopped = 0; phba->link_state = LPFC_INIT_START; phba->hba_flag = 0; spin_unlock_irq(&phba->hbalock); @@ -4446,7 +4451,7 @@ lpfc_sli_brdrestart(struct lpfc_hba *phba) * iteration, the function will restart the HBA again. The function returns * zero if HBA successfully restarted else returns negative error code. 
**/ -static int +int lpfc_sli_chipset_init(struct lpfc_hba *phba) { uint32_t status, i = 0; From 327f5da04e62124e5a79d050f084f1cec7af85e2 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Thu, 13 Apr 2017 14:54:23 -0700 Subject: [PATCH 215/465] iio: dac: ad7303: fix channel description commit ce420fd4251809b4c3119b3b20c8b13bd8eba150 upstream. realbits, storagebits and shift should be numbers, not ASCII characters. Signed-off-by: Pavel Roskin Reviewed-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/dac/ad7303.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ad7303.c b/drivers/iio/dac/ad7303.c index e690dd11e99f..4b0f942b8914 100644 --- a/drivers/iio/dac/ad7303.c +++ b/drivers/iio/dac/ad7303.c @@ -184,9 +184,9 @@ static const struct iio_chan_spec_ext_info ad7303_ext_info[] = { .address = (chan), \ .scan_type = { \ .sign = 'u', \ - .realbits = '8', \ - .storagebits = '8', \ - .shift = '0', \ + .realbits = 8, \ + .storagebits = 8, \ + .shift = 0, \ }, \ .ext_info = ad7303_ext_info, \ } From 705fa4038f549b342854818483a88b7e126e43c5 Mon Sep 17 00:00:00 2001 From: Andreas Klinger Date: Mon, 10 Apr 2017 19:00:01 +0200 Subject: [PATCH 216/465] IIO: bmp280-core.c: fix error in humidity calculation commit ed3730c435f1a9f9559ed7762035d22d8a95adfe upstream. While calculating the compensation of the humidity there are negative values interpreted as unsigned because of unsigned variables used. These values as well as the constants need to be casted to signed as indicated by the documentation of the sensor. Signed-off-by: Andreas Klinger Acked-by: Linus Walleij Reviewed-by: Matt Ranostay Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/pressure/bmp280-core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 4d18826ac63c..d82b788374b6 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -175,11 +175,12 @@ static u32 bmp280_compensate_humidity(struct bmp280_data *data, } H6 = sign_extend32(tmp, 7); - var = ((s32)data->t_fine) - 76800; - var = ((((adc_humidity << 14) - (H4 << 20) - (H5 * var)) + 16384) >> 15) - * (((((((var * H6) >> 10) * (((var * H3) >> 11) + 32768)) >> 10) - + 2097152) * H2 + 8192) >> 14); - var -= ((((var >> 15) * (var >> 15)) >> 7) * H1) >> 4; + var = ((s32)data->t_fine) - (s32)76800; + var = ((((adc_humidity << 14) - (H4 << 20) - (H5 * var)) + + (s32)16384) >> 15) * (((((((var * H6) >> 10) + * (((var * (s32)H3) >> 11) + (s32)32768)) >> 10) + + (s32)2097152) * H2 + 8192) >> 14); + var -= ((((var >> 15) * (var >> 15)) >> 7) * (s32)H1) >> 4; return var >> 12; }; From c07927502b49f3335f565da8d790eaeb54f1aace Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 7 Apr 2017 13:53:46 +0200 Subject: [PATCH 217/465] iio: stm32 trigger: fix sampling_frequency read commit 77a9febfd81f9e8550d09dc76e8e9c06307b7aca upstream. When prescaler (PSC) is 0, it means div factor is 1: counter clock frequency is equal to input clk / (PSC + 1). When reload value is 8 for example, counter counts 9 cycles, from 0 to 8. This is handled in frequency write routine, by writing respectively: - prescaler - 1 to PSC - reload value - 1 to ARR This fix does the opposite when reading the frequency from PSC and ARR: - prescaler is PSC + 1 - reload value is ARR + 1 Thus, PSC may be 0, depending on requested sampling frequency (div 1). 
In this case, reading freq wrongly reports 0, instead of computing and reporting correct value. Remove test on !psc and !arr. Small test on stm32f4 (example on tim1_trgo), before this fix: $ cd /sys/bus/iio/devices/triggerX $ echo 10000 > sampling_frequency $ cat sampling_frequency 0 After this fix: $ echo 10000 > sampling_frequency $ cat sampling_frequency 10000 Signed-off-by: Fabrice Gasnier Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/trigger/stm32-timer-trigger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 994b96d19750..5ee362bc618c 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -152,10 +152,10 @@ static ssize_t stm32_tt_read_frequency(struct device *dev, regmap_read(priv->regmap, TIM_PSC, &psc); regmap_read(priv->regmap, TIM_ARR, &arr); - if (psc && arr && (cr1 & TIM_CR1_CEN)) { + if (cr1 & TIM_CR1_CEN) { freq = (unsigned long long)clk_get_rate(priv->clk); - do_div(freq, psc); - do_div(freq, arr); + do_div(freq, psc + 1); + do_div(freq, arr + 1); } return sprintf(buf, "%d\n", (unsigned int)freq); From d971ab21c98607b819377785268f90138f12ddd6 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:14:28 -0700 Subject: [PATCH 218/465] IB/hfi1: Return an error on memory allocation failure commit 94679061dcdddbafcf24e3bfb526e54dedcc2f2f upstream. If the eager buffer allocation fails, it is necessary to return an error code. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f40864e9a3b2..a3dd27b1305d 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1758,6 +1758,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) { dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n", rcd->ctxt); + ret = -ENOMEM; goto bail_rcvegrbuf_phys; } From 4cbd5018c4d2d2afa24625a93189b58314999352 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 4 May 2017 05:14:34 -0700 Subject: [PATCH 219/465] IB/hfi1: Fix a subcontext memory leak commit 224d71f910102c966cdcd782c97e096d5e26e4da upstream. The only context that frees user_exp_rcv data structures is the last context closed (from a sub-context set). This leaks the allocations from the other sub-contexts. Separate the common frees from the specific frees and call them at the appropriate time. Using KEDR to check for memory leaks we get: Before test: [leak_check] Possible leaks: 25 After test: [leak_check] Possible leaks: 31 (6 leaked data structures) After patch applied (before and after test have the same value) [leak_check] Possible leaks: 25 Each leak is 192 + 13440 + 6720 = 20352 bytes per sub-context. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/file_ops.c | 5 +++- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 32 +++++++++++++---------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 1 + 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f78c739b330a..db02517b763b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -752,6 +752,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* release the cpu */ hfi1_put_proc_affinity(fdata->rec_cpu_num); + /* clean up rcv side */ + hfi1_user_exp_rcv_free(fdata); + /* * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. @@ -791,7 +794,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; - hfi1_user_exp_rcv_free(fdata); + hfi1_user_exp_rcv_grp_free(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); uctxt->rcvwait_to = 0; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 4a8295399e71..ffe6ca12b48d 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -251,36 +251,40 @@ int hfi1_user_exp_rcv_init(struct file *fp) return ret; } +void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) +{ + struct tid_group *grp, *gptr; + + list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, + list) { + list_del_init(&grp->list); + kfree(grp); + } + hfi1_clear_tids(uctxt); +} + int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct tid_group *grp, *gptr; - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) - return 0; /* * The notifier would have been removed when the process'es mm * was freed. */ - if (fd->handler) + if (fd->handler) { hfi1_mmu_rb_unregister(fd->handler); - - kfree(fd->invalid_tids); - - if (!uctxt->cnt) { + } else { if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - hfi1_clear_tids(uctxt); } + kfree(fd->invalid_tids); + fd->invalid_tids = NULL; + kfree(fd->entry_to_rb); + fd->entry_to_rb = NULL; return 0; } diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 9bc8d9fba87e..d1d7d3d3bd44 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -70,6 +70,7 @@ (tid) |= EXP_TID_SET(field, (value)); \ } while (0) +void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); int hfi1_user_exp_rcv_init(struct file *); int hfi1_user_exp_rcv_free(struct hfi1_filedata *); int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *); From 9fbdfb7af4db0a3ca01d19cd2d0a83f6e5443cf1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 11 May 2017 18:21:01 -0500 Subject: [PATCH 220/465] pid_ns: Sleep in TASK_INTERRUPTIBLE in zap_pid_ns_processes commit b9a985db98961ae1ba0be169f19df1c567e4ffe0 upstream. The code can potentially sleep for an indefinite amount of time in zap_pid_ns_processes triggering the hung task timeout, and increasing the system average. This is undesirable. 
Sleep with a task state of TASK_INTERRUPTIBLE instead of TASK_UNINTERRUPTIBLE to remove these undesirable side effects. Apparently under heavy load this has been allowing Chrome to trigger the hung time task timeout error and cause ChromeOS to reboot. Reported-by: Vovo Yang Reported-by: Guenter Roeck Tested-by: Guenter Roeck Fixes: 6347e9009104 ("pidns: guarantee that the pidns init will be the last pidns process reaped") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/pid_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index de461aa0bf9a..6e51b8820495 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -277,7 +277,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) * if reparented. */ for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); if (pid_ns->nr_hashed == init_pids) break; schedule(); From 15e4ed2a46587a8e3085299ae443c96459ce8856 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 12 May 2017 19:11:31 +0300 Subject: [PATCH 221/465] pid_ns: Fix race between setns'ed fork() and zap_pid_ns_processes() commit 3fd37226216620c1a468afa999739d5016fbc349 upstream. Imagine we have a pid namespace and a task from its parent's pid_ns, which made setns() to the pid namespace. The task is doing fork(), while the pid namespace's child reaper is dying. We have the race between them: Task from parent pid_ns Child reaper copy_process() .. alloc_pid() .. .. zap_pid_ns_processes() .. disable_pid_allocation() .. read_lock(&tasklist_lock) .. iterate over pids in pid_ns .. kill tasks linked to pids .. read_unlock(&tasklist_lock) write_lock_irq(&tasklist_lock); .. attach_pid(p, PIDTYPE_PID); .. .. .. So, just created task p won't receive SIGKILL signal, and the pid namespace will be in contradictory state. Only manual kill will help there, but does the userspace care about this? I suppose, the most users just inject a task into a pid namespace and wait a SIGCHLD from it. The patch fixes the problem. It simply checks for (pid_ns->nr_hashed & PIDNS_HASH_ADDING) in copy_process(). We do it under the tasklist_lock, and can't skip PIDNS_HASH_ADDING as noted by Oleg: "zap_pid_ns_processes() does disable_pid_allocation() and then takes tasklist_lock to kill the whole namespace. Given that copy_process() checks PIDNS_HASH_ADDING under write_lock(tasklist) they can't race; if copy_process() takes this lock first, the new child will be killed, otherwise copy_process() can't miss the change in ->nr_hashed." If allocation is disabled, we just return -ENOMEM like it's made for such cases in alloc_pid(). v2: Do not move disable_pid_allocation(), do not introduce a new variable in copy_process() and simplify the patch as suggested by Oleg Nesterov. Account the problem with double irq enabling found by Eric W. Biederman. Fixes: c876ad768215 ("pidns: Stop pid allocation when init dies") Signed-off-by: Kirill Tkhai CC: Andrew Morton CC: Ingo Molnar CC: Peter Zijlstra CC: Oleg Nesterov CC: Mike Rapoport CC: Michal Hocko CC: Andy Lutomirski CC: "Eric W. Biederman" CC: Andrei Vagin CC: Cyrill Gorcunov CC: Serge Hallyn Acked-by: Oleg Nesterov Signed-off-by: Eric W. 
Biederman Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 6c463c80e93d..fa2ad9829bba 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1815,11 +1815,13 @@ static __latent_entropy struct task_struct *copy_process( */ recalc_sigpending(); if (signal_pending(current)) { - spin_unlock(¤t->sighand->siglock); - write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_cancel_cgroup; } + if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) { + retval = -ENOMEM; + goto bad_fork_cancel_cgroup; + } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -1877,6 +1879,8 @@ static __latent_entropy struct task_struct *copy_process( return p; bad_fork_cancel_cgroup: + spin_unlock(¤t->sighand->siglock); + write_unlock_irq(&tasklist_lock); cgroup_cancel_fork(p); bad_fork_free_pid: cgroup_threadgroup_change_end(current); From 14bd189bef6597d2e29d1a18085637e4ae9ec67d Mon Sep 17 00:00:00 2001 From: Anthony Mallet Date: Fri, 5 May 2017 17:30:16 +0200 Subject: [PATCH 222/465] USB: serial: ftdi_sio: fix setting latency for unprivileged users commit bb246681b3ed0967489a7401ad528c1aaa1a4c2e upstream. Commit 557aaa7ffab6 ("ft232: support the ASYNC_LOW_LATENCY flag") enables unprivileged users to set the FTDI latency timer, but there was a logic flaw that skipped sending the corresponding USB control message to the device. Specifically, the device latency timer would not be updated until next open, something which was later also inadvertently broken by commit c19db4c9e49a ("USB: ftdi_sio: set device latency timeout at port probe"). A recent commit c6dce2626606 ("USB: serial: ftdi_sio: fix extreme low-latency setting") disabled the low-latency mode by default so we now need this fix to allow unprivileged users to again enable it. Signed-off-by: Anthony Mallet [johan: amend commit message] Fixes: 557aaa7ffab6 ("ft232: support the ASYNC_LOW_LATENCY flag") Fixes: c19db4c9e49a ("USB: ftdi_sio: set device latency timeout at port probe"). Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 03e6319b6d1c..84d0df99362b 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1506,9 +1506,9 @@ static int set_serial_info(struct tty_struct *tty, (new_serial.flags & ASYNC_FLAGS)); priv->custom_divisor = new_serial.custom_divisor; +check_and_exit: write_latency_timer(port); -check_and_exit: if ((old_priv.flags & ASYNC_SPD_MASK) != (priv->flags & ASYNC_SPD_MASK)) { if ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) From 7ed35d6ca7f7d4289bccf9b7fe43b1cdd9f2ce72 Mon Sep 17 00:00:00 2001 From: Andrey Korolyov Date: Tue, 16 May 2017 23:54:41 +0300 Subject: [PATCH 223/465] USB: serial: ftdi_sio: add Olimex ARM-USB-TINY(H) PIDs commit 5f63424ab7daac840df2b12dd5bcc5b38d50f779 upstream. This patch adds support for recognition of ARM-USB-TINY(H) devices which are almost identical to ARM-USB-OCD(H) but lacking separate barrel jack and serial console. By suggestion from Johan Hovold it is possible to replace ftdi_jtag_quirk with a bit more generic construction. Since all Olimex-ARM debuggers has exactly two ports, we could safely always use only second port within the debugger family. 
Signed-off-by: Andrey Korolyov Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 8 ++++---- drivers/usb/serial/ftdi_sio_ids.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 84d0df99362b..f187e13c47e8 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -809,10 +809,10 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) }, { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, - { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID), - .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, - { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID), - .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_H_PID, 1) }, { USB_DEVICE(FIC_VID, FIC_NEO1973_DEBUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID), diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 71fb9e59db71..4fcf1cecb6d7 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -882,6 +882,8 @@ /* Olimex */ #define OLIMEX_VID 0x15BA #define OLIMEX_ARM_USB_OCD_PID 0x0003 +#define OLIMEX_ARM_USB_TINY_PID 0x0004 +#define OLIMEX_ARM_USB_TINY_H_PID 0x002a #define OLIMEX_ARM_USB_OCD_H_PID 0x002b /* From d08975541f4d6a8d25213415fe4d574aec123b16 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:36:01 +0200 Subject: [PATCH 224/465] USB: chaoskey: fix Alea quirk on big-endian hosts commit 63afd5cc78775018ea2dec4004428dafa5283e93 upstream. Add missing endianness conversion when applying the Alea timeout quirk. Found using sparse: warning: restricted __le16 degrades to integer Fixes: e4a886e811cd ("hwrng: chaoskey - Fix URB warning due to timeout on Alea") Cc: Bob Ham Cc: Herbert Xu Cc: Keith Packard Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/chaoskey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index aa350dc9eb25..142acf1e00f7 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -194,7 +194,7 @@ static int chaoskey_probe(struct usb_interface *interface, dev->in_ep = in_ep; - if (udev->descriptor.idVendor != ALEA_VENDOR_ID) + if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID) dev->reads_started = 1; dev->size = size; From 7868d9ff2ef1dfdf1c256d12d947685fa7a5090e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 May 2017 23:30:16 +0100 Subject: [PATCH 225/465] Make stat/lstat/fstatat pass AT_NO_AUTOMOUNT to vfs_statx() commit deccf497d804a4c5fca2dbfad2f104675a6f9102 upstream. stat/lstat/fstatat need to pass AT_NO_AUTOMOUNT to vfs_statx() as the pre-statx code didn't set LOOKUP_AUTOMOUNT, even though fstatat() accepted the AT_NO_AUTOMOUNT flag. 
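For illustration only, a minimal userspace sketch of the behaviour being restored here, assuming a glibc with statx(2) support (2.28 or later); the path is hypothetical. Passing AT_NO_AUTOMOUNT keeps the attribute fetch from triggering an automount, which is what the plain stat()/lstat() wrappers now request implicitly:

	#define _GNU_SOURCE
	#include <fcntl.h>        /* AT_FDCWD, AT_NO_AUTOMOUNT */
	#include <sys/stat.h>     /* statx(), struct statx, STATX_BASIC_STATS */
	#include <stdio.h>

	int main(void)
	{
		struct statx stx;

		/* Fetch basic attributes without triggering the automount. */
		if (statx(AT_FDCWD, "/net/server/export", AT_NO_AUTOMOUNT,
			  STATX_BASIC_STATS, &stx) == 0)
			printf("size: %llu\n", (unsigned long long)stx.stx_size);
		return 0;
	}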
Fixes: a528d35e8bfc ("statx: Add a system call to make enhanced file info available") Reported-by: Ian Kent Signed-off-by: David Howells Tested-by: Ian Kent Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7251f7bb45e8..741563098d98 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2921,17 +2921,19 @@ extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { - return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); + return vfs_statx(AT_FDCWD, filename, AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_lstat(const char __user *name, struct kstat *stat) { - return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { From 3ef206af544316fc4674f461c4caa287221a8875 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 27 Apr 2017 16:57:05 -0600 Subject: [PATCH 226/465] libnvdimm: fix clear length of nvdimm_forget_poison() commit 8d13c0290655b883df9083a2a0af0d782bc38aef upstream. ND_CMD_CLEAR_ERROR command returns 'clear_err.cleared', the length of error actually cleared, which may be smaller than its requested 'len'. Change nvdimm_clear_poison() to call nvdimm_forget_poison() with 'clear_err.cleared' when this value is valid. Fixes: e046114af5fc ("libnvdimm: clear the internal poison_list when clearing badblocks") Cc: Dave Jiang Cc: Vishal Verma Signed-off-by: Toshi Kani Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 351bac8f6503..0392eb8a0dea 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -218,7 +218,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, if (cmd_rc < 0) return cmd_rc; - nvdimm_clear_from_poison_list(nvdimm_bus, phys, len); + if (clear_err.cleared > 0) + nvdimm_clear_from_poison_list(nvdimm_bus, phys, + clear_err.cleared); + return clear_err.cleared; } EXPORT_SYMBOL_GPL(nvdimm_clear_poison); From 0a4115da6135293f0568c85a9c621190e1695333 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 12 May 2017 15:46:57 -0700 Subject: [PATCH 227/465] dax: fix data corruption when fault races with write commit 13e451fdc1af05568ea379d71c02a126295d2244 upstream. Currently DAX read fault can race with write(2) in the following way: CPU1 - write(2) CPU2 - read fault dax_iomap_pte_fault() ->iomap_begin() - sees hole dax_iomap_rw() iomap_apply() ->iomap_begin - allocates blocks dax_iomap_actor() invalidate_inode_pages2_range() - there's nothing to invalidate grab_mapping_entry() - we add zero page in the radix tree and map it to page tables The result is that hole page is mapped into page tables (and thus zeros are seen in mmap) while file has data written in that place. Fix the problem by locking exception entry before mapping blocks for the fault. 
That way we are sure invalidate_inode_pages2_range() call for racing write will either block on entry lock waiting for the fault to finish (and unmap stale page tables after that) or read fault will see already allocated blocks by write(2). Fixes: 9f141d6ef6258a3a37a045842d9ba7e68f368956 Link: http://lkml.kernel.org/r/20170510085419.27601-5-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Ross Zwisler Reviewed-by: Ross Zwisler Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dax.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index b87f3ab742ba..db0cc52eb22e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1124,23 +1124,23 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page) flags |= IOMAP_WRITE; + entry = grab_mapping_entry(mapping, vmf->pgoff, 0); + if (IS_ERR(entry)) + return dax_fault_return(PTR_ERR(entry)); + /* * Note that we don't bother to use iomap_apply here: DAX required * the file system block size to be equal the page size, which means * that we never have to deal with more than a single extent here. */ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); - if (error) - return dax_fault_return(error); - if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { - vmf_ret = dax_fault_return(-EIO); /* fs corruption? */ - goto finish_iomap; + if (error) { + vmf_ret = dax_fault_return(error); + goto unlock_entry; } - - entry = grab_mapping_entry(mapping, vmf->pgoff, 0); - if (IS_ERR(entry)) { - vmf_ret = dax_fault_return(PTR_ERR(entry)); - goto finish_iomap; + if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { + error = -EIO; /* fs corruption? */ + goto error_finish_iomap; } sector = dax_iomap_sector(&iomap, pos); @@ -1162,13 +1162,13 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, } if (error) - goto error_unlock_entry; + goto error_finish_iomap; __SetPageUptodate(vmf->cow_page); vmf_ret = finish_fault(vmf); if (!vmf_ret) vmf_ret = VM_FAULT_DONE_COW; - goto unlock_entry; + goto finish_iomap; } switch (iomap.type) { @@ -1188,7 +1188,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, case IOMAP_HOLE: if (!(vmf->flags & FAULT_FLAG_WRITE)) { vmf_ret = dax_load_hole(mapping, &entry, vmf); - goto unlock_entry; + goto finish_iomap; } /*FALLTHRU*/ default: @@ -1197,10 +1197,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, break; } - error_unlock_entry: + error_finish_iomap: vmf_ret = dax_fault_return(error) | major; - unlock_entry: - put_locked_mapping_entry(mapping, vmf->pgoff, entry); finish_iomap: if (ops->iomap_end) { int copied = PAGE_SIZE; @@ -1215,6 +1213,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, */ ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap); } + unlock_entry: + put_locked_mapping_entry(mapping, vmf->pgoff, entry); return vmf_ret; } From 1b8de0a2bb88636afcd1e48c8d20bf9d14eca27d Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 27 Apr 2017 12:11:54 -0500 Subject: [PATCH 228/465] EDAC, amd64: Fix reporting of Chip Select sizes on Fam17h commit eb77e6b80f3bed262c7773236f0fb84649fd3091 upstream. The wrong index into the csbases/csmasks arrays was being passed to the function to compute the chip select sizes, which resulted in the wrong size being computed. Address that so that the correct values are computed and printed. Also, redo how we calculate the number of pages in a CS row. 
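For reference, the number-of-pages calculation being reworked here reduces to a shift from mebibytes to pages, since dbam_to_cs() reports the chip-select size in MiB; a small sketch, assuming 4 KiB pages (PAGE_SHIFT == 12):

	/*
	 * One MiB is 2^20 bytes and one page is 2^PAGE_SHIFT bytes, so
	 * pages = MiB << (20 - PAGE_SHIFT).
	 */
	static inline unsigned int mib_to_pages(unsigned int size_mib)
	{
		return size_mib << (20 - 12);	/* 12 == PAGE_SHIFT for 4 KiB pages */
	}

	/* e.g. a 16384 MiB (16 GiB) chip select maps to 4194304 pages */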
Reported-by: Benjamin Bennett Signed-off-by: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/1493313114-11260-1-git-send-email-Yazen.Ghannam@amd.com [ Remove unneeded integer math comment, minor cleanups. ] Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/amd64_edac.c | 40 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 82dab1692264..3aea55698165 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -782,24 +782,26 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) { - u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; - int dimm, size0, size1; + int dimm, size0, size1, cs0, cs1; edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); for (dimm = 0; dimm < 4; dimm++) { size0 = 0; + cs0 = dimm * 2; - if (dcsb[dimm*2] & DCSB_CS_ENABLE) - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm); + if (csrow_enabled(cs0, ctrl, pvt)) + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0); size1 = 0; - if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE) - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm); + cs1 = dimm * 2 + 1; + + if (csrow_enabled(cs1, ctrl, pvt)) + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", - dimm * 2, size0, - dimm * 2 + 1, size1); + cs0, size0, + cs1, size1); } } @@ -2756,26 +2758,22 @@ static void read_mc_regs(struct amd64_pvt *pvt) * encompasses * */ -static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) +static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) { - u32 cs_mode, nr_pages; u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; + int csrow_nr = csrow_nr_orig; + u32 cs_mode, nr_pages; + if (!pvt->umc) + csrow_nr >>= 1; - /* - * The math on this doesn't look right on the surface because x/2*4 can - * be simplified to x*2 but this expression makes use of the fact that - * it is integral math where 1/2=0. This intermediate value becomes the - * number of bits to shift the DBAM register to extract the proper CSROW - * field. - */ - cs_mode = DBAM_DIMM(csrow_nr / 2, dbam); + cs_mode = DBAM_DIMM(csrow_nr, dbam); - nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2)) - << (20 - PAGE_SHIFT); + nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); + nr_pages <<= 20 - PAGE_SHIFT; edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", - csrow_nr, dct, cs_mode); + csrow_nr_orig, dct, cs_mode); edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); return nr_pages; From d0c8639797278a78df3fd3a49b10ce3e7ca3d503 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 17 May 2017 18:32:05 +0300 Subject: [PATCH 229/465] xhci: Fix command ring stop regression in 4.11 commit 604d02a2a66ab7f93fd3b2bde3698c29ef057b65 upstream. In 4.11 TRB completion codes were renamed to match spec. Completion codes for command ring stopped and endpoint stopped were mixed, leading to failures while handling a stopped command ring. Use the correct completion code for command ring stopped events. 
Fixes: 0b7c105a04ca ("usb: host: xhci: rename completion codes to match spec") Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci-ring.c | 8 ++++---- drivers/usb/host/xhci.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 3bddeaa1e2d7..144f5fbd6aa3 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -421,7 +421,7 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) wait_for_completion(cmd->completion); if (cmd->status == COMP_COMMAND_ABORTED || - cmd->status == COMP_STOPPED) { + cmd->status == COMP_COMMAND_RING_STOPPED) { xhci_warn(xhci, "Timeout while waiting for stop endpoint command\n"); ret = -ETIME; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index a3309aa02993..adf50d94a103 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -321,7 +321,7 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if (i_cmd->status != COMP_COMMAND_ABORTED) continue; - i_cmd->status = COMP_STOPPED; + i_cmd->status = COMP_COMMAND_RING_STOPPED; xhci_dbg(xhci, "Turn aborted command %p to no-op\n", i_cmd->command_trb); @@ -1342,7 +1342,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cmd_comp_code = GET_COMP_CODE(le32_to_cpu(event->status)); /* If CMD ring stopped we own the trbs between enqueue and dequeue */ - if (cmd_comp_code == COMP_STOPPED) { + if (cmd_comp_code == COMP_COMMAND_RING_STOPPED) { complete_all(&xhci->cmd_ring_stop_completion); return; } @@ -1397,8 +1397,8 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, break; case TRB_CMD_NOOP: /* Is this an aborted command turned to NO-OP? 
*/ - if (cmd->status == COMP_STOPPED) - cmd_comp_code = COMP_STOPPED; + if (cmd->status == COMP_COMMAND_RING_STOPPED) + cmd_comp_code = COMP_COMMAND_RING_STOPPED; break; case TRB_RESET_EP: WARN_ON(slot_id != TRB_TO_SLOT_ID( diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 953fd8f62df0..b1f779817ffe 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1805,7 +1805,7 @@ static int xhci_configure_endpoint_result(struct xhci_hcd *xhci, switch (*cmd_status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for configure endpoint command\n"); ret = -ETIME; break; @@ -1856,7 +1856,7 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci, switch (*cmd_status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for evaluate context command\n"); ret = -ETIME; break; @@ -3478,7 +3478,7 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) ret = reset_device_cmd->status; switch (ret) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout waiting for reset device command\n"); ret = -ETIME; goto command_cleanup; @@ -3845,7 +3845,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, */ switch (command->status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for setup device command\n"); ret = -ETIME; break; From c3dc5f5ae54c9a2697f91144da6a72a24ed22055 Mon Sep 17 00:00:00 2001 From: Matthias Lange Date: Wed, 17 May 2017 18:32:04 +0300 Subject: [PATCH 230/465] xhci: remove GFP_DMA flag from allocation commit 5db851cf20857c5504b146046e97cb7781f2a743 upstream. There is no reason to restrict allocations to the first 16MB ISA DMA addresses. It is causing problems in a virtualization setup with enabled IOMMU (x86_64). The result is that USB is not working in the VM. Signed-off-by: Matthias Lange Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 3f8f28f6fa94..6ef63828f552 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -56,7 +56,7 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci, } if (max_packet) { - seg->bounce_buf = kzalloc(max_packet, flags | GFP_DMA); + seg->bounce_buf = kzalloc(max_packet, flags); if (!seg->bounce_buf) { dma_pool_free(xhci->segment_pool, seg->trbs, dma); kfree(seg); From 354c85b7adb91c3e99efd7d3f0e8307062341e2a Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 17 May 2017 18:32:06 +0300 Subject: [PATCH 231/465] usb: host: xhci-plat: propagate return value of platform_get_irq() commit 4b148d5144d64ee135b8924350cb0b3a7fd21150 upstream. platform_get_irq() returns an error code, but the xhci-plat driver ignores it and always returns -ENODEV. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. 
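The same propagation pattern applies to any platform-driver probe; a minimal sketch follows (the foo_* names are illustrative, not from the xhci driver):

	#include <linux/platform_device.h>
	#include <linux/interrupt.h>

	static irqreturn_t foo_irq(int irq, void *data)
	{
		return IRQ_HANDLED;
	}

	static int foo_probe(struct platform_device *pdev)
	{
		int irq = platform_get_irq(pdev, 0);

		/*
		 * Return the errno itself (it may be -EPROBE_DEFER) instead
		 * of masking it behind a generic -ENODEV.
		 */
		if (irq < 0)
			return irq;

		return devm_request_irq(&pdev->dev, irq, foo_irq, 0,
					dev_name(&pdev->dev), NULL);
	}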
Signed-off-by: Thomas Petazzoni Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 6ed468fa7d5e..66ddd080a2a8 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -162,7 +162,7 @@ static int xhci_plat_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENODEV; + return irq; /* Try to set 64-bit DMA first */ if (!pdev->dev.dma_mask) From 5077513b4e8c0ab4d314b68d73f23e78bacf7a44 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 17 May 2017 18:32:03 +0300 Subject: [PATCH 232/465] USB: xhci: fix lock-inversion problem commit 63aea0dbab90a2461faaae357cbc8cfd6c8de9fe upstream. With threaded interrupts, bottom-half handlers are called with interrupts enabled. Therefore they can't safely use spin_lock(); they have to use spin_lock_irqsave(). Lockdep warns about a violation occurring in xhci_irq(): ========================================================= [ INFO: possible irq lock inversion dependency detected ] 4.11.0-rc8-dbg+ #1 Not tainted --------------------------------------------------------- swapper/7/0 just changed the state of lock: (&(&ehci->lock)->rlock){-.-...}, at: [] ehci_hrtimer_func+0x29/0xc0 [ehci_hcd] but this lock took another, HARDIRQ-unsafe lock in the past: (hcd_urb_list_lock){+.....} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(hcd_urb_list_lock); local_irq_disable(); lock(&(&ehci->lock)->rlock); lock(hcd_urb_list_lock); lock(&(&ehci->lock)->rlock); *** DEADLOCK *** no locks held by swapper/7/0. the shortest dependencies between 2nd lock and 1st lock: -> (hcd_urb_list_lock){+.....} ops: 252 { HARDIRQ-ON-W at: __lock_acquire+0x602/0x1280 lock_acquire+0xd5/0x1c0 _raw_spin_lock+0x2f/0x40 usb_hcd_unlink_urb_from_ep+0x1b/0x60 [usbcore] xhci_giveback_urb_in_irq.isra.45+0x70/0x1b0 [xhci_hcd] finish_td.constprop.60+0x1d8/0x2e0 [xhci_hcd] xhci_irq+0xdd6/0x1fa0 [xhci_hcd] usb_hcd_irq+0x26/0x40 [usbcore] irq_forced_thread_fn+0x2f/0x70 irq_thread+0x149/0x1d0 kthread+0x113/0x150 ret_from_fork+0x2e/0x40 This patch fixes the problem. 
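A minimal sketch of the locking rule the fix applies (the foo_* names are illustrative): a lock that is also taken from hard-irq context must be acquired with interrupts disabled when taken from a context that runs with interrupts enabled, such as a threaded interrupt handler.

	#include <linux/interrupt.h>
	#include <linux/spinlock.h>

	struct foo_dev {
		spinlock_t lock;
		/* ... */
	};

	static irqreturn_t foo_thread_fn(int irq, void *data)
	{
		struct foo_dev *foo = data;
		unsigned long flags;

		/*
		 * Threaded handlers run with interrupts enabled, so a plain
		 * spin_lock() here can invert against a hard-irq path that
		 * takes the same lock; disable interrupts while holding it.
		 */
		spin_lock_irqsave(&foo->lock, flags);
		/* ... process events ... */
		spin_unlock_irqrestore(&foo->lock, flags);

		return IRQ_HANDLED;
	}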
Signed-off-by: Alan Stern Reported-and-tested-by: Bart Van Assche Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index adf50d94a103..c5b7b7bdffba 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2616,11 +2616,12 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) struct xhci_hcd *xhci = hcd_to_xhci(hcd); union xhci_trb *event_ring_deq; irqreturn_t ret = IRQ_NONE; + unsigned long flags; dma_addr_t deq; u64 temp_64; u32 status; - spin_lock(&xhci->lock); + spin_lock_irqsave(&xhci->lock, flags); /* Check if the xHC generated the interrupt, or the irq is shared */ status = readl(&xhci->op_regs->status); if (status == 0xffffffff) { @@ -2695,7 +2696,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) ret = IRQ_HANDLED; out: - spin_unlock(&xhci->lock); + spin_unlock_irqrestore(&xhci->lock, flags); return ret; } From 966e88d54088007cbacedcbe3d2a76812f03dc0f Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 17 May 2017 18:32:00 +0300 Subject: [PATCH 233/465] xhci: apply PME_STUCK_QUIRK and MISSING_CAS quirk for Denverton commit a0c16630d35a874e82bdf2088f58ecaca1024315 upstream. Intel Denverton microserver is Atom based and need the PME and CAS quirks as well. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7b86508ac8cf..fcf1f3f63e7a 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -52,6 +52,7 @@ #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 +#define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 static const char hcd_name[] = "xhci_hcd"; @@ -166,7 +167,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && @@ -175,7 +177,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) xhci->quirks |= XHCI_MISSING_CAS; if (pdev->vendor == PCI_VENDOR_ID_ETRON && From 0cc9aa4c4bd4876eaeb2f5511ea306512855eef5 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 17 May 2017 18:32:01 +0300 Subject: [PATCH 234/465] usb: host: xhci-mem: allocate zeroed Scratchpad Buffer commit 7480d912d549f414e0ce39331870899e89a5598c upstream. According to xHCI ch4.20 Scratchpad Buffers, the Scratchpad Buffer needs to be zeroed. ... The following operations take place to allocate Scratchpad Buffers to the xHC: ... b. 
Software clears the Scratchpad Buffer to '0' Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 6ef63828f552..96e133f4f7e7 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1729,7 +1729,7 @@ static int scratchpad_alloc(struct xhci_hcd *xhci, gfp_t flags) xhci->dcbaa->dev_context_ptrs[0] = cpu_to_le64(xhci->scratchpad->sp_dma); for (i = 0; i < num_sp; i++) { dma_addr_t dma; - void *buf = dma_alloc_coherent(dev, xhci->page_size, &dma, + void *buf = dma_zalloc_coherent(dev, xhci->page_size, &dma, flags); if (!buf) goto fail_sp5; From 3aade67b765fd28f5332335f59cb3300f2b4c3f9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 May 2017 12:11:13 +0200 Subject: [PATCH 235/465] net: irda: irda-usb: fix firmware name on big-endian hosts commit 75cf067953d5ee543b3bda90bbfcbee5e1f94ae8 upstream. Add missing endianness conversion when using the USB device-descriptor bcdDevice field to construct a firmware file name. Fixes: 8ef80aef118e ("[IRDA]: irda-usb.c: STIR421x cleanups") Cc: Nick Fedchik Signed-off-by: Johan Hovold Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/irda/irda-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index 8716b8c07feb..6f3c805f7211 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -1077,7 +1077,7 @@ static int stir421x_patch_device(struct irda_usb_cb *self) * are "42101001.sb" or "42101002.sb" */ sprintf(stir421x_fw_name, "4210%4X.sb", - self->usbdev->descriptor.bcdDevice); + le16_to_cpu(self->usbdev->descriptor.bcdDevice)); ret = request_firmware(&fw, stir421x_fw_name, &self->usbdev->dev); if (ret < 0) return ret; From 03a7e78a25f6b4694220f30bb9e6d78ad86efaca Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 09:53:55 -0300 Subject: [PATCH 236/465] usbvision: fix NULL-deref at probe commit eacb975b48272f54532b62f515a3cf7eefa35123 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. Fixes: 2a9f8b5d25be ("V4L/DVB (5206): Usbvision: set alternate interface modification") Cc: Thierry MERLE Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/usbvision/usbvision-video.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index f5c635a67d74..f9c3325aa4d4 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -1501,7 +1501,14 @@ static int usbvision_probe(struct usb_interface *intf, } for (i = 0; i < usbvision->num_alt; i++) { - u16 tmp = le16_to_cpu(uif->altsetting[i].endpoint[1].desc. + u16 tmp; + + if (uif->altsetting[i].desc.bNumEndpoints < 2) { + ret = -ENODEV; + goto err_pkt; + } + + tmp = le16_to_cpu(uif->altsetting[i].endpoint[1].desc. 
wMaxPacketSize); usbvision->alt_max_pkt_size[i] = (tmp & 0x07ff) * (((tmp & 0x1800) >> 11) + 1); From f2b30b2f19a9cc013fbb04144cea3896df2eabb1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 15:14:13 -0300 Subject: [PATCH 237/465] mceusb: fix NULL-deref at probe commit 03eb2a557ed552e920a0942b774aaf931596eec1 upstream. Make sure to check for the required out endpoint to avoid dereferencing a NULL-pointer in mce_request_packet should a malicious device lack such an endpoint. Note that this path is hit during probe. Fixes: 66e89522aff7 ("V4L/DVB: IR: add mceusb IR receiver driver") Signed-off-by: Johan Hovold Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/mceusb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index 238d8eaf7d94..93b16fe3ab38 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -1288,8 +1288,8 @@ static int mceusb_dev_probe(struct usb_interface *intf, } } } - if (ep_in == NULL) { - dev_dbg(&intf->dev, "inbound and/or endpoint not found"); + if (!ep_in || !ep_out) { + dev_dbg(&intf->dev, "required endpoints not found\n"); return -ENODEV; } From 8d730619ae01ebb8049f3ab292e262048ab0f3ca Mon Sep 17 00:00:00 2001 From: Alyssa Milburn Date: Sat, 1 Apr 2017 14:34:32 -0300 Subject: [PATCH 238/465] ttusb2: limit messages to buffer size commit a12b8ab8c5ff7ccd7b107a564743507c850a441d upstream. Otherwise ttusb2_i2c_xfer can read or write beyond the end of static and heap buffers. Signed-off-by: Alyssa Milburn Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/ttusb2.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/media/usb/dvb-usb/ttusb2.c b/drivers/media/usb/dvb-usb/ttusb2.c index ecc207fbaf3c..9e0d6a4166d2 100644 --- a/drivers/media/usb/dvb-usb/ttusb2.c +++ b/drivers/media/usb/dvb-usb/ttusb2.c @@ -78,6 +78,9 @@ static int ttusb2_msg(struct dvb_usb_device *d, u8 cmd, u8 *s, *r = NULL; int ret = 0; + if (4 + rlen > 64) + return -EIO; + s = kzalloc(wlen+4, GFP_KERNEL); if (!s) return -ENOMEM; @@ -381,6 +384,22 @@ static int ttusb2_i2c_xfer(struct i2c_adapter *adap,struct i2c_msg msg[],int num write_read = i+1 < num && (msg[i+1].flags & I2C_M_RD); read = msg[i].flags & I2C_M_RD; + if (3 + msg[i].len > sizeof(obuf)) { + err("i2c wr len=%d too high", msg[i].len); + break; + } + if (write_read) { + if (3 + msg[i+1].len > sizeof(ibuf)) { + err("i2c rd len=%d too high", msg[i+1].len); + break; + } + } else if (read) { + if (3 + msg[i].len > sizeof(ibuf)) { + err("i2c rd len=%d too high", msg[i].len); + break; + } + } + obuf[0] = (msg[i].addr << 1) | (write_read | read); if (read) obuf[1] = 0; From 79312c5d61429a32761412be85ad7a84bfaa3b5c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 17 Feb 2017 22:30:51 -0200 Subject: [PATCH 239/465] dvb-usb-dibusb-mc-common: Add MODULE_LICENSE commit bf05b65a9fe5f6a6dd3e72cab2aacd8b5b96e41d upstream. dvb-usb-dibusb-mc-common is licensed under GPLv2, and if we don't say so then it won't even load since it needs a GPL-only symbol. 
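For reference, a minimal sketch of the boilerplate such a helper module needs; without a GPL-compatible MODULE_LICENSE the module is treated as proprietary and any EXPORT_SYMBOL_GPL symbol it depends on will not resolve at load time (the description string is illustrative):

	#include <linux/module.h>

	MODULE_LICENSE("GPL");
	MODULE_DESCRIPTION("Common parts of the dibusb driver");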
Fixes: e91455a1495a ("[media] dvb-usb: split out common parts of dibusb") Reported-by: Dominique Dumont Signed-off-by: Ben Hutchings Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dibusb-mc-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/usb/dvb-usb/dibusb-mc-common.c b/drivers/media/usb/dvb-usb/dibusb-mc-common.c index c989cac9343d..0c2bc97436d5 100644 --- a/drivers/media/usb/dvb-usb/dibusb-mc-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-mc-common.c @@ -11,6 +11,8 @@ #include "dibusb.h" +MODULE_LICENSE("GPL"); + /* 3000MC/P stuff */ // Config Adjacent channels Perf -cal22 static struct dibx000_agc_config dib3000p_mt2060_agc_config = { From 33c19ef5e24a2fa281e13af657c5714bc8da6f17 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 11 May 2017 17:26:47 -0700 Subject: [PATCH 240/465] usb: dwc3: gadget: Prevent losing events in event cache commit d325a1de49d61ee11aca58a529571c91ecea7879 upstream. The dwc3 driver can overwite its previous events if its top-half IRQ handler (TH) gets invoked again before processing the events in the cache. We see this as a hang in the file transfer and the host will attempt to reset the device. TH gets the event count and deasserts the interrupt line by writing DWC3_GEVNTSIZ_INTMASK to DWC3_GEVNTSIZ. If there's a new event coming between reading the event count and interrupt deassertion, dwc3 will lose previous pending events. More generally, we will see 0 event count, which should not affect anything. This shouldn't be possible in the current dwc3 implementation. However, through testing and reading the PCIe trace, the TH occasionally still gets invoked one more time after HW interrupt deassertion. (With PCIe legacy interrupts, TH is called repeatedly as long as the interrupt line is asserted). We suspect that there is a small detection delay in the SW. To avoid this issue, Check DWC3_EVENT_PENDING flag to determine if the events are processed in the bottom-half IRQ handler. If not, return IRQ_HANDLED and don't process new event. Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 79e7a3480d51..81199f6ee3bc 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3078,6 +3078,15 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) return IRQ_HANDLED; } + /* + * With PCIe legacy interrupt, test shows that top-half irq handler can + * be called again after HW interrupt deassertion. Check if bottom-half + * irq event handler completes before caching new event to prevent + * losing events. + */ + if (evt->flags & DWC3_EVENT_PENDING) + return IRQ_HANDLED; + count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0)); count &= DWC3_GEVNTCOUNT_MASK; if (!count) From 925f7f9244b2d166efab7c2f92ab7b5d8c2f7717 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 17 May 2017 11:23:11 -0500 Subject: [PATCH 241/465] usb: musb: tusb6010_omap: Do not reset the other direction's packet size commit 6df2b42f7c040d57d9ecb67244e04e905ab87ac6 upstream. We have one register for each EP to set the maximum packet size for both TX and RX. If for example an RX programming would happen before the previous TX transfer finishes we would reset the TX packet side. To fix this issue, only modify the TX or RX part of the register. 
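The tusb6010_omap change above boils down to a read-modify-write of one shared register. Below is a standalone sketch of that pattern, with an ordinary variable standing in for the memory-mapped packet-size register; the field positions (TX in bits 0..10, RX in bits 16..26) follow the masks used in the diff.

/*
 * Update one field of a packed 32-bit "register" without clobbering
 * the other: read, clear only the target field, or in the new value,
 * write back.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t ep_max_packet;      /* fake hardware register */

static void set_tx_packet_sz(uint32_t sz)
{
    uint32_t v = ep_max_packet;     /* read */
    v &= ~0x7ffu;                   /* clear only the TX field */
    v |= sz & 0x7ffu;               /* modify */
    ep_max_packet = v;              /* write back */
}

static void set_rx_packet_sz(uint32_t sz)
{
    uint32_t v = ep_max_packet;
    v &= ~(0x7ffu << 16);           /* clear only the RX field */
    v |= (sz & 0x7ffu) << 16;
    ep_max_packet = v;
}

int main(void)
{
    set_tx_packet_sz(512);
    set_rx_packet_sz(64);           /* must not reset the TX field */
    printf("reg=0x%08x tx=%u rx=%u\n", (unsigned)ep_max_packet,
           (unsigned)(ep_max_packet & 0x7ffu),
           (unsigned)((ep_max_packet >> 16) & 0x7ffu));
    return 0;
}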
Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support") Signed-off-by: Peter Ujfalusi Tested-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/tusb6010_omap.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c index 8b43c4b99f04..7870b37e0ea5 100644 --- a/drivers/usb/musb/tusb6010_omap.c +++ b/drivers/usb/musb/tusb6010_omap.c @@ -219,6 +219,7 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz, u32 dma_remaining; int src_burst, dst_burst; u16 csr; + u32 psize; int ch; s8 dmareq; s8 sync_dev; @@ -390,15 +391,19 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz, if (chdat->tx) { /* Send transfer_packet_sz packets at a time */ - musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, - chdat->transfer_packet_sz); + psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET); + psize &= ~0x7ff; + psize |= chdat->transfer_packet_sz; + musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize); musb_writel(ep_conf, TUSB_EP_TX_OFFSET, TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len)); } else { /* Receive transfer_packet_sz packets at a time */ - musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, - chdat->transfer_packet_sz << 16); + psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET); + psize &= ~(0x7ff << 16); + psize |= (chdat->transfer_packet_sz << 16); + musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize); musb_writel(ep_conf, TUSB_EP_RX_OFFSET, TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len)); From d69737cb64abae66b8d80ee977612a52e9f2d0d8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 17 May 2017 11:23:10 -0500 Subject: [PATCH 242/465] usb: musb: Fix trying to suspend while active for OTG configurations commit 3c50ffef25855a9d9e4b07b02d756a8cdd653069 upstream. Commit d8e5f0eca1e8 ("usb: musb: Fix hardirq-safe hardirq-unsafe lock order error") caused a regression where musb keeps trying to enable host mode with no cable connected. This seems to be caused by the fact that now phy is enabled earlier, and we are wrongly trying to force USB host mode on an OTG port. The errors we are getting are "trying to suspend as a_idle while active". For ports configured as OTG, we should not need to do anything to try to force USB host mode on it's OTG port. Trying to force host mode in this case just seems to completely confuse the musb state machine. Let's fix the issue by making musb_host_setup() attempt to force the mode only if port_mode is configured for host mode. 
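As a toy illustration of the guard described above, this sketch forces a default host role only for a dedicated host port and leaves an OTG port for the role-negotiation machinery; the enum values and function names are invented and only model the idea, not the musb code.

#include <stdio.h>

enum port_mode { PORT_MODE_OTG, PORT_MODE_HOST, PORT_MODE_GADGET };
enum otg_state { OTG_UNDEFINED, OTG_A_IDLE };

struct controller {
    enum port_mode mode;
    enum otg_state state;
    int default_a;
};

static void host_setup(struct controller *c)
{
    if (c->mode == PORT_MODE_HOST) {    /* dedicated host port only */
        c->default_a = 1;
        c->state = OTG_A_IDLE;
    }
    /* common host registration would happen here for all modes */
}

int main(void)
{
    struct controller host = { .mode = PORT_MODE_HOST };
    struct controller otg  = { .mode = PORT_MODE_OTG };

    host_setup(&host);
    host_setup(&otg);
    printf("host port: default_a=%d  otg port: default_a=%d\n",
           host.default_a, otg.default_a);
    return 0;
}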
Fixes: d8e5f0eca1e8 ("usb: musb: Fix hardirq-safe hardirq-unsafe lock order error") Cc: Johan Hovold Reported-by: Laurent Pinchart Reported-by: Peter Ujfalusi Tested-by: Peter Ujfalusi Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index ac3a4952abb4..dbe617a735d8 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2780,10 +2780,11 @@ int musb_host_setup(struct musb *musb, int power_budget) int ret; struct usb_hcd *hcd = musb->hcd; - MUSB_HST_MODE(musb); - musb->xceiv->otg->default_a = 1; - musb->xceiv->otg->state = OTG_STATE_A_IDLE; - + if (musb->port_mode == MUSB_PORT_MODE_HOST) { + MUSB_HST_MODE(musb); + musb->xceiv->otg->default_a = 1; + musb->xceiv->otg->state = OTG_STATE_A_IDLE; + } otg_set_host(musb->xceiv->otg, &hcd->self); hcd->self.otg_port = 1; musb->xceiv->otg->host = &hcd->self; From b58d7087ce99016ed0d8739933f813e03e0f28eb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:36:02 +0200 Subject: [PATCH 243/465] USB: iowarrior: fix info ioctl on big-endian hosts commit dd5ca753fa92fb736b1395db892bd29f78e6d408 upstream. Drop erroneous le16_to_cpu when returning the USB device speed which is already in host byte order. Found using sparse: warning: cast to restricted __le16 Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 37c63cb39714..0ef29d202263 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -554,7 +554,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, info.revision = le16_to_cpu(dev->udev->descriptor.bcdDevice); /* 0==UNKNOWN, 1==LOW(usb1.1) ,2=FULL(usb1.1), 3=HIGH(usb2.0) */ - info.speed = le16_to_cpu(dev->udev->speed); + info.speed = dev->udev->speed; info.if_num = dev->interface->cur_altsetting->desc.bInterfaceNumber; info.report_size = dev->report_size; From d986525b7dfcfc6edc57a060c382aecf34019f62 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 3 May 2017 10:28:54 +0200 Subject: [PATCH 244/465] usb: serial: option: add Telit ME910 support commit 40dd46048c155b8f0683f468c950a1c107f77a7c upstream. This patch adds support for Telit ME910 PID 0x1100. 
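For readers unfamiliar with how these serial-option additions take effect, here is a rough, self-contained sketch of matching a (vendor, product) pair against a static table and consulting a per-device quirk bitmask. The product ID and reserved-interface bits mirror the ME910 entry above; the 0x1bc7 vendor ID and all helper names are assumptions for the example, not quoted from the driver.

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

struct quirk {
    uint16_t vid, pid;
    uint32_t reserved_ifaces;   /* bitmask of interfaces to ignore */
};

static const struct quirk table[] = {
    { 0x1bc7, 0x1100, BIT(1) | BIT(3) },    /* Telit ME910 (assumed VID) */
};

static const struct quirk *match(uint16_t vid, uint16_t pid)
{
    for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
        if (table[i].vid == vid && table[i].pid == pid)
            return &table[i];
    return NULL;
}

int main(void)
{
    const struct quirk *q = match(0x1bc7, 0x1100);

    for (int ifnum = 0; q && ifnum < 4; ifnum++)
        printf("interface %d: %s\n", ifnum,
               (q->reserved_ifaces & BIT(ifnum)) ? "skip" : "bind");
    return 0;
}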
Signed-off-by: Daniele Palmas Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index af67a0de6b5d..3bf61acfc26b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -281,6 +281,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 +#define TELIT_PRODUCT_ME910 0x1100 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 @@ -640,6 +641,11 @@ static const struct option_blacklist_info simcom_sim7100e_blacklist = { .reserved = BIT(5) | BIT(6), }; +static const struct option_blacklist_info telit_me910_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(1) | BIT(3), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -1235,6 +1241,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), + .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), From 07cf1f9d0150e66c2219dd96952ae6a3a8b69bfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 17 May 2017 16:30:50 +0200 Subject: [PATCH 245/465] USB: serial: qcserial: add more Lenovo EM74xx device IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8d7a10dd323993cc40bd37bce8bc570133b0c396 upstream. In their infinite wisdom, and never ending quest for end user frustration, Lenovo has decided to use new USB device IDs for the wwan modules in their 2017 laptops. The actual hardware is still the Sierra Wireless EM7455 or EM7430, depending on region. Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 38b3f0d8cd58..fd509ed6cf70 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */ {DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ + {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ From e4d3ae36fcb248aa1538bc654bd871f39f27b725 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:41:20 +0200 Subject: [PATCH 246/465] USB: serial: mct_u232: fix big-endian baud-rate handling commit 26cede343656c0bc2c33cdc783771282405c7fb2 upstream. 
Drop erroneous cpu_to_le32 when setting the baud rate, something which corrupted the divisor on big-endian hosts. Found using sparse: warning: incorrect type in argument 1 (different base types) expected unsigned int [unsigned] [usertype] val got restricted __le32 [usertype] Fixes: af2ac1a091bc ("USB: serial mct_usb232: move DMA buffers to heap") Reviewed-by: Greg Kroah-Hartman Acked-By: Pete Zaitcev Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mct_u232.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index edbc81f205c2..70f346f1aa86 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -189,7 +189,7 @@ static int mct_u232_set_baud_rate(struct tty_struct *tty, return -ENOMEM; divisor = mct_u232_calculate_baud_rate(serial, value, &speed); - put_unaligned_le32(cpu_to_le32(divisor), buf); + put_unaligned_le32(divisor, buf); rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_BAUD_RATE_REQUEST, MCT_U232_SET_REQUEST_TYPE, From 3a82455292c2b817031db57f6954f8b7e7b1dd38 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 May 2017 11:41:21 +0200 Subject: [PATCH 247/465] USB: serial: io_ti: fix div-by-zero in set_termios commit 6aeb75e6adfaed16e58780309613a578fe1ee90b upstream. Fix a division-by-zero in set_termios when debugging is enabled and a high-enough speed has been requested so that the divisor value becomes zero. Instead of just fixing the offending debug statement, cap the baud rate at the base as a zero divisor value also appears to crash the firmware. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index a76b95d32157..428ae42dd29e 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2349,8 +2349,11 @@ static void change_port_settings(struct tty_struct *tty, if (!baud) { /* pick a default, any default... */ baud = 9600; - } else + } else { + /* Avoid a zero divisor. */ + baud = min(baud, 461550); tty_encode_baud_rate(tty, baud, baud); + } edge_port->baud_rate = baud; config->wBaudRate = (__u16)((461550L + baud/2) / baud); From 345477ed11af2884b086ecefbcc4ce2716b7acab Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:27 +0200 Subject: [PATCH 248/465] USB: hub: fix SS hub-descriptor handling commit 2c25a2c818023df64463aac3288a9f969491e507 upstream. A SuperSpeed hub descriptor does not have any variable-length fields so bail out when reading a short descriptor. This avoids parsing and leaking two bytes of uninitialised slab data through sysfs removable-attributes. 
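A minimal sketch of the rule the hub-descriptor fix above enforces, assuming a made-up fixed-size descriptor and a fake transfer routine: a short read is rejected before any field is parsed, and the buffer is cleared first so nothing stale can leak.

#include <stdio.h>
#include <string.h>

struct fixed_desc {
    unsigned char len;
    unsigned char nbr_ports;
    unsigned char removable[2];
};

/* pretend transfer that only delivers 'avail' bytes of the structure */
static int read_desc(struct fixed_desc *d, size_t avail)
{
    static const unsigned char wire[] = { 4, 2, 0x01, 0x00 };
    size_t n = avail < sizeof(wire) ? avail : sizeof(wire);

    memset(d, 0, sizeof(*d));   /* no stale bytes if the read is short */
    memcpy(d, wire, n);
    return (int)n;
}

static int get_desc(struct fixed_desc *d)
{
    int ret = read_desc(d, 3);  /* simulate a short transfer */

    if (ret != (int)sizeof(*d)) /* bail out instead of parsing */
        return -1;
    return 0;
}

int main(void)
{
    struct fixed_desc d;

    printf("get_desc: %s\n", get_desc(&d) ? "short read rejected" : "ok");
    return 0;
}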
Fixes: dbe79bbe9dcb ("USB 3.0 Hub Changes") Cc: John Youn Acked-by: Alan Stern Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 9dca59ef18b3..3ff1e9f89f2d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -380,8 +380,12 @@ static int get_hub_descriptor(struct usb_device *hdev, void *data) USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB, dtype << 8, 0, data, size, USB_CTRL_GET_TIMEOUT); - if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) + if (hub_is_superspeed(hdev)) { + if (ret == size) + return ret; + } else if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) { return ret; + } } return -EINVAL; } @@ -1321,7 +1325,7 @@ static int hub_configure(struct usb_hub *hub, /* Request the entire hub descriptor. * hub->descriptor can handle USB_MAXCHILDREN ports, - * but the hub can/will return fewer bytes here. + * but a (non-SS) hub can/will return fewer bytes here. */ ret = get_hub_descriptor(hdev, hub->descriptor); if (ret < 0) { From 1e2a08069dea9ac4365c32bec88b2df2f10cd3b7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:28 +0200 Subject: [PATCH 249/465] USB: hub: fix non-SS hub-descriptor handling commit bec444cd1c94c48df409a35ad4e5b143c245c3f7 upstream. Add missing sanity check on the non-SuperSpeed hub-descriptor length in order to avoid parsing and leaking two bytes of uninitialised slab data through sysfs removable-attributes (or a compound-device debug statement). Note that we only make sure that the DeviceRemovable field is always present (and specifically ignore the unused PortPwrCtrlMask field) in order to continue support any hubs with non-compliant descriptors. As a further safeguard, the descriptor buffer is also cleared. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3ff1e9f89f2d..f77a4ebde7d5 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -362,7 +362,8 @@ static void usb_set_lpm_parameters(struct usb_device *udev) } /* USB 2.0 spec Section 11.24.4.5 */ -static int get_hub_descriptor(struct usb_device *hdev, void *data) +static int get_hub_descriptor(struct usb_device *hdev, + struct usb_hub_descriptor *desc) { int i, ret, size; unsigned dtype; @@ -378,12 +379,16 @@ static int get_hub_descriptor(struct usb_device *hdev, void *data) for (i = 0; i < 3; i++) { ret = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB, - dtype << 8, 0, data, size, + dtype << 8, 0, desc, size, USB_CTRL_GET_TIMEOUT); if (hub_is_superspeed(hdev)) { if (ret == size) return ret; - } else if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) { + } else if (ret >= USB_DT_HUB_NONVAR_SIZE + 2) { + /* Make sure we have the DeviceRemovable field. 
*/ + size = USB_DT_HUB_NONVAR_SIZE + desc->bNbrPorts / 8 + 1; + if (ret < size) + return -EMSGSIZE; return ret; } } @@ -1317,7 +1322,7 @@ static int hub_configure(struct usb_hub *hub, } mutex_init(&hub->status_mutex); - hub->descriptor = kmalloc(sizeof(*hub->descriptor), GFP_KERNEL); + hub->descriptor = kzalloc(sizeof(*hub->descriptor), GFP_KERNEL); if (!hub->descriptor) { ret = -ENOMEM; goto fail; From b13b3f39851681b3e7f0f4ea2fcea4a0e47f4f0a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 May 2017 13:58:53 +0300 Subject: [PATCH 250/465] ipx: call ipxitf_put() in ioctl error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ee0d8d8482345ff97a75a7d747efc309f13b0d80 upstream. We should call ipxitf_put() if the copy_to_user() fails. Reported-by: 李强 Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipx/af_ipx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 8a9219ff2e77..fa31ef29e3fa 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1168,11 +1168,10 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg) sipx->sipx_network = ipxif->if_netnum; memcpy(sipx->sipx_node, ipxif->if_node, sizeof(sipx->sipx_node)); - rc = -EFAULT; + rc = 0; if (copy_to_user(arg, &ifr, sizeof(ifr))) - break; + rc = -EFAULT; ipxitf_put(ipxif); - rc = 0; break; } case SIOCAIPXITFCRT: From 03652e488454744b00d57fd2f257c401773bbbf9 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Thu, 13 Apr 2017 23:21:56 -0700 Subject: [PATCH 251/465] iio: proximity: as3935: fix as3935_write commit 84ca8e364acb26aba3292bc113ca8ed4335380fd upstream. AS3935_WRITE_DATA macro bit is incorrect and the actual write sequence is two leading zeros. Cc: George McCollister Signed-off-by: Matt Ranostay Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/proximity/as3935.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index 5656deb17261..020459513384 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -50,7 +50,6 @@ #define AS3935_TUNE_CAP 0x08 #define AS3935_CALIBRATE 0x3D -#define AS3935_WRITE_DATA BIT(15) #define AS3935_READ_DATA BIT(14) #define AS3935_ADDRESS(x) ((x) << 8) @@ -105,7 +104,7 @@ static int as3935_write(struct as3935_state *st, { u8 *buf = st->buf; - buf[0] = (AS3935_WRITE_DATA | AS3935_ADDRESS(reg)) >> 8; + buf[0] = AS3935_ADDRESS(reg) >> 8; buf[1] = val; return spi_write(st->spi, buf, 2); From 0d1015c0e923178c938cd578001bf583f6c9bacc Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 7 Apr 2017 17:13:17 -0700 Subject: [PATCH 252/465] iio: hid-sensor: Store restore poll and hysteresis on S3 commit 5d9854eaea776441b38a9a45b4e6879524c4f48c upstream. This change undo the change done by 'commit 3bec24747446 ("iio: hid-sensor-trigger: Change get poll value function order to avoid sensor properties losing after resume from S3")' as this breaks some USB/i2c sensor hubs. Instead of relying on HW for restoring poll and hysteresis, driver stores and restores on resume (S3). In this way user space modified settings are not lost for any kind of sensor hub behavior. In this change, whenever user space modifies sampling frequency or hysteresis driver will get the feature value from the hub and store in the per device hid_sensor_common data structure. 
On resume callback from S3, system will set the feature to sensor hub, if user space ever modified the feature value. Fixes: 3bec24747446 ("iio: hid-sensor-trigger: Change get poll value function order to avoid sensor properties losing after resume from S3") Reported-by: Ritesh Raj Sarraf Tested-by: Ritesh Raj Sarraf Tested-by: Song, Hongyan Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- .../hid-sensors/hid-sensor-attributes.c | 26 +++++++++++++++++-- .../common/hid-sensors/hid-sensor-trigger.c | 20 +++++++++++--- include/linux/hid-sensor-hub.h | 2 ++ 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index 01e02b9926d4..ca742ac8f128 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -221,7 +221,15 @@ int hid_sensor_write_samp_freq_value(struct hid_sensor_common *st, if (ret < 0 || value < 0) ret = -EINVAL; - return ret; + ret = sensor_hub_get_feature(st->hsdev, + st->poll.report_id, + st->poll.index, sizeof(value), &value); + if (ret < 0 || value < 0) + return -EINVAL; + + st->poll_interval = value; + + return 0; } EXPORT_SYMBOL(hid_sensor_write_samp_freq_value); @@ -266,7 +274,16 @@ int hid_sensor_write_raw_hyst_value(struct hid_sensor_common *st, if (ret < 0 || value < 0) ret = -EINVAL; - return ret; + ret = sensor_hub_get_feature(st->hsdev, + st->sensitivity.report_id, + st->sensitivity.index, sizeof(value), + &value); + if (ret < 0 || value < 0) + return -EINVAL; + + st->raw_hystersis = value; + + return 0; } EXPORT_SYMBOL(hid_sensor_write_raw_hyst_value); @@ -369,6 +386,9 @@ int hid_sensor_get_reporting_interval(struct hid_sensor_hub_device *hsdev, /* Default unit of measure is milliseconds */ if (st->poll.units == 0) st->poll.units = HID_USAGE_SENSOR_UNITS_MILLISECOND; + + st->poll_interval = -1; + return 0; } @@ -399,6 +419,8 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev, HID_USAGE_SENSOR_PROP_SENSITIVITY_ABS, &st->sensitivity); + st->raw_hystersis = -1; + sensor_hub_input_get_attribute_info(hsdev, HID_INPUT_REPORT, usage_id, HID_USAGE_SENSOR_TIME_TIMESTAMP, diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index ecf592d69043..60829340a82e 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -51,6 +51,8 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) st->report_state.report_id, st->report_state.index, HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM); + + poll_value = hid_sensor_read_poll_value(st); } else { int val; @@ -87,9 +89,7 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) sensor_hub_get_feature(st->hsdev, st->power_state.report_id, st->power_state.index, sizeof(state_val), &state_val); - if (state) - poll_value = hid_sensor_read_poll_value(st); - if (poll_value > 0) + if (state && poll_value) msleep_interruptible(poll_value * 2); return 0; @@ -127,6 +127,20 @@ static void hid_sensor_set_power_work(struct work_struct *work) struct hid_sensor_common *attrb = container_of(work, struct hid_sensor_common, work); + + if (attrb->poll_interval >= 0) + sensor_hub_set_feature(attrb->hsdev, attrb->poll.report_id, + attrb->poll.index, + sizeof(attrb->poll_interval), + 
&attrb->poll_interval); + + if (attrb->raw_hystersis >= 0) + sensor_hub_set_feature(attrb->hsdev, + attrb->sensitivity.report_id, + attrb->sensitivity.index, + sizeof(attrb->raw_hystersis), + &attrb->raw_hystersis); + _hid_sensor_power_state(attrb, true); } diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 7ef111d3ecc5..f32d7c392c1e 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -231,6 +231,8 @@ struct hid_sensor_common { unsigned usage_id; atomic_t data_ready; atomic_t user_requested_state; + int poll_interval; + int raw_hystersis; struct iio_trigger *trigger; int timestamp_ns_scale; struct hid_sensor_hub_attribute_info poll; From 776a591f7546796baf0629b6cd34c0abd30fc903 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 4 Apr 2017 09:32:19 -0300 Subject: [PATCH 253/465] cec: Fix runtime BUG when (CONFIG_RC_CORE && !CEC_CAP_RC) commit 43c0c03961d0b19bd225a336897606b46e0021a6 upstream. Currently when the RC Core is enabled (reachable) core code located in cec_register_adapter() attempts to populate the RC structure with a pointer to the 'parent' passed in by the caller. Unfortunately if the caller did not specify RC capability when calling cec_allocate_adapter(), then there will be no RC structure to populate. This causes a "NULL pointer dereference" error. Fixes: f51e80804f0 ("[media] cec: pass parent device in register(), not allocate()") Signed-off-by: Lee Jones Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/cec-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/cec/cec-core.c b/drivers/media/cec/cec-core.c index 37217e205040..cfe414aa0915 100644 --- a/drivers/media/cec/cec-core.c +++ b/drivers/media/cec/cec-core.c @@ -286,8 +286,8 @@ int cec_register_adapter(struct cec_adapter *adap, adap->devnode.dev.parent = parent; #if IS_REACHABLE(CONFIG_RC_CORE) - adap->rc->dev.parent = parent; if (adap->capabilities & CEC_CAP_RC) { + adap->rc->dev.parent = parent; res = rc_register_device(adap->rc); if (res) { From d926ceaa72c89eb928f02b19ac602db6884dc0f3 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 23 Feb 2017 08:43:27 -0300 Subject: [PATCH 254/465] s5p-mfc: Fix race between interrupt routine and device functions commit 0c32b8ec02832df167e16ad659cb11dc148f2ddf upstream. Interrupt routine must wake process waiting for given interrupt AFTER updating driver's internal structures and contexts. Doing it in-between is a serious bug. This patch moves all calls to the wake() function to the end of the interrupt processing block to avoid potential and real races, especially on multi-core platforms. 
This also fixes following issue reported from clock core (clocks were disabled in interrupt after being unprepared from the other place in the driver, the stack trace however points to the different place than s5p_mfc driver because of the race): WARNING: CPU: 1 PID: 18 at drivers/clk/clk.c:544 clk_core_unprepare+0xc8/0x108 Modules linked in: CPU: 1 PID: 18 Comm: kworker/1:0 Not tainted 4.10.0-next-20170223-00070-g04e18bc99ab9-dirty #2154 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) Workqueue: pm pm_runtime_work [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x74/0x94) [] (dump_stack) from [] (__warn+0xd4/0x100) [] (__warn) from [] (warn_slowpath_null+0x20/0x28) [] (warn_slowpath_null) from [] (clk_core_unprepare+0xc8/0x108) [] (clk_core_unprepare) from [] (clk_unprepare+0x24/0x2c) [] (clk_unprepare) from [] (exynos_sysmmu_suspend+0x48/0x60) [] (exynos_sysmmu_suspend) from [] (pm_generic_runtime_suspend+0x2c/0x38) [] (pm_generic_runtime_suspend) from [] (genpd_runtime_suspend+0x94/0x220) [] (genpd_runtime_suspend) from [] (__rpm_callback+0x134/0x208) [] (__rpm_callback) from [] (rpm_callback+0x20/0x80) [] (rpm_callback) from [] (rpm_suspend+0xdc/0x458) [] (rpm_suspend) from [] (pm_runtime_work+0x80/0x90) [] (pm_runtime_work) from [] (process_one_work+0x120/0x318) [] (process_one_work) from [] (worker_thread+0x2c/0x4ac) [] (worker_thread) from [] (kthread+0xfc/0x134) [] (kthread) from [] (ret_from_fork+0x14/0x3c) ---[ end trace 1ead49a7bb83f0d8 ]--- Fixes: af93574678108 ("[media] MFC: Add MFC 5.1 V4L2 driver") Signed-off-by: Marek Szyprowski Reviewed-by: Javier Martinez Canillas Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index bb0a5887c9a9..b7fbb02a0da8 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -666,9 +666,9 @@ static irqreturn_t s5p_mfc_irq(int irq, void *priv) break; } s5p_mfc_hw_call(dev->mfc_ops, clear_int_flags, dev); - wake_up_ctx(ctx, reason, err); WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0); s5p_mfc_clock_off(); + wake_up_ctx(ctx, reason, err); s5p_mfc_hw_call(dev->mfc_ops, try_run, dev); } else { s5p_mfc_handle_frame(ctx, reason, err); @@ -682,15 +682,11 @@ static irqreturn_t s5p_mfc_irq(int irq, void *priv) case S5P_MFC_R2H_CMD_OPEN_INSTANCE_RET: ctx->inst_no = s5p_mfc_hw_call(dev->mfc_ops, get_inst_no, dev); ctx->state = MFCINST_GOT_INST; - clear_work_bit(ctx); - wake_up(&ctx->queue); goto irq_cleanup_hw; case S5P_MFC_R2H_CMD_CLOSE_INSTANCE_RET: - clear_work_bit(ctx); ctx->inst_no = MFC_NO_INSTANCE_SET; ctx->state = MFCINST_FREE; - wake_up(&ctx->queue); goto irq_cleanup_hw; case S5P_MFC_R2H_CMD_SYS_INIT_RET: @@ -700,9 +696,9 @@ static irqreturn_t s5p_mfc_irq(int irq, void *priv) if (ctx) clear_work_bit(ctx); s5p_mfc_hw_call(dev->mfc_ops, clear_int_flags, dev); - wake_up_dev(dev, reason, err); clear_bit(0, &dev->hw_lock); clear_bit(0, &dev->enter_suspend); + wake_up_dev(dev, reason, err); break; case S5P_MFC_R2H_CMD_INIT_BUFFERS_RET: @@ -717,9 +713,7 @@ static irqreturn_t s5p_mfc_irq(int irq, void *priv) break; case S5P_MFC_R2H_CMD_DPB_FLUSH_RET: - clear_work_bit(ctx); ctx->state = MFCINST_RUNNING; - wake_up(&ctx->queue); goto irq_cleanup_hw; default: @@ -738,6 +732,8 @@ static irqreturn_t 
s5p_mfc_irq(int irq, void *priv) mfc_err("Failed to unlock hw\n"); s5p_mfc_clock_off(); + clear_work_bit(ctx); + wake_up(&ctx->queue); s5p_mfc_hw_call(dev->mfc_ops, try_run, dev); spin_unlock(&dev->irqlock); From 4f220307a8b232b3b1ddc1de3cc3c17a80ae3a01 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 09:53:59 -0300 Subject: [PATCH 255/465] gspca: konica: add missing endpoint sanity check commit aa58fedb8c7b6cf2f05941d238495f9e2f29655c upstream. Make sure to check the number of endpoints to avoid accessing memory beyond the endpoint array should a device lack the expected endpoints. Note that, as far as I can tell, the gspca framework has already made sure there is at least one endpoint in the current alternate setting so there should be no risk for a NULL-pointer dereference here. Fixes: b517af722860 ("V4L/DVB: gspca_konica: New gspca subdriver for konica chipset using cams") Cc: Hans de Goede Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/konica.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/usb/gspca/konica.c b/drivers/media/usb/gspca/konica.c index 71f273377f83..31b2117e8f1d 100644 --- a/drivers/media/usb/gspca/konica.c +++ b/drivers/media/usb/gspca/konica.c @@ -184,6 +184,9 @@ static int sd_start(struct gspca_dev *gspca_dev) return -EIO; } + if (alt->desc.bNumEndpoints < 2) + return -ENODEV; + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); n = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; From 6af3917b8dddc42e2bfc84fb89d1e186281f1ffb Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 22 Mar 2017 04:53:57 -0300 Subject: [PATCH 256/465] s5p-mfc: Fix unbalanced call to clock management commit a5cb00eb4223458250b55daf03ac7ea5f424d601 upstream. Clock should be turned off after calling s5p_mfc_init_hw() from the watchdog worker, like it is already done in the s5p_mfc_open() which also calls this function. Fixes: af93574678108 ("[media] MFC: Add MFC 5.1 V4L2 driver") Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index b7fbb02a0da8..6152a0587723 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -206,6 +206,7 @@ static void s5p_mfc_watchdog_worker(struct work_struct *work) } s5p_mfc_clock_on(); ret = s5p_mfc_init_hw(dev); + s5p_mfc_clock_off(); if (ret) mfc_err("Failed to reinit FW\n"); } From 20c98053a259118ec89c91af73630e2a21583815 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 09:53:54 -0300 Subject: [PATCH 257/465] dib0700: fix NULL-deref at probe commit d5823511c0f8719a39e72ede1bce65411ac653b7 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. 
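Looking back at the s5p-mfc interrupt fix a few patches above, the underlying rule is easy to show in a standalone pthread sketch (illustrative only, with no relation to the driver's actual locking): the shared state is fully updated before the waiter is signalled, so the woken thread never reads stale values.

/* build with: cc -pthread example.c */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int int_reason;      /* state the waiter wants to read */
static int int_done;

static void *irq_bottom_half(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    int_reason = 42;                /* update the context first ...      */
    int_done = 1;
    pthread_cond_signal(&cond);     /* ... and only then wake the waiter */
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, irq_bottom_half, NULL);

    pthread_mutex_lock(&lock);
    while (!int_done)
        pthread_cond_wait(&cond, &lock);
    printf("woken with reason=%d\n", int_reason);
    pthread_mutex_unlock(&lock);

    pthread_join(t, NULL);
    return 0;
}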
Fixes: c4018fa2e4c0 ("[media] dib0700: fix RC support on Hauppauge Nova-TD") Cc: Mauro Carvalho Chehab Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dib0700_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index dd5edd3a17ee..08acdd32e412 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -809,6 +809,9 @@ int dib0700_rc_setup(struct dvb_usb_device *d, struct usb_interface *intf) /* Starting in firmware 1.20, the RC info is provided on a bulk pipe */ + if (intf->altsetting[0].desc.bNumEndpoints < rc_ep + 1) + return -ENODEV; + purb = usb_alloc_urb(0, GFP_KERNEL); if (purb == NULL) return -ENOMEM; From ac6dedddbadc66774a7249904c6a562f236fd8ef Mon Sep 17 00:00:00 2001 From: Alyssa Milburn Date: Sat, 1 Apr 2017 14:34:08 -0300 Subject: [PATCH 258/465] zr364xx: enforce minimum size when reading header commit ee0fe833d96793853335844b6d99fb76bd12cbeb upstream. This code copies actual_length-128 bytes from the header, which will underflow if the received buffer is too small. Signed-off-by: Alyssa Milburn Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/zr364xx/zr364xx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c index f2d6fc03dda0..efdcd5bd6a4c 100644 --- a/drivers/media/usb/zr364xx/zr364xx.c +++ b/drivers/media/usb/zr364xx/zr364xx.c @@ -600,6 +600,14 @@ static int zr364xx_read_video_callback(struct zr364xx_camera *cam, ptr = pdest = frm->lpvbits; if (frm->ulState == ZR364XX_READ_IDLE) { + if (purb->actual_length < 128) { + /* header incomplete */ + dev_info(&cam->udev->dev, + "%s: buffer (%d bytes) too small to hold jpeg header. Discarding.\n", + __func__, purb->actual_length); + return -EINVAL; + } + frm->ulState = ZR364XX_READ_FRAME; frm->cur_size = 0; From 9e074c588db15a80c936d432015e8416a35d4956 Mon Sep 17 00:00:00 2001 From: Daniel Scheller Date: Sun, 19 Mar 2017 12:26:39 -0300 Subject: [PATCH 259/465] dvb-frontends/cxd2841er: define symbol_rate_min/max in T/C fe-ops commit 158f0328af86a99d64073851967a02694bff987d upstream. Fixes "w_scan -f c" complaining with This dvb driver is *buggy*: the symbol rate limits are undefined - please report to linuxtv.org) Signed-off-by: Daniel Scheller Acked-by: Abylay Ospan Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/cxd2841er.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index 614bfb3740f1..ce37dc2e89c7 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -3852,7 +3852,9 @@ static struct dvb_frontend_ops cxd2841er_t_c_ops = { FE_CAN_MUTE_TS | FE_CAN_2G_MODULATION, .frequency_min = 42000000, - .frequency_max = 1002000000 + .frequency_max = 1002000000, + .symbol_rate_min = 870000, + .symbol_rate_max = 11700000 }, .init = cxd2841er_init_tc, .sleep = cxd2841er_sleep_tc, From f953d6b03a2fc2837ef6c2aee66cdf55ee95e28a Mon Sep 17 00:00:00 2001 From: Alyssa Milburn Date: Sat, 1 Apr 2017 14:33:42 -0300 Subject: [PATCH 260/465] digitv: limit messages to buffer size commit 821117dc21083a99dd99174c10848d70ff43de29 upstream. 
Return an error rather than memcpy()ing beyond the end of the buffer. Internal callers use appropriate sizes, but digitv_i2c_xfer may not. Signed-off-by: Alyssa Milburn Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/digitv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/usb/dvb-usb/digitv.c b/drivers/media/usb/dvb-usb/digitv.c index 4284f6984dc1..475a3c0cdee7 100644 --- a/drivers/media/usb/dvb-usb/digitv.c +++ b/drivers/media/usb/dvb-usb/digitv.c @@ -33,6 +33,9 @@ static int digitv_ctrl_msg(struct dvb_usb_device *d, wo = (rbuf == NULL || rlen == 0); /* write-only */ + if (wlen > 4 || rlen > 4) + return -EIO; + memset(st->sndbuf, 0, 7); memset(st->rcvbuf, 0, 7); From a28e1f13a6e35a4ca078711f4383a9f3d209f15c Mon Sep 17 00:00:00 2001 From: Alyssa Milburn Date: Sat, 1 Apr 2017 14:34:49 -0300 Subject: [PATCH 261/465] dw2102: limit messages to buffer size commit 950e252cb469f323740d78e4907843acef89eedb upstream. Otherwise the i2c transfer functions can read or write beyond the end of stack or heap buffers. Signed-off-by: Alyssa Milburn Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dw2102.c | 54 ++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index 4f42d57f81d9..6e654e5026dd 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -204,6 +204,20 @@ static int dw2102_serit_i2c_transfer(struct i2c_adapter *adap, switch (num) { case 2: + if (msg[0].len != 1) { + warn("i2c rd: len=%d is not 1!\n", + msg[0].len); + num = -EOPNOTSUPP; + break; + } + + if (2 + msg[1].len > sizeof(buf6)) { + warn("i2c rd: len=%d is too big!\n", + msg[1].len); + num = -EOPNOTSUPP; + break; + } + /* read si2109 register by number */ buf6[0] = msg[0].addr << 1; buf6[1] = msg[0].len; @@ -219,6 +233,13 @@ static int dw2102_serit_i2c_transfer(struct i2c_adapter *adap, case 1: switch (msg[0].addr) { case 0x68: + if (2 + msg[0].len > sizeof(buf6)) { + warn("i2c wr: len=%d is too big!\n", + msg[0].len); + num = -EOPNOTSUPP; + break; + } + /* write to si2109 register */ buf6[0] = msg[0].addr << 1; buf6[1] = msg[0].len; @@ -262,6 +283,13 @@ static int dw2102_earda_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg ms /* first write first register number */ u8 ibuf[MAX_XFER_SIZE], obuf[3]; + if (2 + msg[0].len != sizeof(obuf)) { + warn("i2c rd: len=%d is not 1!\n", + msg[0].len); + ret = -EOPNOTSUPP; + goto unlock; + } + if (2 + msg[1].len > sizeof(ibuf)) { warn("i2c rd: len=%d is too big!\n", msg[1].len); @@ -462,6 +490,12 @@ static int dw3101_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[], /* first write first register number */ u8 ibuf[MAX_XFER_SIZE], obuf[3]; + if (2 + msg[0].len != sizeof(obuf)) { + warn("i2c rd: len=%d is not 1!\n", + msg[0].len); + ret = -EOPNOTSUPP; + goto unlock; + } if (2 + msg[1].len > sizeof(ibuf)) { warn("i2c rd: len=%d is too big!\n", msg[1].len); @@ -696,6 +730,13 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[], msg[0].buf[0] = state->data[1]; break; default: + if (3 + msg[0].len > sizeof(state->data)) { + warn("i2c wr: len=%d is too big!\n", + msg[0].len); + num = -EOPNOTSUPP; + break; + } + /* always i2c write*/ state->data[0] = 0x08; state->data[1] = msg[0].addr; @@ -711,6 +752,19 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[], break; case 2: /* always i2c 
read */ + if (4 + msg[0].len > sizeof(state->data)) { + warn("i2c rd: len=%d is too big!\n", + msg[0].len); + num = -EOPNOTSUPP; + break; + } + if (1 + msg[1].len > sizeof(state->data)) { + warn("i2c rd: len=%d is too big!\n", + msg[1].len); + num = -EOPNOTSUPP; + break; + } + state->data[0] = 0x09; state->data[1] = msg[0].len; state->data[2] = msg[1].len; From 8bb9fa63d0295cd03ad090c68a8a69b0eb98863b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 09:53:57 -0300 Subject: [PATCH 262/465] cx231xx-audio: fix init error path commit fff1abc4d54e469140a699612b4db8d6397bfcba upstream. Make sure to release the snd_card also on a late allocation error. Fixes: e0d3bafd0258 ("V4L/DVB (10954): Add cx231xx USB driver") Cc: Sri Deevi Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/cx231xx/cx231xx-audio.c | 25 +++++++++++++---------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/media/usb/cx231xx/cx231xx-audio.c b/drivers/media/usb/cx231xx/cx231xx-audio.c index cf80842dfa08..f3729d6eb46a 100644 --- a/drivers/media/usb/cx231xx/cx231xx-audio.c +++ b/drivers/media/usb/cx231xx/cx231xx-audio.c @@ -670,10 +670,8 @@ static int cx231xx_audio_init(struct cx231xx *dev) spin_lock_init(&adev->slock); err = snd_pcm_new(card, "Cx231xx Audio", 0, 0, 1, &pcm); - if (err < 0) { - snd_card_free(card); - return err; - } + if (err < 0) + goto err_free_card; snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &snd_cx231xx_pcm_capture); @@ -687,10 +685,9 @@ static int cx231xx_audio_init(struct cx231xx *dev) INIT_WORK(&dev->wq_trigger, audio_trigger); err = snd_card_register(card); - if (err < 0) { - snd_card_free(card); - return err; - } + if (err < 0) + goto err_free_card; + adev->sndcard = card; adev->udev = dev->udev; @@ -709,9 +706,10 @@ static int cx231xx_audio_init(struct cx231xx *dev) "audio EndPoint Addr 0x%x, Alternate settings: %i\n", adev->end_point_addr, adev->num_alt); adev->alt_max_pkt_size = kmalloc(32 * adev->num_alt, GFP_KERNEL); - - if (adev->alt_max_pkt_size == NULL) - return -ENOMEM; + if (!adev->alt_max_pkt_size) { + err = -ENOMEM; + goto err_free_card; + } for (i = 0; i < adev->num_alt; i++) { u16 tmp = @@ -725,6 +723,11 @@ static int cx231xx_audio_init(struct cx231xx *dev) } return 0; + +err_free_card: + snd_card_free(card); + + return err; } static int cx231xx_audio_fini(struct cx231xx *dev) From 6f623a34905f5723d514c89495401a1bdda9cd5d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 09:53:58 -0300 Subject: [PATCH 263/465] cx231xx-audio: fix NULL-deref at probe commit 65f921647f4c89a2068478c89691f39b309b58f7 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. 
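The endpoint checks added by the usbvision, konica, dib0700 and cx231xx fixes in this series all follow the same shape; below is a minimal stand-alone sketch with simplified stand-ins for the USB core structures.

#include <stdio.h>

struct endpoint { unsigned char addr; };

struct altsetting {
    int num_endpoints;
    const struct endpoint *endpoint;
};

static int probe_uses_second_endpoint(const struct altsetting *alt)
{
    if (alt->num_endpoints < 2)     /* device lies: bail out, e.g. -ENODEV */
        return -1;
    return alt->endpoint[1].addr;   /* only now is this index safe */
}

int main(void)
{
    const struct endpoint eps[2] = { { 0x81 }, { 0x02 } };
    struct altsetting good = { 2, eps };
    struct altsetting bad  = { 1, eps };    /* claims only one endpoint */

    printf("good: %d\n", probe_uses_second_endpoint(&good));
    printf("bad:  %d\n", probe_uses_second_endpoint(&bad));
    return 0;
}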
Fixes: e0d3bafd0258 ("V4L/DVB (10954): Add cx231xx USB driver") Cc: Sri Deevi Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/cx231xx/cx231xx-audio.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/cx231xx/cx231xx-audio.c b/drivers/media/usb/cx231xx/cx231xx-audio.c index f3729d6eb46a..a050d125934c 100644 --- a/drivers/media/usb/cx231xx/cx231xx-audio.c +++ b/drivers/media/usb/cx231xx/cx231xx-audio.c @@ -697,6 +697,11 @@ static int cx231xx_audio_init(struct cx231xx *dev) hs_config_info[0].interface_info. audio_index + 1]; + if (uif->altsetting[0].desc.bNumEndpoints < isoc_pipe + 1) { + err = -ENODEV; + goto err_free_card; + } + adev->end_point_addr = uif->altsetting[0].endpoint[isoc_pipe].desc. bEndpointAddress; @@ -712,8 +717,14 @@ static int cx231xx_audio_init(struct cx231xx *dev) } for (i = 0; i < adev->num_alt; i++) { - u16 tmp = - le16_to_cpu(uif->altsetting[i].endpoint[isoc_pipe].desc. + u16 tmp; + + if (uif->altsetting[i].desc.bNumEndpoints < isoc_pipe + 1) { + err = -ENODEV; + goto err_free_pkt_size; + } + + tmp = le16_to_cpu(uif->altsetting[i].endpoint[isoc_pipe].desc. wMaxPacketSize); adev->alt_max_pkt_size[i] = (tmp & 0x07ff) * (((tmp & 0x1800) >> 11) + 1); @@ -724,6 +735,8 @@ static int cx231xx_audio_init(struct cx231xx *dev) return 0; +err_free_pkt_size: + kfree(adev->alt_max_pkt_size); err_free_card: snd_card_free(card); From 5617775ed133092e521a617f45c9b6beb7e77e74 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 09:53:56 -0300 Subject: [PATCH 264/465] cx231xx-cards: fix NULL-deref at probe commit 0cd273bb5e4d1828efaaa8dfd11b7928131ed149 upstream. Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. 
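The wMaxPacketSize arithmetic repeated in the cx231xx diffs around this point decodes a USB 2.0 high-bandwidth endpoint: bits 10..0 carry the base packet size and bits 12..11 the number of additional transactions per microframe, so the usable payload is size * (additional + 1). A small self-contained illustration, with arbitrary sample values:

#include <stdint.h>
#include <stdio.h>

static unsigned int payload_per_uframe(uint16_t w_max_packet_size)
{
    unsigned int base = w_max_packet_size & 0x07ff;
    unsigned int extra = (w_max_packet_size & 0x1800) >> 11;

    return base * (extra + 1);
}

int main(void)
{
    /* 1024-byte packets, 2 additional transactions -> 3072 bytes */
    printf("%u\n", payload_per_uframe((2 << 11) | 1024));
    /* plain 512-byte value, no additional transactions -> 512 bytes */
    printf("%u\n", payload_per_uframe(512));
    return 0;
}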
Fixes: e0d3bafd0258 ("V4L/DVB (10954): Add cx231xx USB driver") Cc: Sri Deevi Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/cx231xx/cx231xx-cards.c | 45 ++++++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/cx231xx/cx231xx-cards.c b/drivers/media/usb/cx231xx/cx231xx-cards.c index f730fdbc9156..f850267a0095 100644 --- a/drivers/media/usb/cx231xx/cx231xx-cards.c +++ b/drivers/media/usb/cx231xx/cx231xx-cards.c @@ -1426,6 +1426,9 @@ static int cx231xx_init_v4l2(struct cx231xx *dev, uif = udev->actconfig->interface[idx]; + if (uif->altsetting[0].desc.bNumEndpoints < isoc_pipe + 1) + return -ENODEV; + dev->video_mode.end_point_addr = uif->altsetting[0].endpoint[isoc_pipe].desc.bEndpointAddress; dev->video_mode.num_alt = uif->num_altsetting; @@ -1439,7 +1442,12 @@ static int cx231xx_init_v4l2(struct cx231xx *dev, return -ENOMEM; for (i = 0; i < dev->video_mode.num_alt; i++) { - u16 tmp = le16_to_cpu(uif->altsetting[i].endpoint[isoc_pipe].desc.wMaxPacketSize); + u16 tmp; + + if (uif->altsetting[i].desc.bNumEndpoints < isoc_pipe + 1) + return -ENODEV; + + tmp = le16_to_cpu(uif->altsetting[i].endpoint[isoc_pipe].desc.wMaxPacketSize); dev->video_mode.alt_max_pkt_size[i] = (tmp & 0x07ff) * (((tmp & 0x1800) >> 11) + 1); dev_dbg(dev->dev, "Alternate setting %i, max size= %i\n", i, @@ -1456,6 +1464,9 @@ static int cx231xx_init_v4l2(struct cx231xx *dev, } uif = udev->actconfig->interface[idx]; + if (uif->altsetting[0].desc.bNumEndpoints < isoc_pipe + 1) + return -ENODEV; + dev->vbi_mode.end_point_addr = uif->altsetting[0].endpoint[isoc_pipe].desc. bEndpointAddress; @@ -1472,8 +1483,12 @@ static int cx231xx_init_v4l2(struct cx231xx *dev, return -ENOMEM; for (i = 0; i < dev->vbi_mode.num_alt; i++) { - u16 tmp = - le16_to_cpu(uif->altsetting[i].endpoint[isoc_pipe]. + u16 tmp; + + if (uif->altsetting[i].desc.bNumEndpoints < isoc_pipe + 1) + return -ENODEV; + + tmp = le16_to_cpu(uif->altsetting[i].endpoint[isoc_pipe]. desc.wMaxPacketSize); dev->vbi_mode.alt_max_pkt_size[i] = (tmp & 0x07ff) * (((tmp & 0x1800) >> 11) + 1); @@ -1493,6 +1508,9 @@ static int cx231xx_init_v4l2(struct cx231xx *dev, } uif = udev->actconfig->interface[idx]; + if (uif->altsetting[0].desc.bNumEndpoints < isoc_pipe + 1) + return -ENODEV; + dev->sliced_cc_mode.end_point_addr = uif->altsetting[0].endpoint[isoc_pipe].desc. bEndpointAddress; @@ -1507,7 +1525,12 @@ static int cx231xx_init_v4l2(struct cx231xx *dev, return -ENOMEM; for (i = 0; i < dev->sliced_cc_mode.num_alt; i++) { - u16 tmp = le16_to_cpu(uif->altsetting[i].endpoint[isoc_pipe]. + u16 tmp; + + if (uif->altsetting[i].desc.bNumEndpoints < isoc_pipe + 1) + return -ENODEV; + + tmp = le16_to_cpu(uif->altsetting[i].endpoint[isoc_pipe]. desc.wMaxPacketSize); dev->sliced_cc_mode.alt_max_pkt_size[i] = (tmp & 0x07ff) * (((tmp & 0x1800) >> 11) + 1); @@ -1676,6 +1699,11 @@ static int cx231xx_usb_probe(struct usb_interface *interface, } uif = udev->actconfig->interface[idx]; + if (uif->altsetting[0].desc.bNumEndpoints < isoc_pipe + 1) { + retval = -ENODEV; + goto err_video_alt; + } + dev->ts1_mode.end_point_addr = uif->altsetting[0].endpoint[isoc_pipe]. desc.bEndpointAddress; @@ -1693,7 +1721,14 @@ static int cx231xx_usb_probe(struct usb_interface *interface, } for (i = 0; i < dev->ts1_mode.num_alt; i++) { - u16 tmp = le16_to_cpu(uif->altsetting[i]. 
+ u16 tmp; + + if (uif->altsetting[i].desc.bNumEndpoints < isoc_pipe + 1) { + retval = -ENODEV; + goto err_video_alt; + } + + tmp = le16_to_cpu(uif->altsetting[i]. endpoint[isoc_pipe].desc. wMaxPacketSize); dev->ts1_mode.alt_max_pkt_size[i] = From 63f44c113b22452921d41b5314f9781c5bf709b3 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 19 Apr 2017 16:38:26 +1000 Subject: [PATCH 265/465] powerpc/mm: Ensure IRQs are off in switch_mm() commit 9765ad134a00a01cbcc69c78ff6defbfad209bc5 upstream. powerpc expects IRQs to already be (soft) disabled when switch_mm() is called, as made clear in the commit message of 9c1e105238c4 ("powerpc: Allow perf_counters to access user memory at interrupt time"). Aside from any race conditions that might exist between switch_mm() and an IRQ, there is also an unconditional hard_irq_disable() in switch_slb(). If that isn't followed at some point by an IRQ enable then interrupts will remain disabled until we return to userspace. It is true that when switch_mm() is called from the scheduler IRQs are off, but not when it's called by use_mm(). Looking closer we see that last year in commit f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler") this was made more explicit by the addition of switch_mm_irqs_off() which is now called by the scheduler, vs switch_mm() which is used by use_mm(). Arguably it is a bug in use_mm() to call switch_mm() in a different context than it expects, but fixing that will take time. This was discovered recently when vhost started throwing warnings such as: BUG: sleeping function called from invalid context at kernel/mutex.c:578 in_atomic(): 0, irqs_disabled(): 1, pid: 10768, name: vhost-10760 no locks held by vhost-10760/10768. irq event stamp: 10 hardirqs last enabled at (9): _raw_spin_unlock_irq+0x40/0x80 hardirqs last disabled at (10): switch_slb+0x2e4/0x490 softirqs last enabled at (0): copy_process+0x5e8/0x1260 softirqs last disabled at (0): (null) Call Trace: show_stack+0x88/0x390 (unreliable) dump_stack+0x30/0x44 __might_sleep+0x1c4/0x2d0 mutex_lock_nested+0x74/0x5c0 cgroup_attach_task_all+0x5c/0x180 vhost_attach_cgroups_work+0x58/0x80 [vhost] vhost_worker+0x24c/0x3d0 [vhost] kthread+0xec/0x100 ret_from_kernel_thread+0x5c/0xd4 Prior to commit 04b96e5528ca ("vhost: lockless enqueuing") (Aug 2016) the vhost_worker() would do a spin_unlock_irq() not long after calling use_mm(), which had the effect of reenabling IRQs. Since that commit removed the locking in vhost_worker() the body of the vhost_worker() loop now runs with interrupts off causing the warnings. This patch addresses the problem by making the powerpc code mirror the x86 code, ie. we disable interrupts in switch_mm(), and optimise the scheduler case by defining switch_mm_irqs_off(). 
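The switch_mm()/switch_mm_irqs_off() split described above follows a common wrapper pattern: the _irqs_off variant assumes interrupts are already disabled, and the plain entry point disables and restores them around it for other callers. Here is a toy model with the interrupt state faked by a flag; switch_ctx() and the local_irq_save()/restore() helpers below are simplified mock-ups, not the kernel's macros.

#include <assert.h>
#include <stdio.h>

static int irqs_enabled = 1;    /* stand-in for the real IRQ state */

static int local_irq_save(void)
{
    int flags = irqs_enabled;

    irqs_enabled = 0;
    return flags;
}

static void local_irq_restore(int flags)
{
    irqs_enabled = flags;
}

static void switch_ctx_irqs_off(const char *next)
{
    assert(!irqs_enabled);      /* caller must have disabled interrupts */
    printf("switching to %s with IRQs off\n", next);
}

static void switch_ctx(const char *next)
{
    int flags = local_irq_save();

    switch_ctx_irqs_off(next);
    local_irq_restore(flags);
}

int main(void)
{
    switch_ctx("worker-mm");    /* e.g. a use_mm()-style caller */
    printf("irqs restored: %d\n", irqs_enabled);
    return 0;
}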
Signed-off-by: David Gibson [mpe: Flesh out/rewrite change log, add stable] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mmu_context.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b9e3f0aca261..0012f0353fd6 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -70,8 +70,9 @@ extern void drop_cop(unsigned long acop, struct mm_struct *mm); * switch_mm is the entry point called from the architecture independent * code in kernel/sched/core.c */ -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) +static inline void switch_mm_irqs_off(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) { /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) @@ -110,6 +111,18 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, switch_mmu_context(prev, next, tsk); } +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev, next, tsk); + local_irq_restore(flags); +} +#define switch_mm_irqs_off switch_mm_irqs_off + + #define deactivate_mm(tsk,mm) do { } while (0) /* From e548d552c01e9666b7316054964b4696e4a71072 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 19 Apr 2017 17:39:26 +1000 Subject: [PATCH 266/465] powerpc/eeh: Avoid use after free in eeh_handle_special_event() commit daeba2956f32f91f3493788ff6ee02fb1b2f02fa upstream. eeh_handle_special_event() is called when an EEH event is detected but can't be narrowed down to a specific PE. This function looks through every PE to find one in an erroneous state, then calls the regular event handler eeh_handle_normal_event() once it knows which PE has an error. However, if eeh_handle_normal_event() found that the PE cannot possibly be recovered, it will free it, rendering the passed PE stale. This leads to a use after free in eeh_handle_special_event() as it attempts to clear the "recovering" state on the PE after eeh_handle_normal_event() returns. Thus, make sure the PE is valid when attempting to clear state in eeh_handle_special_event(). 
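The eeh_handle_normal_event() change above is an instance of a simple ownership contract: the callee reports whether it freed the object, and the caller must then stop touching it. A hypothetical sketch with invented types:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct pe {
    int state;
};

/* returns true if the PE was unrecoverable and has been freed */
static bool handle_event(struct pe *pe, bool recoverable)
{
    if (!recoverable) {
        free(pe);       /* object is gone after this point */
        return true;
    }
    pe->state = 1;      /* normal recovery path */
    return false;
}

int main(void)
{
    struct pe *pe = calloc(1, sizeof(*pe));

    if (!pe)
        return 1;

    if (handle_event(pe, false)) {
        puts("PE freed by handler, not touching it again");
        return 0;       /* no pe->state access here */
    }
    pe->state = 0;      /* only safe on the recoverable path */
    free(pe);
    return 0;
}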
Fixes: 8a6b1bc70dbb ("powerpc/eeh: EEH core to handle special event") Reported-by: Alexey Kardashevskiy Signed-off-by: Russell Currey Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh_driver.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b94887165a10..e50d1470714f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -724,7 +724,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ #define MAX_WAIT_FOR_RECOVERY 300 -static void eeh_handle_normal_event(struct eeh_pe *pe) +static bool eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; struct eeh_dev *edev, *tmp; @@ -736,7 +736,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) if (!frozen_bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); - return; + return false; } eeh_pe_update_time_stamp(pe); @@ -870,7 +870,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return; + return false; excess_failures: /* @@ -915,8 +915,12 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pci_lock_rescan_remove(); pci_hp_remove_devices(frozen_bus); pci_unlock_rescan_remove(); + + /* The passed PE should no longer be used */ + return true; } } + return false; } static void eeh_handle_special_event(void) @@ -982,7 +986,14 @@ static void eeh_handle_special_event(void) */ if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { - eeh_handle_normal_event(pe); + /* + * eeh_handle_normal_event() can make the PE stale if it + * determines that the PE cannot possibly be recovered. + * Don't modify the PE state if that's the case. + */ + if (eeh_handle_normal_event(pe)) + continue; + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { pci_lock_rescan_remove(); From 11e382b3b8520bb9a4577e3a4995f09c0e5dbb5c Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 18 Apr 2017 22:08:17 +0530 Subject: [PATCH 267/465] powerpc/book3s/mce: Move add_taint() later in virtual mode commit d93b0ac01a9ce276ec39644be47001873d3d183c upstream. machine_check_early() gets called in real mode. The very first time when add_taint() is called, it prints a warning which ends up calling opal call (that uses OPAL_CALL wrapper) for writing it to console. If we get a very first machine check while we are in opal we are doomed. OPAL_CALL overwrites the PACASAVEDMSR in r13 and in this case when we are done with MCE handling the original opal call will use this new MSR on it's way back to opal_return. This usually leads to unexpected behaviour or the kernel to panic. Instead move the add_taint() call later in the virtual mode where it is safe to call. This is broken with current FW level. We got lucky so far for not getting very first MCE hit while in OPAL. But easily reproducible on Mambo. 
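The add_taint() move above splits the work between an early handler that only records the event and a later, safer context that does the noisy part. A minimal sketch of that split, assuming a one-slot queue and invented names:

#include <stdio.h>

struct mce_event {
    int valid;
    int code;
};

static struct mce_event pending;
static int tainted;

/* "real mode" path: record only, no printing or tainting here */
static void machine_check_early(int code)
{
    pending.code = code;
    pending.valid = 1;
}

/* later, "virtual mode" path: safe to do the heavyweight work */
static void machine_check_process_queued(void)
{
    if (!pending.valid)
        return;
    tainted = 1;
    printf("machine check 0x%x handled, kernel tainted=%d\n",
           pending.code, tainted);
    pending.valid = 0;
}

int main(void)
{
    machine_check_early(0x42);
    machine_check_process_queued();
    return 0;
}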
Fixes: 27ea2c420cad ("powerpc: Set the correct kernel taint on machine check errors.") Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/mce.c | 2 ++ arch/powerpc/kernel/traps.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a1475e6aef3a..b23b32385583 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -221,6 +221,8 @@ static void machine_check_process_queued_event(struct irq_work *work) { int index; + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* * For now just print it to console. * TODO: log this error event to FSP or nvram. diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index ff365f9de27a..af97e8135939 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -306,8 +306,6 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); - add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; @@ -741,6 +739,8 @@ void machine_check_exception(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU * one returns a positive number. However there is existing code From d731227327be584369ec370f5527a8d0fea7a792 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 17 Apr 2017 20:21:40 -0400 Subject: [PATCH 268/465] powerpc/pseries: Fix of_node_put() underflow during DLPAR remove commit 68baf692c435339e6295cb470ea5545cbc28160e upstream. Historically struct device_node references were tracked using a kref embedded as a struct field. Commit 75b57ecf9d1d ("of: Make device nodes kobjects so they show up in sysfs") (Mar 2014) refactored device_nodes to be kobjects such that the device tree could by more simply exposed to userspace using sysfs. Commit 0829f6d1f69e ("of: device_node kobject lifecycle fixes") (Mar 2014) followed up these changes to better control the kobject lifecycle and in particular the referecne counting via of_node_get(), of_node_put(), and of_node_init(). A result of this second commit was that it introduced an of_node_put() call when a dynamic node is detached, in of_node_remove(), that removes the initial kobj reference created by of_node_init(). Traditionally as the original dynamic device node user the pseries code had assumed responsibilty for releasing this final reference in its platform specific DLPAR detach code. This patch fixes a refcount underflow introduced by commit 0829f6d1f6, and recently exposed by the upstreaming of the recount API. Messages like the following are no longer seen in the kernel log with this patch following DLPAR remove operations of cpus and pci devices. rpadlpar_io: slot PHB 72 removed refcount_t: underflow; use-after-free. 
------------[ cut here ]------------ WARNING: CPU: 5 PID: 3335 at lib/refcount.c:128 refcount_sub_and_test+0xf4/0x110 Fixes: 0829f6d1f69e ("of: device_node kobject lifecycle fixes") Signed-off-by: Tyrel Datwyler [mpe: Make change log commit references more verbose] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/dlpar.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 193e052fa0dd..bda18d8e1674 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -288,7 +288,6 @@ int dlpar_detach_node(struct device_node *dn) if (rc) return rc; - of_node_put(dn); /* Must decrement the refcount */ return 0; } From eb0807b288ac5b75d80553148616160ebb4cb51f Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 17 Apr 2017 20:24:39 -0400 Subject: [PATCH 269/465] powerpc/sysfs: Fix reference leak of cpu device_nodes present at boot commit e76ca27790a514590af782f83f6eae49e0ccf8c9 upstream. For CPUs present at boot each logical CPU acquires a reference to the associated device node of the core. This happens in register_cpu() which is called by topology_init(). The result of this is that we end up with a reference held by each thread of the core. However, these references are never freed if the CPU core is DLPAR removed. This patch fixes the reference leaks by acquiring and releasing the references in the CPU hotplug callbacks un/register_cpu_online(). With this patch symmetric reference counting is observed with both CPUs present at boot, and those DLPAR added after boot. Fixes: f86e4718f24b ("driver/core: cpu: initialize of_node in cpu's device struture") Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c1fb255a60d6..949957b97ead 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -710,6 +710,10 @@ static int register_cpu_online(unsigned int cpu) struct device_attribute *attrs, *pmc_attrs; int i, nattrs; + /* For cpus present at boot a reference was already grabbed in register_cpu() */ + if (!s->of_node) + s->of_node = of_get_cpu_node(cpu, NULL); + #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_SMT)) device_create_file(s, &dev_attr_smt_snooze_delay); @@ -864,6 +868,8 @@ static int unregister_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_offline(cpu); + of_node_put(s->of_node); + s->of_node = NULL; #endif /* CONFIG_HOTPLUG_CPU */ return 0; } From 8781143e13ea386c9d199117f7da09477b6f22b4 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 11 Apr 2017 17:54:57 +1000 Subject: [PATCH 270/465] powerpc/iommu: Do not call PageTransHuge() on tail pages commit e889e96e98e8da97bd39e46b7253615eabe14397 upstream. The CMA pages migration code does not support compound pages at the moment so it performs few tests before proceeding to actual page migration. One of the tests - PageTransHuge() - has VM_BUG_ON_PAGE(PageTail()) as it is designed to be called on head pages only. Since we also test for PageCompound(), and it contains PageTail() and PageHead(), we can simplify the check by leaving just PageCompound() and therefore avoid possible VM_BUG_ON_PAGE. 
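The reasoning, condensed into the guard itself: every hugetlb or THP page, head or tail, is part of a compound page, so the single PageCompound() test is a superset of the old three-way check and, unlike PageTransHuge(), is safe to call on a tail page. A commented sketch of the guard as it looks after the change:

        /* Compound pages (hugetlb and THP, head or tail) are not migrated
         * yet; one test covers them all and never trips PageTransHuge()'s
         * VM_BUG_ON_PAGE(PageTail(page)) assertion.
         */
        if (PageCompound(page))
                return -EBUSY;
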
Fixes: 2e5bbb5461f1 ("KVM: PPC: Book3S HV: Migrate pinned pages out of CMA") Signed-off-by: Alexey Kardashevskiy Acked-by: Balbir Singh Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/mmu_context_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 497130c5c742..96f835cbf212 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -81,7 +81,7 @@ struct page *new_iommu_non_cma_page(struct page *page, unsigned long private, gfp_t gfp_mask = GFP_USER; struct page *new_page; - if (PageHuge(page) || PageTransHuge(page) || PageCompound(page)) + if (PageCompound(page)) return NULL; if (PageHighMem(page)) @@ -100,7 +100,7 @@ static int mm_iommu_move_page_from_cma(struct page *page) LIST_HEAD(cma_migrate_pages); /* Ignore huge pages for now */ - if (PageHuge(page) || PageTransHuge(page) || PageCompound(page)) + if (PageCompound(page)) return -EBUSY; lru_add_drain(); From 31f96d860ef7115d4a24d0abf9e8f7afe62b4f71 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 3 May 2017 13:24:08 +1000 Subject: [PATCH 271/465] powerpc/powernv: Fix TCE kill on NVLink2 commit 6b3d12a948d27977816a15eb48409a298902a548 upstream. Commit 616badd2fb49 ("powerpc/powernv: Use OPAL call for TCE kill on NVLink2") forced all TCE kills to go via the OPAL call for NVLink2. However the PHB3 implementation of TCE kill was still being called directly from some functions which in some circumstances caused a machine check. This patch adds an equivalent IODA2 version of the function which uses the correct invalidation method depending on PHB model and changes all external callers to use it instead. Fixes: 616badd2fb49 ("powerpc/powernv: Use OPAL call for TCE kill on NVLink2") Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/npu-dma.c | 8 ++++---- arch/powerpc/platforms/powernv/pci-ioda.c | 10 +++++++++- arch/powerpc/platforms/powernv/pci.h | 2 +- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 1c383f38031d..d5b73eb1fae5 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -180,7 +180,7 @@ long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); return rc; } - pnv_pci_phb3_tce_invalidate_entire(phb, false); + pnv_pci_ioda2_tce_invalidate_entire(phb, false); /* Add the table to the list so its TCE cache will get invalidated */ pnv_pci_link_table_and_group(phb->hose->node, num, @@ -204,7 +204,7 @@ long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) pe_err(npe, "Unmapping failed, ret = %lld\n", rc); return rc; } - pnv_pci_phb3_tce_invalidate_entire(phb, false); + pnv_pci_ioda2_tce_invalidate_entire(phb, false); pnv_pci_unlink_table_and_group(npe->table_group.tables[num], &npe->table_group); @@ -270,7 +270,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) 0 /* bypass base */, top); if (rc == OPAL_SUCCESS) - pnv_pci_phb3_tce_invalidate_entire(phb, false); + pnv_pci_ioda2_tce_invalidate_entire(phb, false); return rc; } @@ -334,7 +334,7 @@ void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) pe_err(npe, "Failed to disable bypass, err %lld\n", rc); return; } - pnv_pci_phb3_tce_invalidate_entire(npe->phb, false); + 
pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); } struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e36738291c32..ecfbf66ca040 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1883,7 +1883,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { #define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1) #define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2) -void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm) +static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm) { __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm); const unsigned long val = PHB3_TCE_KILL_INVAL_ALL; @@ -1979,6 +1979,14 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, } } +void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) +{ + if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3) + pnv_pci_phb3_tce_invalidate_entire(phb, rm); + else + opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0); +} + static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index e1d3e5526b54..5d6599373f32 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -229,7 +229,7 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, /* Nvlink functions */ extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); -extern void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm); +extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, struct iommu_table *tbl); From 1d19e3f139f5716ef85a580683f70b1572b8c7f3 Mon Sep 17 00:00:00 2001 From: LiuHailong Date: Tue, 7 Feb 2017 10:35:52 +0800 Subject: [PATCH 272/465] powerpc/64e: Fix hang when debugging programs with relocated kernel commit fd615f69a18a9d4aa5ef02a1dc83f319f75da8e7 upstream. Debug interrupts can be taken during interrupt entry, since interrupt entry does not automatically turn them off. The kernel will check whether the faulting instruction is between [interrupt_base_book3e, __end_interrupts], and if so clear MSR[DE] and return. However, when the kernel is built with CONFIG_RELOCATABLE, it can't use LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) and LOAD_REG_IMMEDIATE(r15,__end_interrupts), as they ignore relocation. Thus, if the kernel is actually running at a different address than it was built at, the address comparison will fail, and the exception entry code will hang at kernel_dbg_exc. r2(toc) is also not usable here, as r2 still holds data from the interrupted context, so LOAD_REG_ADDR() doesn't work either. So we use the *name@got* to get the EV of two labels directly. 
Test programs test.c shows as follows: int main(int argc, char *argv[]) { if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1) printf("Kernel doesn't have perf_event support\n"); } Steps to reproduce the bug, for example: 1) ./gdb ./test 2) (gdb) b access 3) (gdb) r 4) (gdb) s Signed-off-by: Liu Hailong Signed-off-by: Jiang Xuexin Reviewed-by: Jiang Biao Reviewed-by: Liu Song Reviewed-by: Huang Jian [scottwood: cleaned up commit message, and specified bad behavior as a hang rather than an oops to correspond to mainline kernel behavior] Fixes: 1cb6e0649248 ("powerpc/book3e: support CONFIG_RELOCATABLE") Signed-off-by: Scott Wood Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64e.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 45b453e4d0c8..acd8ca76233e 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -735,8 +735,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f +#ifdef CONFIG_RELOCATABLE + ld r15,PACATOC(r13) + ld r14,interrupt_base_book3e@got(r15) + ld r15,__end_interrupts@got(r15) +#else LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) LOAD_REG_IMMEDIATE(r15,__end_interrupts) +#endif cmpld cr0,r10,r14 cmpld cr1,r10,r15 blt+ cr0,1f @@ -799,8 +805,14 @@ kernel_dbg_exc: andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f +#ifdef CONFIG_RELOCATABLE + ld r15,PACATOC(r13) + ld r14,interrupt_base_book3e@got(r15) + ld r15,__end_interrupts@got(r15) +#else LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) LOAD_REG_IMMEDIATE(r15,__end_interrupts) +#endif cmpld cr0,r10,r14 cmpld cr1,r10,r15 blt+ cr0,1f From f9fde78726f859f1d314407ab39104de5d3048f2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 May 2017 20:42:53 +1000 Subject: [PATCH 273/465] powerpc/mm: Fix crash in page table dump with huge pages commit bfb9956ab4d8242f4594b5f4bee534b935384fd9 upstream. The page table dump code doesn't know about huge pages, so currently it crashes (or walks random memory, usually leading to a crash), if it finds a huge page. On Book3S we only see huge pages in the Linux page tables when we're using the P9 Radix MMU. Teaching the code to properly handle huge pages is a bit more involved, so for now just prevent the crash. 
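The guard pattern applied at each level, shown for the PMD walker with the reasoning spelled out in comments; this mirrors the hunk below rather than adding new behaviour:

        for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
                addr = start + i * PMD_SIZE;
                /* A non-empty entry is either a pointer to a PTE page or a
                 * huge-page leaf.  Only the former may be dereferenced as a
                 * table, so descend only when pmd_huge() says it is not a leaf.
                 */
                if (!pmd_none(*pmd) && !pmd_huge(*pmd))
                        walk_pte(st, pmd, addr);
                /* else: hole or huge mapping - record it, do not walk into it */
        }
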
Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables") Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/dump_linuxpagetables.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index 49abaf4dc8e3..292214afe0f1 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -16,6 +16,7 @@ */ #include #include +#include #include #include #include @@ -331,7 +332,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { addr = start + i * PMD_SIZE; - if (!pmd_none(*pmd)) + if (!pmd_none(*pmd) && !pmd_huge(*pmd)) /* pmd exists */ walk_pte(st, pmd, addr); else @@ -347,7 +348,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) for (i = 0; i < PTRS_PER_PUD; i++, pud++) { addr = start + i * PUD_SIZE; - if (!pud_none(*pud)) + if (!pud_none(*pud) && !pud_huge(*pud)) /* pud exists */ walk_pmd(st, pud, addr); else @@ -367,7 +368,7 @@ static void walk_pagetables(struct pg_state *st) */ for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { addr = KERN_VIRT_START + i * PGDIR_SIZE; - if (!pgd_none(*pgd)) + if (!pgd_none(*pgd) && !pgd_huge(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); else From 75295195f763672a48d8097a8f45c295e6fdd3bc Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 8 May 2017 17:16:26 +1000 Subject: [PATCH 274/465] powerpc/tm: Fix FP and VMX register corruption commit f48e91e87e67b56bef63393d1a02c6e22c1d7078 upstream. In commit dc3106690b20 ("powerpc: tm: Always use fp_state and vr_state to store live registers"), a section of code was removed that copied the current state to checkpointed state. That code should not have been removed. When an FP (Floating Point) unavailable is taken inside a transaction, we need to abort the transaction. This is because at the time of the tbegin, the FP state is bogus so the state stored in the checkpointed registers is incorrect. To fix this, we treclaim (to get the checkpointed GPRs) and then copy the thread_struct FP live state into the checkpointed state. We then trecheckpoint so that the FP state is correctly restored into the CPU. The copying of the FP registers from live to checkpointed is what was missing. This simplifies the logic slightly from the original patch. tm_reclaim_thread() will now always write the checkpointed FP state. Either the checkpointed FP state will be written as part of the actual treclaim (in tm.S), or it'll be a copy of the live state. Which one we use is based on MSR[FP] from userspace. Similarly for VMX. Fixes: dc3106690b20 ("powerpc: tm: Always use fp_state and vr_state to store live registers") Signed-off-by: Michael Neuling Reviewed-by: cyrilbur@gmail.com Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/process.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d645da302bf2..baae104b16c7 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -864,6 +864,25 @@ static void tm_reclaim_thread(struct thread_struct *thr, if (!MSR_TM_SUSPENDED(mfmsr())) return; + /* + * If we are in a transaction and FP is off then we can't have + * used FP inside that transaction. Hence the checkpointed + * state is the same as the live state. 
We need to copy the + * live state to the checkpointed state so that when the + * transaction is restored, the checkpointed state is correct + * and the aborted transaction sees the correct state. We use + * ckpt_regs.msr here as that's what tm_reclaim will use to + * determine if it's going to write the checkpointed state or + * not. So either this will write the checkpointed registers, + * or reclaim will. Similarly for VMX. + */ + if ((thr->ckpt_regs.msr & MSR_FP) == 0) + memcpy(&thr->ckfp_state, &thr->fp_state, + sizeof(struct thread_fp_state)); + if ((thr->ckpt_regs.msr & MSR_VEC) == 0) + memcpy(&thr->ckvr_state, &thr->vr_state, + sizeof(struct thread_vr_state)); + giveup_all(container_of(thr, struct task_struct, thread)); tm_reclaim(thr, thr->ckpt_regs.msr, cause); From c735d4e3c2d378574f335f3b2c3f8eb80b2ee0a8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:37 +0100 Subject: [PATCH 275/465] arm64: KVM: Do not use stack-protector to compile EL2 code commit cde13b5dad60471886a3bccb4f4134c647c4a9dc upstream. We like living dangerously. Nothing explicitely forbids stack-protector to be used in the EL2 code, while distributions routinely compile their kernel with it. We're just lucky that no code actually triggers the instrumentation. Let's not try our luck for much longer, and disable stack-protector for code living at EL2. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index aaf42ae8d8c3..14c4e3b14bcb 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -2,6 +2,8 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # +ccflags-y += -fno-stack-protector + KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o From 50f665c39d6cfceee2c87ec7f5320ab7f3ca45dd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:38 +0100 Subject: [PATCH 276/465] arm: KVM: Do not use stack-protector to compile HYP code commit 501ad27c67ed0b90df465f23d33e9aed64058a47 upstream. We like living dangerously. Nothing explicitely forbids stack-protector to be used in the HYP code, while distributions routinely compile their kernel with it. We're just lucky that no code actually triggers the instrumentation. Let's not try our luck for much longer, and disable stack-protector for code living at HYP. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/hyp/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 3023bb530edf..8679405b0b2b 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -2,6 +2,8 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # +ccflags-y += -fno-stack-protector + KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o From 181339b5409a45e01b8f95043e433cff853981d0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:39 +0100 Subject: [PATCH 277/465] KVM: arm/arm64: vgic-v2: Do not use Active+Pending state for a HW interrupt commit ddf42d068f8802de122bb7efdfcb3179336053f1 upstream. 
When an interrupt is injected with the HW bit set (indicating that deactivation should be propagated to the physical distributor), special care must be taken so that we never mark the corresponding LR with the Active+Pending state (as the pending state is kept in the physycal distributor). Fixes: 140b086dd197 ("KVM: arm/arm64: vgic-new: Add GICv2 world switch backend") Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-v2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index b637d9c7afe3..2f54ce6d3a51 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -181,6 +181,13 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->hw) { val |= GICH_LR_HW; val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active && irq_is_pending(irq)) + val &= ~GICH_LR_PENDING_BIT; } else { if (irq->config == VGIC_CONFIG_LEVEL) val |= GICH_LR_EOI; From c870815609d305603eb9e6bf1b75c6073020c12e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 May 2017 14:30:40 +0100 Subject: [PATCH 278/465] KVM: arm/arm64: vgic-v3: Do not use Active+Pending state for a HW interrupt commit 3d6e77ad1489650afa20da92bb589c8778baa8da upstream. When an interrupt is injected with the HW bit set (indicating that deactivation should be propagated to the physical distributor), special care must be taken so that we never mark the corresponding LR with the Active+Pending state (as the pending state is kept in the physycal distributor). Fixes: 59529f69f504 ("KVM: arm/arm64: vgic-new: Add GICv3 world switch backend") Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-v3.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index be0f4c3e0142..abd3d4a1cb49 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -149,6 +149,13 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->hw) { val |= ICH_LR_HW; val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active && irq_is_pending(irq)) + val &= ~ICH_LR_PENDING_BIT; } else { if (irq->config == VGIC_CONFIG_LEVEL) val |= ICH_LR_EOI; From ae9bd04a13e3548e5f0541f51e0469932f08aad9 Mon Sep 17 00:00:00 2001 From: Zhichao Huang Date: Thu, 11 May 2017 13:46:11 +0100 Subject: [PATCH 279/465] KVM: arm: plug potential guest hardware debug leakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 661e6b02b5aa82db31897f36e96324b77450fd7a upstream. Hardware debugging in guests is not intercepted currently, it means that a malicious guest can bring down the entire machine by writing to the debug registers. This patch enable trapping of all debug registers, preventing the guests to access the debug registers. This includes access to the debug mode(DBGDSCR) in the guest world all the time which could otherwise mess with the host state. Reads return 0 and writes are ignored (RAZ_WI). 
The result is the guest cannot detect any working hardware based debug support. As debug exceptions are still routed to the guest normal debug using software based breakpoints still works. To support debugging using hardware registers we need to implement a debug register aware world switch as well as special trapping for registers that may affect the host state. Signed-off-by: Zhichao Huang Signed-off-by: Alex Bennée Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_coproc.h | 3 +- arch/arm/kvm/coproc.c | 77 +++++++++++++++++++++++-------- arch/arm/kvm/handle_exit.c | 4 +- arch/arm/kvm/hyp/switch.c | 4 +- 4 files changed, 66 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h index 4917c2f7e459..e74ab0fbab79 100644 --- a/arch/arm/include/asm/kvm_coproc.h +++ b/arch/arm/include/asm/kvm_coproc.h @@ -31,7 +31,8 @@ void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table); int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 3e5e4194ef86..c3ed6bd5ddf3 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -93,12 +93,6 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - kvm_inject_undefined(vcpu); - return 1; -} - static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { /* @@ -514,12 +508,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu, return 1; } -/** - * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access - * @vcpu: The VCPU pointer - * @run: The kvm_run struct - */ -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +static struct coproc_params decode_64bit_hsr(struct kvm_vcpu *vcpu) { struct coproc_params params; @@ -533,9 +522,38 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; params.CRm = 0; + return params; +} + +/** + * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params = decode_64bit_hsr(vcpu); + return emulate_cp15(vcpu, ¶ms); } +/** + * kvm_handle_cp14_64 -- handles a mrrc/mcrr trap on a guest CP14 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params = decode_64bit_hsr(vcpu); + + /* raz_wi cp14 */ + pm_fake(vcpu, ¶ms, NULL); + + /* handled */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; +} + static void reset_coproc_regs(struct kvm_vcpu *vcpu, const struct coproc_reg *table, size_t num) { @@ -546,12 +564,7 @@ static void reset_coproc_regs(struct kvm_vcpu *vcpu, table[i].reset(vcpu, &table[i]); } 
-/** - * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access - * @vcpu: The VCPU pointer - * @run: The kvm_run struct - */ -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu) { struct coproc_params params; @@ -565,9 +578,37 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7; params.Rt2 = 0; + return params; +} + +/** + * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params = decode_32bit_hsr(vcpu); return emulate_cp15(vcpu, ¶ms); } +/** + * kvm_handle_cp14_32 -- handles a mrc/mcr trap on a guest CP14 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params = decode_32bit_hsr(vcpu); + + /* raz_wi cp14 */ + pm_fake(vcpu, ¶ms, NULL); + + /* handled */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; +} + /****************************************************************************** * Userspace API *****************************************************************************/ diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 96af65a30d78..42f5daf715d0 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -95,9 +95,9 @@ static exit_handle_fn arm_exit_handlers[] = { [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, - [HSR_EC_CP14_MR] = kvm_handle_cp14_access, + [HSR_EC_CP14_MR] = kvm_handle_cp14_32, [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store, - [HSR_EC_CP14_64] = kvm_handle_cp14_access, + [HSR_EC_CP14_64] = kvm_handle_cp14_64, [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access, [HSR_EC_CP10_ID] = kvm_handle_cp10_id, [HSR_EC_HVC] = handle_hvc, diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index 92678b7bd046..624a510d31df 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -48,7 +48,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host) write_sysreg(HSTR_T(15), HSTR); write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR); val = read_sysreg(HDCR); - write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR); + val |= HDCR_TPM | HDCR_TPMCR; /* trap performance monitors */ + val |= HDCR_TDRA | HDCR_TDOSA | HDCR_TDA; /* trap debug regs */ + write_sysreg(val, HDCR); } static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) From 7e2aa72f117e10fd37716ef951f56b28f6d4d32d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 22 Feb 2017 19:40:12 +0100 Subject: [PATCH 280/465] ARM: 8662/1: module: split core and init PLT sections commit b7ede5a1f5905ac394cc8e61712a13e3c5cb7b8f upstream. Since commit 35fa91eed817 ("ARM: kernel: merge core and init PLTs"), the ARM module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. 
However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This puts the PLT entries out of reach of the relocated branch instructions, defeating the whole purpose of PLTs. So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section. Fixes: 35fa91eed817 ("ARM: kernel: merge core and init PLTs") Reported-by: Angus Clark Tested-by: Angus Clark Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/module.h | 9 +++- arch/arm/kernel/module-plts.c | 87 ++++++++++++++++++++++++----------- arch/arm/kernel/module.lds | 1 + 3 files changed, 68 insertions(+), 29 deletions(-) diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 464748b9fd7d..ed2319663a1e 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -18,13 +18,18 @@ enum { }; #endif +struct mod_plt_sec { + struct elf32_shdr *plt; + int plt_count; +}; + struct mod_arch_specific { #ifdef CONFIG_ARM_UNWIND struct unwind_table *unwind[ARM_SEC_MAX]; #endif #ifdef CONFIG_ARM_MODULE_PLTS - struct elf32_shdr *plt; - int plt_count; + struct mod_plt_sec core; + struct mod_plt_sec init; #endif }; diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 3a5cba90c971..3d0c2e4dda1d 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Linaro Ltd. + * Copyright (C) 2014-2017 Linaro Ltd. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -31,9 +31,17 @@ struct plt_entries { u32 lit[PLT_ENT_COUNT]; }; +static bool in_init(const struct module *mod, unsigned long loc) +{ + return loc - (u32)mod->init_layout.base < mod->init_layout.size; +} + u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) { - struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr; + struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : + &mod->arch.init; + + struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr; int idx = 0; /* @@ -41,9 +49,9 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) * relocations are sorted, this will be the last entry we allocated. * (if one exists). 
*/ - if (mod->arch.plt_count > 0) { - plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT; - idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT; + if (pltsec->plt_count > 0) { + plt += (pltsec->plt_count - 1) / PLT_ENT_COUNT; + idx = (pltsec->plt_count - 1) % PLT_ENT_COUNT; if (plt->lit[idx] == val) return (u32)&plt->ldr[idx]; @@ -53,8 +61,8 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) plt++; } - mod->arch.plt_count++; - BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size); + pltsec->plt_count++; + BUG_ON(pltsec->plt_count * PLT_ENT_SIZE > pltsec->plt->sh_size); if (!idx) /* Populate a new set of entries */ @@ -129,7 +137,7 @@ static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num) /* Count how many PLT entries we may need */ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, - const Elf32_Rel *rel, int num) + const Elf32_Rel *rel, int num, Elf32_Word dstidx) { unsigned int ret = 0; const Elf32_Sym *s; @@ -144,13 +152,17 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, case R_ARM_THM_JUMP24: /* * We only have to consider branch targets that resolve - * to undefined symbols. This is not simply a heuristic, - * it is a fundamental limitation, since the PLT itself - * is part of the module, and needs to be within range - * as well, so modules can never grow beyond that limit. + * to symbols that are defined in a different section. + * This is not simply a heuristic, it is a fundamental + * limitation, since there is no guaranteed way to emit + * PLT entries sufficiently close to the branch if the + * section size exceeds the range of a branch + * instruction. So ignore relocations against defined + * symbols if they live in the same section as the + * relocation target. */ s = syms + ELF32_R_SYM(rel[i].r_info); - if (s->st_shndx != SHN_UNDEF) + if (s->st_shndx == dstidx) break; /* @@ -161,7 +173,12 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, * So we need to support them, but there is no need to * take them into consideration when trying to optimize * this code. So let's only check for duplicates when - * the addend is zero. + * the addend is zero. (Note that calls into the core + * module via init PLT entries could involve section + * relative symbol references with non-zero addends, for + * which we may end up emitting duplicates, but the init + * PLT is released along with the rest of the .init + * region as soon as module loading completes.) 
*/ if (!is_zero_addend_relocation(base, rel + i) || !duplicate_rel(base, rel, i)) @@ -174,7 +191,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long plts = 0; + unsigned long core_plts = 0; + unsigned long init_plts = 0; Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; Elf32_Sym *syms = NULL; @@ -184,13 +202,15 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, */ for (s = sechdrs; s < sechdrs_end; ++s) { if (strcmp(".plt", secstrings + s->sh_name) == 0) - mod->arch.plt = s; + mod->arch.core.plt = s; + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) + mod->arch.init.plt = s; else if (s->sh_type == SHT_SYMTAB) syms = (Elf32_Sym *)s->sh_addr; } - if (!mod->arch.plt) { - pr_err("%s: module PLT section missing\n", mod->name); + if (!mod->arch.core.plt || !mod->arch.init.plt) { + pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; } if (!syms) { @@ -213,16 +233,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* sort by type and symbol index */ sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL); - plts += count_plts(syms, dstsec->sh_addr, rels, numrels); + if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) + core_plts += count_plts(syms, dstsec->sh_addr, rels, + numrels, s->sh_info); + else + init_plts += count_plts(syms, dstsec->sh_addr, rels, + numrels, s->sh_info); } - mod->arch.plt->sh_type = SHT_NOBITS; - mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.plt->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE, - sizeof(struct plt_entries)); - mod->arch.plt_count = 0; - - pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size); + mod->arch.core.plt->sh_type = SHT_NOBITS; + mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.core.plt_count = 0; + + mod->arch.init.plt->sh_type = SHT_NOBITS; + mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.init.plt_count = 0; + + pr_debug("%s: plt=%x, init.plt=%x\n", __func__, + mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size); return 0; } diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds index 05881e2b414c..eacb5c67f61e 100644 --- a/arch/arm/kernel/module.lds +++ b/arch/arm/kernel/module.lds @@ -1,3 +1,4 @@ SECTIONS { .plt : { BYTE(0) } + .init.plt : { BYTE(0) } } From 3e9570206d629e334314e273bb36cb33b9f8e061 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Mon, 10 Apr 2017 11:13:59 +0100 Subject: [PATCH 281/465] ARM: 8667/3: Fix memory attribute inconsistencies when using fixmap commit b089c31c519c3906c14801b6ec483e18a5152a50 upstream. To cope with the variety in ARM architectures and configurations, the pagetable attributes for kernel memory are generated at runtime to match the system the kernel finds itself on. This calculated value is stored in pgprot_kernel. However, when early fixmap support was added for ARM (commit a5f4c561b3b1) the attributes used for mappings were hard coded because pgprot_kernel is not set up early enough. 
Unfortunately, when fixmap is used after early boot this means the memory being mapped can have different attributes to existing mappings, potentially leading to unpredictable behaviour. A specific problem also exists due to the hard coded values not include the 'shareable' attribute which means on systems where this matters (e.g. those with multiple CPU clusters) the cache contents for a memory location can become inconsistent between CPUs. To resolve these issues we change fixmap to use the same memory attributes (from pgprot_kernel) that the rest of the kernel uses. To enable this we need to refactor the initialisation code so build_mem_type_table() is called early enough. Note, that relies on early param parsing for memory type overrides passed via the kernel command line, so we need to make sure this call is still after parse_early_params(). [ardb: keep early_fixmap_init() before param parsing, for earlycon] Fixes: a5f4c561b3b1 ("ARM: 8415/1: early fixmap support for earlycon") Tested-by: afzal mohammed Signed-off-by: Jon Medhurst Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/fixmap.h | 2 +- arch/arm/kernel/setup.c | 4 ++-- arch/arm/mm/mmu.c | 16 +++++++++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 5c17d2dec777..8f967d1373f6 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -41,7 +41,7 @@ static const enum fixed_addresses __end_of_fixed_addresses = #define FIXMAP_PAGE_COMMON (L_PTE_YOUNG | L_PTE_PRESENT | L_PTE_XN | L_PTE_DIRTY) -#define FIXMAP_PAGE_NORMAL (FIXMAP_PAGE_COMMON | L_PTE_MT_WRITEBACK) +#define FIXMAP_PAGE_NORMAL (pgprot_kernel | L_PTE_XN) #define FIXMAP_PAGE_RO (FIXMAP_PAGE_NORMAL | L_PTE_RDONLY) /* Used by set_fixmap_(io|nocache), both meant for mapping a device */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f4e54503afa9..32e1a9513dc7 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -80,7 +80,7 @@ __setup("fpe=", fpe_setup); extern void init_default_cache_policy(unsigned long); extern void paging_init(const struct machine_desc *desc); -extern void early_paging_init(const struct machine_desc *); +extern void early_mm_init(const struct machine_desc *); extern void adjust_lowmem_bounds(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -1088,7 +1088,7 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); #ifdef CONFIG_MMU - early_paging_init(mdesc); + early_mm_init(mdesc); #endif setup_dma_zone(mdesc); xen_early_init(); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4e016d7f37b3..347cca965783 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -414,6 +414,11 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) FIXADDR_END); BUG_ON(idx >= __end_of_fixed_addresses); + /* we only support device mappings until pgprot_kernel has been set */ + if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) && + pgprot_val(pgprot_kernel) == 0)) + return; + if (pgprot_val(prot)) set_pte_at(NULL, vaddr, pte, pfn_pte(phys >> PAGE_SHIFT, prot)); @@ -1492,7 +1497,7 @@ pgtables_remap lpae_pgtables_remap_asm; * early_paging_init() recreates boot time page table setup, allowing machines * to switch over to a high (>4G) address space on LPAE systems */ -void __init early_paging_init(const struct machine_desc *mdesc) +static void __init 
early_paging_init(const struct machine_desc *mdesc) { pgtables_remap *lpae_pgtables_remap; unsigned long pa_pgd; @@ -1560,7 +1565,7 @@ void __init early_paging_init(const struct machine_desc *mdesc) #else -void __init early_paging_init(const struct machine_desc *mdesc) +static void __init early_paging_init(const struct machine_desc *mdesc) { long long offset; @@ -1616,7 +1621,6 @@ void __init paging_init(const struct machine_desc *mdesc) { void *zero_page; - build_mem_type_table(); prepare_page_table(); map_lowmem(); memblock_set_current_limit(arm_lowmem_limit); @@ -1636,3 +1640,9 @@ void __init paging_init(const struct machine_desc *mdesc) empty_zero_page = virt_to_page(zero_page); __flush_dcache_page(NULL, empty_zero_page); } + +void __init early_mm_init(const struct machine_desc *mdesc) +{ + build_mem_type_table(); + early_paging_init(mdesc); +} From 6b9e12331a32b8d2ad48aad699ac62df7dfe478d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 24 Apr 2017 10:40:48 +0100 Subject: [PATCH 282/465] ARM: 8670/1: V7M: Do not corrupt vector table around v7m_invalidate_l1 call commit 6d80594936914e798b1b54b3bfe4bd68d8418966 upstream. We save/restore registers around v7m_invalidate_l1 to address pointed by r12, which is vector table, so the first eight entries are overwritten with a garbage. We already have stack setup at that stage, so use it to save/restore register. Fixes: 6a8146f420be ("ARM: 8609/1: V7M: Add support for the Cortex-M7 processor") Signed-off-by: Vladimir Murzin Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7m.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 8dea61640cc1..50497778c2e5 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -147,10 +147,10 @@ __v7m_setup_cont: @ Configure caches (if implemented) teq r8, #0 - stmneia r12, {r0-r6, lr} @ v7m_invalidate_l1 touches r0-r6 + stmneia sp, {r0-r6, lr} @ v7m_invalidate_l1 touches r0-r6 blne v7m_invalidate_l1 teq r8, #0 @ re-evalutae condition - ldmneia r12, {r0-r6, lr} + ldmneia sp, {r0-r6, lr} @ Configure the System Control Register to ensure 8-byte stack alignment @ Note the STKALIGN bit is either RW or RAO. From 87cb676e83f8025e00c2bace1acfee21317fbc83 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 10 Apr 2017 10:25:16 +0200 Subject: [PATCH 283/465] ARM: dts: at91: sama5d3_xplained: fix ADC vref commit 9cdd31e5913c1f86dce7e201b086155b3f24896b upstream. The voltage reference for the ADC is not 3V but 3.3V since it is connected to VDDANA. Signed-off-by: Ludovic Desroches Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d3_xplained.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts index c51fc652f6c7..7a0fa1aeb440 100644 --- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts @@ -162,6 +162,7 @@ }; adc0: adc@f8018000 { + atmel,adc-vref = <3300>; pinctrl-0 = < &pinctrl_adc0_adtrg &pinctrl_adc0_ad0 From 8539c6b56ac5ea132146466c9774b13be64dcd12 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 10 Apr 2017 10:25:17 +0200 Subject: [PATCH 284/465] ARM: dts: at91: sama5d3_xplained: not all ADC channels are available commit d3df1ec06353e51fc44563d2e7e18d42811af290 upstream. 
Remove ADC channels that are not available by default on the sama5d3_xplained board (resistor not populated) in order to not create confusion. Signed-off-by: Ludovic Desroches Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d3_xplained.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts index 7a0fa1aeb440..5a53fcf542ab 100644 --- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts @@ -163,9 +163,9 @@ adc0: adc@f8018000 { atmel,adc-vref = <3300>; + atmel,adc-channels-used = <0xfe>; pinctrl-0 = < &pinctrl_adc0_adtrg - &pinctrl_adc0_ad0 &pinctrl_adc0_ad1 &pinctrl_adc0_ad2 &pinctrl_adc0_ad3 @@ -173,8 +173,6 @@ &pinctrl_adc0_ad5 &pinctrl_adc0_ad6 &pinctrl_adc0_ad7 - &pinctrl_adc0_ad8 - &pinctrl_adc0_ad9 >; status = "okay"; }; From 1116274777424b1a8905c015e427843b0ee4a3a7 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 5 May 2017 14:00:17 +0300 Subject: [PATCH 285/465] ARM: dts: imx6sx-sdb: Remove OPP override commit d8581c7c8be172dac156a19d261f988a72ce596f upstream. The board file for imx6sx-sdb overrides cpufreq operating points to use higher voltages. This is done because the board has a shared rail for VDD_ARM_IN and VDD_SOC_IN and when using LDO bypass the shared voltage needs to be a value suitable for both ARM and SOC. This only applies to LDO bypass mode, a feature not present in upstream. When LDOs are enabled the effect is to use higher voltages than necessary for no good reason. Setting these higher voltages can make some boards fail to boot with ugly semi-random crashes reminiscent of memory corruption. These failures only happen on board rev. C, rev. B is reported to still work. Signed-off-by: Leonard Crestez Fixes: 54183bd7f766 ("ARM: imx6sx-sdb: add revb board and make it default") Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6sx-sdb.dts | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts index 5bb8fd57e7f5..d71da30c9cff 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dts +++ b/arch/arm/boot/dts/imx6sx-sdb.dts @@ -12,23 +12,6 @@ model = "Freescale i.MX6 SoloX SDB RevB Board"; }; -&cpu0 { - operating-points = < - /* kHz uV */ - 996000 1250000 - 792000 1175000 - 396000 1175000 - 198000 1175000 - >; - fsl,soc-operating-points = < - /* ARM kHz SOC uV */ - 996000 1250000 - 792000 1175000 - 396000 1175000 - 198000 1175000 - >; -}; - &i2c1 { clock-frequency = <100000>; pinctrl-names = "default"; From ee6dfbff78289b671c9dce9c141d68d86fd617fe Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 16 Mar 2017 15:03:24 +0100 Subject: [PATCH 286/465] arm64: dts: hi6220: Reset the mmc hosts commit 0fbdf9953b41c28845fe8d05007ff09634ee3000 upstream. The MMC hosts could be left in an unconsistent or uninitialized state from the firmware. Instead of assuming, the firmware did the right things, let's reset the host controllers. This change fixes a bug when the mmc2/sdio is initialized leading to a hung task: [ 242.704294] INFO: task kworker/7:1:675 blocked for more than 120 seconds. [ 242.711129] Not tainted 4.9.0-rc8-00017-gcf0251f #3 [ 242.716571] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
[ 242.724435] kworker/7:1 D 0 675 2 0x00000000 [ 242.729973] Workqueue: events_freezable mmc_rescan [ 242.734796] Call trace: [ 242.737269] [] __switch_to+0xa8/0xb4 [ 242.742437] [] __schedule+0x1c0/0x67c [ 242.747689] [] schedule+0x40/0xa0 [ 242.752594] [] schedule_timeout+0x1c4/0x35c [ 242.758366] [] wait_for_common+0xd0/0x15c [ 242.763964] [] wait_for_completion+0x28/0x34 [ 242.769825] [] mmc_wait_for_req_done+0x40/0x124 [ 242.775949] [] mmc_wait_for_req+0xc0/0xf8 [ 242.781549] [] mmc_wait_for_cmd+0x6c/0x84 [ 242.787149] [] mmc_io_rw_direct_host+0x9c/0x114 [ 242.793270] [] sdio_reset+0x34/0x7c [ 242.798347] [] mmc_rescan+0x2fc/0x360 [ ... ] Signed-off-by: Daniel Lezcano Signed-off-by: Wei Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 470461ddd427..1e5129b19280 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -774,6 +774,7 @@ clocks = <&sys_ctrl 2>, <&sys_ctrl 1>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>; + reset-names = "reset"; bus-width = <0x8>; vmmc-supply = <&ldo19>; pinctrl-names = "default"; @@ -797,6 +798,7 @@ clocks = <&sys_ctrl 4>, <&sys_ctrl 3>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>; + reset-names = "reset"; vqmmc-supply = <&ldo7>; vmmc-supply = <&ldo10>; bus-width = <0x4>; @@ -815,6 +817,7 @@ clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>; + reset-names = "reset"; bus-width = <0x4>; broken-cd; pinctrl-names = "default", "idle"; From b55563999fa951d705149a43be3e6a30325417d6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:33 +0100 Subject: [PATCH 287/465] arm64: xchg: hazard against entire exchange variable commit fee960bed5e857eb126c4e56dd9ff85938356579 upstream. The inline assembly in __XCHG_CASE() uses a +Q constraint to hazard against other accesses to the memory location being exchanged. However, the pointer passed to the constraint is a u8 pointer, and thus the hazard only applies to the first byte of the location. GCC can take advantage of this, assuming that other portions of the location are unchanged, as demonstrated with the following test case: union u { unsigned long l; unsigned int i[2]; }; unsigned long update_char_hazard(union u *u) { unsigned int a, b; a = u->i[1]; asm ("str %1, %0" : "+Q" (*(char *)&u->l) : "r" (0UL)); b = u->i[1]; return a ^ b; } unsigned long update_long_hazard(union u *u) { unsigned int a, b; a = u->i[1]; asm ("str %1, %0" : "+Q" (*(long *)&u->l) : "r" (0UL)); b = u->i[1]; return a ^ b; } The linaro 15.08 GCC 5.1.1 toolchain compiles the above as follows when using -O2 or above: 0000000000000000 : 0: d2800001 mov x1, #0x0 // #0 4: f9000001 str x1, [x0] 8: d2800000 mov x0, #0x0 // #0 c: d65f03c0 ret 0000000000000010 : 10: b9400401 ldr w1, [x0,#4] 14: d2800002 mov x2, #0x0 // #0 18: f9000002 str x2, [x0] 1c: b9400400 ldr w0, [x0,#4] 20: 4a000020 eor w0, w1, w0 24: d65f03c0 ret This patch fixes the issue by passing an unsigned long pointer into the +Q constraint, as we do for our cmpxchg code. This may hazard against more than is necessary, but this is better than missing a necessary hazard. 
Fixes: 305d454aaa29 ("arm64: atomics: implement native {relaxed, acquire, release} atomics") Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cmpxchg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 91b26d26af8a..ae852add053d 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -46,7 +46,7 @@ static inline unsigned long __xchg_case_##name(unsigned long x, \ " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ __nops(3) \ " " #nop_lse) \ - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \ + : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr) \ : "r" (x) \ : cl); \ \ From 2339f63ef9ed6deb3312eddc9ea40e444b9da4ef Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:34 +0100 Subject: [PATCH 288/465] arm64: ensure extension of smp_store_release value commit 994870bead4ab19087a79492400a5478e2906196 upstream. When an inline assembly operand's type is narrower than the register it is allocated to, the least significant bits of the register (up to the operand type's width) are valid, and any other bits are permitted to contain any arbitrary value. This aligns with the AAPCS64 parameter passing rules. Our __smp_store_release() implementation does not account for this, and implicitly assumes that operands have been zero-extended to the width of the type being stored to. Thus, we may store unknown values to memory when the value type is narrower than the pointer type (e.g. when storing a char to a long). This patch fixes the issue by casting the value operand to the same width as the pointer operand in all cases, which ensures that the value is zero-extended as we expect. We use the same union trickery as __smp_load_acquire and {READ,WRITE}_ONCE() to avoid GCC complaining that pointers are potentially cast to narrower width integers in unreachable paths. A whitespace issue at the top of __smp_store_release() is also corrected. No changes are necessary for __smp_load_acquire(). Load instructions implicitly clear any upper bits of the register, and the compiler will only consider the least significant bits of the register as valid regardless. 
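The rule in isolation, as a small AArch64-only sketch (plain C plus inline assembly, not the kernel macro, which additionally switches on the access size and keeps type checking via the union): widen the narrow value in C before the asm sees it, so every bit the store instruction writes is defined:

#include <stdint.h>

/* Storing a char-sized value into a long-sized location: if 'v' were
 * handed to the asm directly, bits [63:8] of the register would be
 * unspecified under AAPCS64 and stlr would write them to memory.
 */
static inline void store_release_u8_to_u64(uint64_t *p, uint8_t v)
{
        uint64_t wide = v;              /* explicit zero-extension in C */

        asm volatile("stlr %1, %0"      /* stores the full 64-bit register */
                     : "=Q" (*p)
                     : "r" (wide)
                     : "memory");
}
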
Fixes: 47933ad41a86 ("arch: Introduce smp_load_acquire(), smp_store_release()") Fixes: 878a84d5a8a1 ("arm64: add missing data types in smp_load_acquire/smp_store_release") Acked-by: Will Deacon Signed-off-by: Mark Rutland Cc: Matthias Kaehlcke Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/barrier.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 4e0497f581a0..0fe7e43b7fbc 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -42,25 +42,35 @@ #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) -#define __smp_store_release(p, v) \ +#define __smp_store_release(p, v) \ do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ compiletime_assert_atomic_type(*p); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("stlrb %w1, %0" \ - : "=Q" (*p) : "r" (v) : "memory"); \ + : "=Q" (*p) \ + : "r" (*(__u8 *)__u.__c) \ + : "memory"); \ break; \ case 2: \ asm volatile ("stlrh %w1, %0" \ - : "=Q" (*p) : "r" (v) : "memory"); \ + : "=Q" (*p) \ + : "r" (*(__u16 *)__u.__c) \ + : "memory"); \ break; \ case 4: \ asm volatile ("stlr %w1, %0" \ - : "=Q" (*p) : "r" (v) : "memory"); \ + : "=Q" (*p) \ + : "r" (*(__u32 *)__u.__c) \ + : "memory"); \ break; \ case 8: \ asm volatile ("stlr %1, %0" \ - : "=Q" (*p) : "r" (v) : "memory"); \ + : "=Q" (*p) \ + : "r" (*(__u64 *)__u.__c) \ + : "memory"); \ break; \ } \ } while (0) From 1f13dc4c221c0927ebfbe320ed20c372cda1ab42 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:36 +0100 Subject: [PATCH 289/465] arm64: armv8_deprecated: ensure extension of addr commit 55de49f9aa17b0b2b144dd2af587177b9aadf429 upstream. Our compat swp emulation holds the compat user address in an unsigned int, which it passes to __user_swpX_asm(). When a 32-bit value is passed in a register, the upper 32 bits of the register are unknown, and we must extend the value to 64 bits before we can use it as a base address. This patch casts the address to unsigned long to ensure it has been suitably extended, avoiding the potential issue, and silencing a related warning from clang. Fixes: bd35a4adc413 ("arm64: Port SWP/SWPB emulation support from arm") Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/armv8_deprecated.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 657977e77ec8..f0e6d717885b 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -306,7 +306,8 @@ do { \ _ASM_EXTABLE(0b, 4b) \ _ASM_EXTABLE(1b, 4b) \ : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ - : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT), \ + : "r" ((unsigned long)addr), "i" (-EAGAIN), \ + "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ uaccess_disable(); \ From 87e2b8c096aa949bac679e870f873587576fabd7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:35 +0100 Subject: [PATCH 290/465] arm64: uaccess: ensure extension of access_ok() addr commit a06040d7a791a9177581dcf7293941bd92400856 upstream. Our access_ok() simply hands its arguments over to __range_ok(), which implicitly assummes that the addr parameter is 64 bits wide. 
This isn't necessarily true for compat code, which might pass down a 32-bit address parameter. In these cases, we don't have a guarantee that the address has been zero extended to 64 bits, and the upper bits of the register may contain unknown values, potentially resulting in a suprious failure. Avoid this by explicitly casting the addr parameter to an unsigned long (as is done on other architectures), ensuring that the parameter is widened appropriately. Fixes: 0aea86a2176c ("arm64: User access library functions") Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5308d696311b..ed3ecf14ce95 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -95,11 +95,12 @@ static inline void set_fs(mm_segment_t fs) */ #define __range_ok(addr, size) \ ({ \ + unsigned long __addr = (unsigned long __force)(addr); \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ : "=&r" (flag), "=&r" (roksum) \ - : "1" (addr), "Ir" (size), \ + : "1" (__addr), "Ir" (size), \ "r" (current_thread_info()->addr_limit) \ : "cc"); \ flag; \ From 5de6ca05b64fff78d1e37e140e7b5bb17592fc4a Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 3 May 2017 16:37:45 +0100 Subject: [PATCH 291/465] arm64: traps: fix userspace cache maintenance emulation on a tagged pointer commit 81cddd65b5c82758ea5571a25e31ff6f1f89ff02 upstream. When we emulate userspace cache maintenance in the kernel, we can currently send the task a SIGSEGV even though the maintenance was done on a valid address. This happens if the address has a non-zero address tag, and happens to not be mapped in. When we get the address from a user register, we don't currently remove the address tag before performing cache maintenance on it. If the maintenance faults, we end up in either __do_page_fault, where find_vma can't find the VMA if the address has a tag, or in do_translation_fault, where the tagged address will appear to be above TASK_SIZE. In both cases, the address is not mapped in, and the task is sent a SIGSEGV. This patch removes the tag from the address before using it. With this patch, the fault is handled correctly, the address gets mapped in, and the cache maintenance succeeds. As a second bug, if cache maintenance (correctly) fails on an invalid tagged address, the address gets passed into arm64_notify_segfault, where find_vma fails to find the VMA due to the tag, and the wrong si_code may be sent as part of the siginfo_t of the segfault. With this patch, the correct si_code is sent. 
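As an illustrative aside (plain userspace C, not the kernel's untagged_addr() implementation): stripping an address tag is a sign-extension from bit 55, which for an ordinary TTBR0 pointer amounts to clearing the top byte.

#include <stdint.h>
#include <stdio.h>

static uint64_t untag(uint64_t addr)
{
        /* sign-extend from bit 55: bits 63:56 become copies of bit 55 */
        return (uint64_t)((int64_t)(addr << 8) >> 8);
}

int main(void)
{
        uint64_t tagged = 0x2b00007f9abc1234ULL;        /* tag 0x2b in bits 63:56 */

        printf("tagged   %#llx\n", (unsigned long long)tagged);
        printf("untagged %#llx\n", (unsigned long long)untag(tagged));
        return 0;
}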
Fixes: 7dd01aef0557 ("arm64: trap userspace "dc cvau" cache operation on errata-affected core") Acked-by: Will Deacon Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e52be6aa44ee..45c8eca951bc 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -443,7 +443,7 @@ int cpu_enable_cache_maint_trap(void *__unused) } #define __user_cache_maint(insn, address, res) \ - if (untagged_addr(address) >= user_addr_max()) { \ + if (address >= user_addr_max()) { \ res = -EFAULT; \ } else { \ uaccess_ttbr0_enable(); \ @@ -469,7 +469,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; - address = pt_regs_read_reg(regs, rt); + address = untagged_addr(pt_regs_read_reg(regs, rt)); switch (crm) { case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */ From 684f9dee46e39d1ebfb28271d79027b85471f57c Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 3 May 2017 16:37:46 +0100 Subject: [PATCH 292/465] arm64: hw_breakpoint: fix watchpoint matching for tagged pointers commit 7dcd9dd8cebe9fa626af7e2358d03a37041a70fb upstream. When we take a watchpoint exception, the address that triggered the watchpoint is found in FAR_EL1. We compare it to the address of each configured watchpoint to see which one was hit. The configured watchpoint addresses are untagged, while the address in FAR_EL1 will have an address tag if the data access was done using a tagged address. The tag needs to be removed to compare the address to the watchpoints. Currently we don't remove it, and as a result can report the wrong watchpoint as being hit (specifically, always either the highest TTBR0 watchpoint or lowest TTBR1 watchpoint). This patch removes the tag. Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0") Acked-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/uaccess.h | 6 +++--- arch/arm64/kernel/hw_breakpoint.c | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index ed3ecf14ce95..e6540471dcda 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -107,9 +107,9 @@ static inline void set_fs(mm_segment_t fs) }) /* - * When dealing with data aborts or instruction traps we may end up with - * a tagged userland pointer. Clear the tag to get a sane pointer to pass - * on to access_ok(), for instance. + * When dealing with data aborts, watchpoints, or instruction traps we may end + * up with a tagged userland pointer. Clear the tag to get a sane pointer to + * pass on to access_ok(), for instance. */ #define untagged_addr(addr) sign_extend64(addr, 55) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 0296e7924240..749f81779420 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -36,6 +36,7 @@ #include #include #include +#include /* Breakpoint currently in use for each BRP. 
*/ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); @@ -721,6 +722,8 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, u64 wp_low, wp_high; u32 lens, lene; + addr = untagged_addr(addr); + lens = __ffs(ctrl->len); lene = __fls(ctrl->len); From 45b1f35406bd02b6f58f635ba7c43cc552c3e6d8 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 3 May 2017 16:37:47 +0100 Subject: [PATCH 293/465] arm64: entry: improve data abort handling of tagged pointers commit 276e93279a630657fff4b086ba14c95955912dfa upstream. When handling a data abort from EL0, we currently zero the top byte of the faulting address, as we assume the address is a TTBR0 address, which may contain a non-zero address tag. However, the address may be a TTBR1 address, in which case we should not zero the top byte. This patch fixes that. The effect is that the full TTBR1 address is passed to the task's signal handler (or printed out in the kernel log). When handling a data abort from EL1, we leave the faulting address intact, as we assume it's either a TTBR1 address or a TTBR0 address with tag 0x00. This is true as far as I'm aware, we don't seem to access a tagged TTBR0 address anywhere in the kernel. Regardless, it's easy to forget about address tags, and code added in the future may not always remember to remove tags from addresses before accessing them. So add tag handling to the EL1 data abort handler as well. This also makes it consistent with the EL0 data abort handler. Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0") Reviewed-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/asm-uaccess.h | 9 +++++++++ arch/arm64/kernel/entry.S | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index df411f3e083c..ecd9788cd298 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -62,4 +62,13 @@ alternative_if ARM64_ALT_PAN_NOT_UAO alternative_else_nop_endif .endm +/* + * Remove the address tag from a virtual address, if present. + */ + .macro clear_address_tag, dst, addr + tst \addr, #(1 << 55) + bic \dst, \addr, #(0xff << 56) + csel \dst, \dst, \addr, eq + .endm + #endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 43512d4d7df2..b738880350f9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,12 +428,13 @@ el1_da: /* * Data abort handling */ - mrs x0, far_el1 + mrs x3, far_el1 enable_dbg // re-enable interrupts if they were enabled in the aborted context tbnz x23, #7, 1f // PSR_I_BIT enable_irq 1: + clear_address_tag x0, x3 mov x2, sp // struct pt_regs bl do_mem_abort @@ -594,7 +595,7 @@ el0_da: // enable interrupts before calling the main handler enable_dbg_and_irq ct_user_exit - bic x0, x26, #(0xff << 56) + clear_address_tag x0, x26 mov x1, x25 mov x2, sp bl do_mem_abort From e3b8dd546d247c28f6b82b2d002e0a0d37b151f9 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 3 May 2017 16:37:48 +0100 Subject: [PATCH 294/465] arm64: documentation: document tagged pointer stack constraints commit f0e421b1bf7af97f026e1bb8bfe4c5a7a8c08f42 upstream. Some kernel features don't currently work if a task puts a non-zero address tag in its stack pointer, frame pointer, or frame record entries (FP, LR). 
For example, with a tagged stack pointer, the kernel can't deliver signals to the process, and the task is killed instead. As another example, with a tagged frame pointer or frame records, perf fails to generate call graphs or resolve symbols. For now, just document these limitations, instead of finding and fixing everything that doesn't work, as it's not known if anyone needs to use tags in these places anyway. In addition, as requested by Dave Martin, generalize the limitations into a general kernel address tag policy, and refactor tagged-pointers.txt to include it. Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0") Reviewed-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/tagged-pointers.txt | 62 +++++++++++++++++++------ 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt index d9995f1f51b3..a25a99e82bb1 100644 --- a/Documentation/arm64/tagged-pointers.txt +++ b/Documentation/arm64/tagged-pointers.txt @@ -11,24 +11,56 @@ in AArch64 Linux. The kernel configures the translation tables so that translations made via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of the virtual address ignored by the translation hardware. This frees up -this byte for application use, with the following caveats: +this byte for application use. - (1) The kernel requires that all user addresses passed to EL1 - are tagged with tag 0x00. This means that any syscall - parameters containing user virtual addresses *must* have - their top byte cleared before trapping to the kernel. - (2) Non-zero tags are not preserved when delivering signals. - This means that signal handlers in applications making use - of tags cannot rely on the tag information for user virtual - addresses being maintained for fields inside siginfo_t. - One exception to this rule is for signals raised in response - to watchpoint debug exceptions, where the tag information - will be preserved. +Passing tagged addresses to the kernel +-------------------------------------- - (3) Special care should be taken when using tagged pointers, - since it is likely that C compilers will not hazard two - virtual addresses differing only in the upper byte. +All interpretation of userspace memory addresses by the kernel assumes +an address tag of 0x00. + +This includes, but is not limited to, addresses found in: + + - pointer arguments to system calls, including pointers in structures + passed to system calls, + + - the stack pointer (sp), e.g. when interpreting it to deliver a + signal, + + - the frame pointer (x29) and frame records, e.g. when interpreting + them to generate a backtrace or call graph. + +Using non-zero address tags in any of these locations may result in an +error code being returned, a (fatal) signal being raised, or other modes +of failure. + +For these reasons, passing non-zero address tags to the kernel via +system calls is forbidden, and using a non-zero address tag for sp is +strongly discouraged. + +Programs maintaining a frame pointer and frame records that use non-zero +address tags may suffer impaired or inaccurate debug and profiling +visibility. + + +Preserving tags +--------------- + +Non-zero tags are not preserved when delivering signals. 
This means that +signal handlers in applications making use of tags cannot rely on the +tag information for user virtual addresses being maintained for fields +inside siginfo_t. One exception to this rule is for signals raised in +response to watchpoint debug exceptions, where the tag information will +be preserved. The architecture prevents the use of a tagged PC, so the upper byte will be set to a sign-extension of bit 55 on exception return. + + +Other considerations +-------------------- + +Special care should be taken when using tagged pointers, since it is +likely that C compilers will not hazard two virtual addresses differing +only in the upper byte. From e478ae4e63abfbf1c4cd6face4aef44c823fd016 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Thu, 4 May 2017 10:58:20 +0100 Subject: [PATCH 295/465] staging: vc04_services: Fix bulk cache maintenance commit ff92b9e3c9f85fa442c430d70bf075499e1193b7 upstream. vchiq_arm supports transfers less than one page and at arbitrary alignment, using the dma-mapping API to perform its cache maintenance (even though the VPU drives the DMA hardware). Read (DMA_FROM_DEVICE) operations use cache invalidation for speed, falling back to clean+invalidate on partial cache lines, with writes (DMA_TO_DEVICE) using flushes. If a read transfer has ends which aren't page-aligned, performing cache maintenance as if they were whole pages can lead to memory corruption since the partial cache lines at the ends (and any cache lines before or after the transfer area) will be invalidated. This bug was masked until the disabling of the cache flush in flush_dcache_page(). Honouring the requested transfer start- and end-points prevents the corruption. Fixes: cf9caf192988 ("staging: vc04_services: Replace dmac_map_area with dmac_map_sg") Signed-off-by: Phil Elwell Reported-by: Stefan Wahren Tested-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman --- .../interface/vchiq_arm/vchiq_2835_arm.c | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index 3aeffcb9c87e..02e97367cb88 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -501,8 +501,15 @@ create_pagelist(char __user *buf, size_t count, unsigned short type, */ sg_init_table(scatterlist, num_pages); /* Now set the pages for each scatterlist */ - for (i = 0; i < num_pages; i++) - sg_set_page(scatterlist + i, pages[i], PAGE_SIZE, 0); + for (i = 0; i < num_pages; i++) { + unsigned int len = PAGE_SIZE - offset; + + if (len > count) + len = count; + sg_set_page(scatterlist + i, pages[i], len, offset); + offset = 0; + count -= len; + } dma_buffers = dma_map_sg(g_dev, scatterlist, @@ -523,20 +530,20 @@ create_pagelist(char __user *buf, size_t count, unsigned short type, u32 addr = sg_dma_address(sg); /* Note: addrs is the address + page_count - 1 - * The firmware expects the block to be page + * The firmware expects blocks after the first to be page- * aligned and a multiple of the page size */ WARN_ON(len == 0); - WARN_ON(len & ~PAGE_MASK); - WARN_ON(addr & ~PAGE_MASK); + WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); + WARN_ON(i && (addr & ~PAGE_MASK)); if (k > 0 && - ((addrs[k - 1] & PAGE_MASK) | - ((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT) - == addr) { - addrs[k - 1] += (len >> PAGE_SHIFT); - } else { - addrs[k++] = addr | 
((len >> PAGE_SHIFT) - 1); - } + ((addrs[k - 1] & PAGE_MASK) + + (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)) + == (addr & PAGE_MASK)) + addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT); + else + addrs[k++] = (addr & PAGE_MASK) | + (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1); } /* Partial cache lines (fragments) require special measures */ From d92507d9e5b03f3e12c38108015d80f9cd5ff447 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 11 May 2017 18:57:43 +0100 Subject: [PATCH 296/465] staging: rtl8192e: rtl92e_fill_tx_desc fix write to mapped out memory. commit baabd567f87be05330faa5140f72a91960e7405a upstream. The driver attempts to alter memory that is mapped to PCI device. This is because tx_fwinfo_8190pci points to skb->data Move the pci_map_single to when completed buffer is ready to be mapped with psdec is empty to drop on mapping error. Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index 8d6bca61e7aa..fa1440e15382 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -1184,8 +1184,7 @@ void rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc, struct cb_desc *cb_desc, struct sk_buff *skb) { struct r8192_priv *priv = rtllib_priv(dev); - dma_addr_t mapping = pci_map_single(priv->pdev, skb->data, skb->len, - PCI_DMA_TODEVICE); + dma_addr_t mapping; struct tx_fwinfo_8190pci *pTxFwInfo; pTxFwInfo = (struct tx_fwinfo_8190pci *)skb->data; @@ -1196,8 +1195,6 @@ void rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc, pTxFwInfo->Short = _rtl92e_query_is_short(pTxFwInfo->TxHT, pTxFwInfo->TxRate, cb_desc); - if (pci_dma_mapping_error(priv->pdev, mapping)) - netdev_err(dev, "%s(): DMA Mapping error\n", __func__); if (cb_desc->bAMPDUEnable) { pTxFwInfo->AllowAggregation = 1; pTxFwInfo->RxMF = cb_desc->ampdu_factor; @@ -1232,6 +1229,14 @@ void rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc, } memset((u8 *)pdesc, 0, 12); + + mapping = pci_map_single(priv->pdev, skb->data, skb->len, + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(priv->pdev, mapping)) { + netdev_err(dev, "%s(): DMA Mapping error\n", __func__); + return; + } + pdesc->LINIP = 0; pdesc->CmdInit = 1; pdesc->Offset = sizeof(struct tx_fwinfo_8190pci) + 8; From 3e57290a19ce998bda124eab7b65472f547aa860 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 11 May 2017 18:57:44 +0100 Subject: [PATCH 297/465] staging: rtl8192e: fix 2 byte alignment of register BSSIDR. commit 867510bde14e7b7fc6dd0f50b48f6753cfbd227a upstream. BSSIDR has two byte alignment on PCI ioremap correct the write by swapping to 16 bits first. This fixes a problem that the device associates fail because the filter is not set correctly. 
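As a hedged sketch (hypothetical helpers, not the driver's accessors, and ignoring the MMIO endianness handling the real accessors provide): the essence of the fix is splitting the 6-byte BSSID into a 16-bit write at the register base followed by a 32-bit write at offset +2, so every access matches the 2-byte alignment of the register block.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t regs[8];                         /* stand-in for the MMIO window */

static void reg_writew(unsigned int off, uint16_t v) { memcpy(regs + off, &v, 2); }
static void reg_writel(unsigned int off, uint32_t v) { memcpy(regs + off, &v, 4); }

static void set_bssid(const uint8_t *bssid)
{
        uint16_t lo;
        uint32_t hi;

        memcpy(&lo, bssid, 2);                  /* bytes 0-1                  */
        memcpy(&hi, bssid + 2, 4);              /* bytes 2-5                  */
        reg_writew(0, lo);                      /* 16-bit write at BSSIDR     */
        reg_writel(2, hi);                      /* 32-bit write at BSSIDR + 2 */
}

int main(void)
{
        const uint8_t bssid[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        int i;

        set_bssid(bssid);
        for (i = 0; i < 6; i++)
                printf("%02x ", regs[i]);
        printf("\n");
        return 0;
}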
Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index fa1440e15382..f1f307fa87dd 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -97,8 +97,9 @@ void rtl92e_set_reg(struct net_device *dev, u8 variable, u8 *val) switch (variable) { case HW_VAR_BSSID: - rtl92e_writel(dev, BSSIDR, ((u32 *)(val))[0]); - rtl92e_writew(dev, BSSIDR+2, ((u16 *)(val+2))[0]); + /* BSSIDR 2 byte alignment */ + rtl92e_writew(dev, BSSIDR, *(u16 *)val); + rtl92e_writel(dev, BSSIDR + 2, *(u32 *)(val + 2)); break; case HW_VAR_MEDIA_STATUS: @@ -963,8 +964,8 @@ static void _rtl92e_net_update(struct net_device *dev) rtl92e_config_rate(dev, &rate_config); priv->dot11CurrentPreambleMode = PREAMBLE_AUTO; priv->basic_rate = rate_config &= 0x15f; - rtl92e_writel(dev, BSSIDR, ((u32 *)net->bssid)[0]); - rtl92e_writew(dev, BSSIDR+4, ((u16 *)net->bssid)[2]); + rtl92e_writew(dev, BSSIDR, *(u16 *)net->bssid); + rtl92e_writel(dev, BSSIDR + 2, *(u32 *)(net->bssid + 2)); if (priv->rtllib->iw_mode == IW_MODE_ADHOC) { rtl92e_writew(dev, ATIMWND, 2); From 7369a3b5ecdaa2dd9e96d3cc94a9b1048af7c623 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 11 May 2017 18:57:45 +0100 Subject: [PATCH 298/465] staging: rtl8192e: rtl92e_get_eeprom_size Fix read size of EPROM_CMD. commit 90be652c9f157d44b9c2803f902a8839796c090d upstream. EPROM_CMD is 2 byte aligned on PCI map so calling with rtl92e_readl will return invalid data so use rtl92e_readw. The device is unable to select the right eeprom type. Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c index f1f307fa87dd..591f2740e5af 100644 --- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c +++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c @@ -627,7 +627,7 @@ void rtl92e_get_eeprom_size(struct net_device *dev) struct r8192_priv *priv = rtllib_priv(dev); RT_TRACE(COMP_INIT, "===========>%s()\n", __func__); - curCR = rtl92e_readl(dev, EPROM_CMD); + curCR = rtl92e_readw(dev, EPROM_CMD); RT_TRACE(COMP_INIT, "read from Reg Cmd9346CR(%x):%x\n", EPROM_CMD, curCR); priv->epromtype = (curCR & EPROM_CMD_9356SEL) ? EEPROM_93C56 : From 1cfc2a16c55aaf5001e16a1bc213dbad2f03d1ea Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 11 May 2017 18:57:46 +0100 Subject: [PATCH 299/465] staging: rtl8192e: GetTs Fix invalid TID 7 warning. commit 95d93e271d920dfda369d4740b1cc1061d41fe7f upstream. TID 7 is a valid value for QoS IEEE 802.11e. The switch statement that follows states 7 is valid. Remove function IsACValid and use the default case to filter invalid TIDs. 
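As an illustrative sketch (the exact TID-to-priority values shown are only assumed for illustration): letting the switch's default case do the validation keeps TID 7 valid while still rejecting out-of-range values.

#include <stdio.h>

static int tid_to_up(unsigned int tid, unsigned int *up)
{
        switch (tid) {
        case 0: case 3:
                *up = 0; break;
        case 1: case 2:
                *up = 2; break;
        case 4: case 5:
                *up = 5; break;
        case 6: case 7:
                *up = 7; break;
        default:
                return -1;                      /* only TIDs above 7 are rejected */
        }
        return 0;
}

int main(void)
{
        unsigned int up = 0;
        int ret = tid_to_up(7, &up);

        printf("TID 7 -> %d (up=%u)\n", ret, up);
        printf("TID 9 -> %d\n", tid_to_up(9, &up));
        return 0;
}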
Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl819x_TSProc.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl819x_TSProc.c b/drivers/staging/rtl8192e/rtl819x_TSProc.c index 48bbd9e8a52f..dcc4eb691889 100644 --- a/drivers/staging/rtl8192e/rtl819x_TSProc.c +++ b/drivers/staging/rtl8192e/rtl819x_TSProc.c @@ -306,11 +306,6 @@ static void MakeTSEntry(struct ts_common_info *pTsCommonInfo, u8 *Addr, pTsCommonInfo->TClasNum = TCLAS_Num; } -static bool IsACValid(unsigned int tid) -{ - return tid < 7; -} - bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS, u8 *Addr, u8 TID, enum tr_select TxRxSelect, bool bAddNewTs) { @@ -328,12 +323,6 @@ bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS, if (ieee->current_network.qos_data.supported == 0) { UP = 0; } else { - if (!IsACValid(TID)) { - netdev_warn(ieee->dev, "%s(): TID(%d) is not valid\n", - __func__, TID); - return false; - } - switch (TID) { case 0: case 3: @@ -351,6 +340,10 @@ bool GetTs(struct rtllib_device *ieee, struct ts_common_info **ppTS, case 7: UP = 7; break; + default: + netdev_warn(ieee->dev, "%s(): TID(%d) is not valid\n", + __func__, TID); + return false; } } From 4fd7fd47a04195d4d74fa1283cd6e6d600e92929 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Fri, 5 May 2017 11:39:59 -0700 Subject: [PATCH 300/465] iommu/vt-d: Flush the IOTLB to get rid of the initial kdump mappings commit f73a7eee900e95404b61408a23a1df5c5811704c upstream. Ever since commit 091d42e43d ("iommu/vt-d: Copy translation tables from old kernel") the kdump kernel copies the IOMMU context tables from the previous kernel. Each device mappings will be destroyed once the driver for the respective device takes over. This unfortunately breaks the workflow of mapping and unmapping a new context to the IOMMU. The mapping function assumes that either: 1) Unmapping did the proper IOMMU flushing and it only ever flush if the IOMMU unit supports caching invalid entries. 2) The system just booted and the initialization code took care of flushing all IOMMU caches. This assumption is not true for the kdump kernel since the context tables have been copied from the previous kernel and translations could have been cached ever since. So make sure to flush the IOTLB as well when we destroy these old copied mappings. 
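As a hedged, self-contained sketch (every function here is a hypothetical stand-in, not the intel-iommu API): when a context entry inherited from the crashed kernel is replaced, both the context cache and the IOTLB for the old domain ID have to be invalidated before the new mapping can be trusted.

#include <stdbool.h>
#include <stdio.h>

struct context { bool copied; int old_domain_id; };

static void flush_context_cache(int did) { printf("flush context cache, did=%d\n", did); }
static void flush_iotlb(int did)         { printf("flush IOTLB,         did=%d\n", did); }

static void replace_copied_context(struct context *ctx)
{
        if (ctx->copied) {
                flush_context_cache(ctx->old_domain_id);
                /* the fix: also drop translations cached under the old domain */
                flush_iotlb(ctx->old_domain_id);
                ctx->copied = false;
        }
        /* ...program the new translation here... */
}

int main(void)
{
        struct context ctx = { .copied = true, .old_domain_id = 3 };

        replace_copied_context(&ctx);
        return 0;
}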
Cc: Joerg Roedel Cc: David Woodhouse Cc: David Woodhouse Cc: Anthony Liguori Signed-off-by: KarimAllah Ahmed Acked-by: David Woodhouse Fixes: 091d42e43d ("iommu/vt-d: Copy translation tables from old kernel") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d412a313a372..478130d0a495 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2050,11 +2050,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_copied(context)) { u16 did_old = context_domain_id(context); - if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) + if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } } pgd = domain->pgd; From 4b701379312181d0339aca37646c341cd80636b2 Mon Sep 17 00:00:00 2001 From: "Li, Fei" Date: Thu, 27 Apr 2017 01:47:25 +0000 Subject: [PATCH 301/465] cpuidle: check dev before usage in cpuidle_use_deepest_state() commit 41dc750ea67f317c0deedde713d1728425524ef2 upstream. In case of there is no cpuidle devices registered, dev will be null, and panic will be triggered like below; In this patch, add checking of dev before usage, like that done in cpuidle_idle_call. Panic without fix: [ 184.961328] BUG: unable to handle kernel NULL pointer dereference at (null) [ 184.961328] IP: cpuidle_use_deepest_state+0x30/0x60 ... [ 184.961328] play_idle+0x8d/0x210 [ 184.961328] ? __schedule+0x359/0x8e0 [ 184.961328] ? _raw_spin_unlock_irqrestore+0x28/0x50 [ 184.961328] ? kthread_queue_delayed_work+0x41/0x80 [ 184.961328] clamp_idle_injection_func+0x64/0x1e0 Fixes: bb8313b603eb8 (cpuidle: Allow enforcing deepest idle state selection) Signed-off-by: Li, Fei Tested-by: Shi, Feng Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/cpuidle.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 548b90be7685..2706be7ed334 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -111,7 +111,8 @@ void cpuidle_use_deepest_state(bool enable) preempt_disable(); dev = cpuidle_get_device(); - dev->use_deepest_state = enable; + if (dev) + dev->use_deepest_state = enable; preempt_enable(); } From 160f7df55ba99818bd903af21cafdb5758bbb356 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 28 Apr 2017 10:50:26 +0100 Subject: [PATCH 302/465] metag/uaccess: Fix access_ok() commit 8a8b56638bcac4e64cccc88bf95a0f9f4b19a2fb upstream. The __user_bad() macro used by access_ok() has a few corner cases noticed by Al Viro where it doesn't behave correctly: - The kernel range check has off by 1 errors which permit access to the first and last byte of the kernel mapped range. - The kernel range check ends at LINCORE_BASE rather than META_MEMORY_LIMIT, which is ineffective when the kernel is in global space (an extremely uncommon configuration). There are a couple of other shortcomings here too: - Access to the whole of the other address space is permitted (i.e. the global half of the address space when the kernel is in local space). This isn't ideal as it could theoretically still contain privileged mappings set up by the bootloader. 
- The size argument is unused, permitting user copies which start on valid pages at the end of the user address range and cross the boundary into the kernel address space (e.g. addr = 0x3ffffff0, size > 0x10). It isn't very convenient to add size checks when disallowing certain regions, and it seems far safer to be sure and explicit about what userland is able to access, so invert the logic to allow certain regions instead, and fix the off by 1 errors and missing size checks. This also allows the get_fs() == KERNEL_DS check to be more easily optimised into the user address range case. We now have 3 such allowed regions: - The user address range (incorporating the get_fs() == KERNEL_DS check). - NULL (some kernel code expects this to work, and we'll always catch the fault anyway). - The core code memory region. Fixes: 373cd784d0fc ("metag: Memory handling") Reported-by: Al Viro Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/include/asm/uaccess.h | 40 +++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index 07238b39638c..ed6cd90ec5ca 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -28,24 +28,32 @@ #define segment_eq(a, b) ((a).seg == (b).seg) -#define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) -/* - * Explicitly allow NULL pointers here. Parts of the kernel such - * as readv/writev use access_ok to validate pointers, but want - * to allow NULL pointers for various reasons. NULL pointers are - * safe to allow through because the first page is not mappable on - * Meta. - * - * We also wish to avoid letting user code access the system area - * and the kernel half of the address space. - */ -#define __user_bad(addr, size) (((addr) > 0 && (addr) < META_MEMORY_BASE) || \ - ((addr) > PAGE_OFFSET && \ - (addr) < LINCORE_BASE)) - static inline int __access_ok(unsigned long addr, unsigned long size) { - return __kernel_ok || !__user_bad(addr, size); + /* + * Allow access to the user mapped memory area, but not the system area + * before it. The check extends to the top of the address space when + * kernel access is allowed (there's no real reason to user copy to the + * system area in any case). + */ + if (likely(addr >= META_MEMORY_BASE && addr < get_fs().seg && + size <= get_fs().seg - addr)) + return true; + /* + * Explicitly allow NULL pointers here. Parts of the kernel such + * as readv/writev use access_ok to validate pointers, but want + * to allow NULL pointers for various reasons. NULL pointers are + * safe to allow through because the first page is not mappable on + * Meta. + */ + if (!addr) + return true; + /* Allow access to core code memory area... */ + if (addr >= LINCORE_CODE_BASE && addr <= LINCORE_CODE_LIMIT && + size <= LINCORE_CODE_LIMIT + 1 - addr) + return true; + /* ... but no other areas. */ + return false; } #define access_ok(type, addr, size) __access_ok((unsigned long)(addr), \ From 88f7b253a92141dc65a48718000355810b665680 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 2 May 2017 19:41:06 +0100 Subject: [PATCH 303/465] metag/uaccess: Check access_ok in strncpy_from_user commit 3a158a62da0673db918b53ac1440845a5b64fd90 upstream. The metag implementation of strncpy_from_user() doesn't validate the src pointer, which could allow reading of arbitrary kernel memory. Add a short access_ok() check to prevent that. 
Its still possible for it to read across the user/kernel boundary, but it will invariably reach a NUL character after only 9 bytes, leaking only a static kernel address being loaded into D0Re0 at the beginning of __start, which is acceptable for the immediate fix. Reported-by: Al Viro Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/include/asm/uaccess.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index ed6cd90ec5ca..3db381205928 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -194,8 +194,13 @@ do { \ extern long __must_check __strncpy_from_user(char *dst, const char __user *src, long count); -#define strncpy_from_user(dst, src, count) __strncpy_from_user(dst, src, count) - +static inline long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + if (!access_ok(VERIFY_READ, src, 1)) + return -EFAULT; + return __strncpy_from_user(dst, src, count); +} /* * Return the size of a string (including the ending 0) * From 4c44438b566bcb0143c05be3edc83bfa95995047 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 09:32:09 -0400 Subject: [PATCH 304/465] stackprotector: Increase the per-task stack canary's random range from 32 bits to 64 bits on 64-bit platforms commit 5ea30e4e58040cfd6434c2f33dc3ea76e2c15b05 upstream. The stack canary is an 'unsigned long' and should be fully initialized to random data rather than only 32 bits of random data. Signed-off-by: Daniel Micay Acked-by: Arjan van de Ven Acked-by: Rik van Riel Acked-by: Kees Cook Cc: Arjan van Ven Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170504133209.3053-1-danielmicay@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index fa2ad9829bba..4cc564ece2cf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -536,7 +536,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) set_task_stack_end_magic(tsk); #ifdef CONFIG_CC_STACKPROTECTOR - tsk->stack_canary = get_random_int(); + tsk->stack_canary = get_random_long(); #endif /* From 3a97bedaa3896e6f51be81ac2917e4394c00a9d7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 May 2017 12:06:32 +0200 Subject: [PATCH 305/465] uwb: fix device quirk on big-endian hosts commit 41318a2b82f5d5fe1fb408f6d6e0b22aa557111d upstream. Add missing endianness conversion when using the USB device-descriptor idProduct field to apply a hardware quirk. 
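As an illustrative aside (generic C, not the uwb driver): USB descriptor fields are little-endian on the wire, so comparing one against a host-order constant is only reliable after an explicit conversion, which is a no-op on little-endian hosts and a byte swap on big-endian ones.

#include <stdint.h>
#include <stdio.h>

static uint16_t le16_to_host(const uint8_t *le)
{
        return (uint16_t)(le[0] | (le[1] << 8));
}

int main(void)
{
        /* idProduct 0xbabe as it sits in the descriptor: least significant byte first */
        const uint8_t desc_idProduct[2] = { 0xbe, 0xba };

        if (le16_to_host(desc_idProduct) == 0xbabe)
                printf("quirk applies\n");
        return 0;
}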
Fixes: 1ba47da52712 ("uwb: add the i1480 DFU driver") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/i1480/dfu/usb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index 6345e85822a4..a50cf45e530f 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -341,6 +341,7 @@ int i1480_usb_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size) static int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) { + struct usb_device *udev = interface_to_usbdev(iface); struct i1480_usb *i1480_usb; struct i1480 *i1480; struct device *dev = &iface->dev; @@ -352,8 +353,8 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) iface->cur_altsetting->desc.bInterfaceNumber); goto error; } - if (iface->num_altsetting > 1 - && interface_to_usbdev(iface)->descriptor.idProduct == 0xbabe) { + if (iface->num_altsetting > 1 && + le16_to_cpu(udev->descriptor.idProduct) == 0xbabe) { /* Need altsetting #1 [HW QUIRK] or EP1 won't work */ result = usb_set_interface(interface_to_usbdev(iface), 0, 1); if (result < 0) From ef1f1648fe3f59b5af28a5f301668fb1ee08ee47 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 May 2017 13:54:11 +0200 Subject: [PATCH 306/465] genirq: Fix chained interrupt data ordering commit 2c4569ca26986d18243f282dd727da27e9adae4c upstream. irq_set_chained_handler_and_data() sets up the chained interrupt and then stores the handler data. That's racy against an immediate interrupt which gets handled before the store of the handler data happened. The handler will dereference a NULL pointer and crash. Cure it by storing handler data before installing the chained handler. Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index be3c34e4f2ac..077c87f40f4d 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -877,8 +877,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, if (!desc) return; - __irq_do_set_handler(desc, handle, 1, NULL); desc->irq_common_data.handler_data = data; + __irq_do_set_handler(desc, handle, 1, NULL); irq_put_desc_busunlock(desc, flags); } From 7ce12d8b97d80a898b307902f9045f1b3fc9ae85 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Fri, 5 May 2017 14:52:06 -0600 Subject: [PATCH 307/465] nvme: unmap CMB and remove sysfs file in reset path commit f63572dff1421b6ca6abce71d46e03411e605c94 upstream. CMB doesn't get unmapped until removal while getting remapped on every reset. Add the unmapping and sysfs file removal to the reset path in nvme_pci_disable to match the mapping path in nvme_pci_enable. 
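As a hedged sketch (hypothetical structure and names, not the nvme driver): the underlying pattern is keeping teardown symmetric with setup, so a reset that runs disable followed by enable neither leaks the mapping nor registers the attribute twice.

#include <stdbool.h>
#include <stdio.h>

struct dev { bool mapped; bool attr_exposed; };

static void dev_enable(struct dev *d)
{
        if (!d->mapped)       { d->mapped = true;       printf("map CMB\n"); }
        if (!d->attr_exposed) { d->attr_exposed = true; printf("add sysfs attr\n"); }
}

static void dev_disable(struct dev *d)
{
        if (d->mapped)       { d->mapped = false;       printf("unmap CMB\n"); }
        if (d->attr_exposed) { d->attr_exposed = false; printf("remove sysfs attr\n"); }
}

int main(void)
{
        struct dev d = { false, false };

        dev_enable(&d);
        dev_disable(&d);        /* the reset path undoes everything enable did */
        dev_enable(&d);         /* re-enabling starts from a clean state       */
        return 0;
}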
Fixes: 202021c1a ("nvme : Add sysfs entry for NVMe CMBs when appropriate") Signed-off-by: Jon Derrick Acked-by: Keith Busch Reviewed-By: Stephen Bates Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5d309535abbd..d8a17078dd57 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1394,6 +1394,11 @@ static inline void nvme_release_cmb(struct nvme_dev *dev) if (dev->cmb) { iounmap(dev->cmb); dev->cmb = NULL; + if (dev->cmbsz) { + sysfs_remove_file_from_group(&dev->ctrl.device->kobj, + &dev_attr_cmb.attr, NULL); + dev->cmbsz = 0; + } } } @@ -1665,6 +1670,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); + nvme_release_cmb(dev); pci_free_irq_vectors(pdev); if (pci_is_enabled(pdev)) { @@ -2062,7 +2068,6 @@ static void nvme_remove(struct pci_dev *pdev) nvme_dev_disable(dev, true); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); - nvme_release_cmb(dev); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); nvme_put_ctrl(&dev->ctrl); From a24a206192803ddd1e0c5d6128a6eef9b07a21bd Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:28 +0800 Subject: [PATCH 308/465] MIPS: Loongson-3: Select MIPS_L1_CACHE_SHIFT_6 commit 17c99d9421695a0e0de18bf1e7091d859e20ec1d upstream. Some newer Loongson-3 have 64 bytes cache lines, so select MIPS_L1_CACHE_SHIFT_6. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15755/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index e0bb576410bb..c3c7d8aa3283 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1373,6 +1373,7 @@ config CPU_LOONGSON3 select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_PGD_C0_CONTEXT + select MIPS_L1_CACHE_SHIFT_6 select GPIOLIB help The Loongson 3 processor implements the MIPS64R2 instruction From 3bbe9f26a00eb01a5bea6a3e1dd22533d6e31a7b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 3 May 2017 15:17:51 +0100 Subject: [PATCH 309/465] kvm: arm/arm64: Fix race in resetting stage2 PGD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6c0d706b563af732adb094c5bf807437e8963e84 upstream. In kvm_free_stage2_pgd() we check the stage2 PGD before holding the lock and proceed to take the lock if it is valid. And we unmap the page tables, followed by releasing the lock. We reset the PGD only after dropping this lock, which could cause a race condition where another thread waiting on or even holding the lock, could potentially see that the PGD is still valid and proceed to perform a stage2 operation and later encounter a NULL PGD. 
[223090.242280] Unable to handle kernel NULL pointer dereference at virtual address 00000040 [223090.262330] PC is at unmap_stage2_range+0x8c/0x428 [223090.262332] LR is at kvm_unmap_hva_handler+0x2c/0x3c [223090.262531] Call trace: [223090.262533] [] unmap_stage2_range+0x8c/0x428 [223090.262535] [] kvm_unmap_hva_handler+0x2c/0x3c [223090.262537] [] handle_hva_to_gpa+0xb0/0x104 [223090.262539] [] kvm_unmap_hva+0x5c/0xbc [223090.262543] [] kvm_mmu_notifier_invalidate_page+0x50/0x8c [223090.262547] [] __mmu_notifier_invalidate_page+0x5c/0x84 [223090.262551] [] try_to_unmap_one+0x1d0/0x4a0 [223090.262553] [] rmap_walk+0x1cc/0x2e0 [223090.262555] [] try_to_unmap+0x74/0xa4 [223090.262557] [] migrate_pages+0x31c/0x5ac [223090.262561] [] compact_zone+0x3fc/0x7ac [223090.262563] [] compact_zone_order+0x94/0xb0 [223090.262564] [] try_to_compact_pages+0x108/0x290 [223090.262569] [] __alloc_pages_direct_compact+0x70/0x1ac [223090.262571] [] __alloc_pages_nodemask+0x434/0x9f4 [223090.262572] [] alloc_pages_vma+0x230/0x254 [223090.262574] [] do_huge_pmd_anonymous_page+0x114/0x538 [223090.262576] [] handle_mm_fault+0xd40/0x17a4 [223090.262577] [] __get_user_pages+0x12c/0x36c [223090.262578] [] get_user_pages_unlocked+0xa4/0x1b8 [223090.262579] [] __gfn_to_pfn_memslot+0x280/0x31c [223090.262580] [] gfn_to_pfn_prot+0x4c/0x5c [223090.262582] [] kvm_handle_guest_abort+0x240/0x774 [223090.262584] [] handle_exit+0x11c/0x1ac [223090.262586] [] kvm_arch_vcpu_ioctl_run+0x31c/0x648 [223090.262587] [] kvm_vcpu_ioctl+0x378/0x768 [223090.262590] [] do_vfs_ioctl+0x324/0x5a4 [223090.262591] [] SyS_ioctl+0x90/0xa4 [223090.262595] [] el0_svc_naked+0x38/0x3c This patch moves the stage2 PGD manipulation under the lock. Reported-by: Alexander Graf Cc: Mark Rutland Cc: Marc Zyngier Cc: Paolo Bonzini Cc: Radim Krčmář Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 582a972371cf..c1d81bb2d42f 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -829,22 +829,22 @@ void stage2_unmap_vm(struct kvm *kvm) * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all * underlying level-2 and level-3 tables before freeing the actual level-1 table * and setting the struct pointer to NULL. - * - * Note we don't need locking here as this is only called when the VM is - * destroyed, which can only be done once. */ void kvm_free_stage2_pgd(struct kvm *kvm) { - if (kvm->arch.pgd == NULL) - return; + void *pgd = NULL; spin_lock(&kvm->mmu_lock); - unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + if (kvm->arch.pgd) { + unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + pgd = kvm->arch.pgd; + kvm->arch.pgd = NULL; + } spin_unlock(&kvm->mmu_lock); /* Free the HW pgd, one page at a time */ - free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); - kvm->arch.pgd = NULL; + if (pgd) + free_pages_exact(pgd, S2_PGD_SIZE); } static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, From 59e1faddb3d256dbc12b0a7fe9b0ee7adebeec10 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 16 May 2017 10:34:55 +0100 Subject: [PATCH 310/465] kvm: arm/arm64: Fix use after free of stage2 page table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0c428a6a9256fcd66817e12db32a50b405ed2e5c upstream. 
We yield the kvm->mmu_lock occassionaly while performing an operation (e.g, unmap or permission changes) on a large area of stage2 mappings. However this could possibly cause another thread to clear and free up the stage2 page tables while we were waiting for regaining the lock and thus the original thread could end up in accessing memory that was freed. This patch fixes the problem by making sure that the stage2 pagetable is still valid after we regain the lock. The fact that mmu_notifer->release() could be called twice (via __mmu_notifier_release and mmu_notifier_unregsister) enhances the possibility of hitting this race where there are two threads trying to unmap the entire guest shadow pages. While at it, cleanup the redudant checks around cond_resched_lock in stage2_wp_range(), as cond_resched_lock already does the same checks. Cc: Mark Rutland Cc: Radim Krčmář Cc: andreyknvl@google.com Cc: Paolo Bonzini Acked-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index c1d81bb2d42f..9624b193ce1e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -295,6 +295,13 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { + /* + * Make sure the page table is still active, as another thread + * could have possibly freed the page table, while we released + * the lock. + */ + if (!READ_ONCE(kvm->arch.pgd)) + break; next = stage2_pgd_addr_end(addr, end); if (!stage2_pgd_none(*pgd)) unmap_stage2_puds(kvm, pgd, addr, next); @@ -1170,11 +1177,13 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) * large. Otherwise, we may see kernel panics with * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR, * CONFIG_LOCKDEP. Additionally, holding the lock too long - * will also starve other vCPUs. + * will also starve other vCPUs. We have to also make sure + * that the page tables are not freed while we released + * the lock. */ - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) - cond_resched_lock(&kvm->mmu_lock); - + cond_resched_lock(&kvm->mmu_lock); + if (!READ_ONCE(kvm->arch.pgd)) + break; next = stage2_pgd_addr_end(addr, end); if (stage2_pgd_present(*pgd)) stage2_wp_puds(pgd, addr, next); From 90c676ffabd45acaa931b36415cb600e90054ae1 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 16 May 2017 10:34:54 +0100 Subject: [PATCH 311/465] kvm: arm/arm64: Force reading uncached stage2 PGD commit 2952a6070e07ebdd5896f1f5b861acad677caded upstream. Make sure we don't use a cached value of the KVM stage2 PGD while resetting the PGD. 
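As an illustrative userspace analogue (not the KVM code, and without the locking a real implementation needs): a volatile access in the style of READ_ONCE() forces a fresh load of the pointer instead of letting the compiler reuse a previously cached value.

#include <stdio.h>

#define READ_ONCE_PTR(x) (*(void * volatile *)&(x))

struct vm { void *pgd; };

static void free_pgd(struct vm *vm)
{
        /* a real implementation reads and clears this under the mmu lock */
        void *pgd = READ_ONCE_PTR(vm->pgd);     /* fresh load, never a cached value */

        vm->pgd = NULL;
        if (pgd)
                printf("freeing pgd %p\n", pgd);
}

int main(void)
{
        static long table[4];
        struct vm vm = { .pgd = table };

        free_pgd(&vm);
        return 0;
}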
Cc: Marc Zyngier Signed-off-by: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 9624b193ce1e..3837b096e1a6 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -844,7 +844,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); - pgd = kvm->arch.pgd; + pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; } spin_unlock(&kvm->mmu_lock); From 78111082778af2dd4391df547f2bb18f06236090 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 May 2017 21:47:25 -0400 Subject: [PATCH 312/465] osf_wait4(): fix infoleak commit a8c39544a6eb2093c04afd5005b6192bd0e880c6 upstream. failing sys_wait4() won't fill struct rusage... Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/osf_sys.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 6d76e528ab8f..af9189e8965b 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1199,8 +1199,10 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur))) return -EFAULT; - err = 0; - err |= put_user(status, ustatus); + err = put_user(status, ustatus); + if (ret < 0) + return err ? err : ret; + err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec); err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec); err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec); From ece6ff2261337579b682dad3d93b0265eb744ab7 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 11 May 2017 10:21:46 +0200 Subject: [PATCH 313/465] drbd: fix request leak introduced by locking/atomic, kref: Kill kref_sub() commit a00ebd1cf12c378a1d4f7a1d6daf1d76c1eaad82 upstream. When killing kref_sub(), the unconditional additional kref_get() was not properly paired with the necessary kref_put(), causing a leak of struct drbd_requests (~ 224 Bytes) per submitted bio, and breaking DRBD in general, as the destructor of those "drbd_requests" does more than just the mempoll_free(). Fixes: bdfafc4ffdd2 ("locking/atomic, kref: Kill kref_sub()") Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/drbd/drbd_req.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 652114ae1a8a..1fc8a671aabe 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -314,24 +314,32 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) } /* still holds resource->req_lock */ -static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) +static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) { struct drbd_device *device = req->device; D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED)); + if (!put) + return; + if (!atomic_sub_and_test(put, &req->completion_ref)) - return 0; + return; drbd_req_complete(req, m); + /* local completion may still come in later, + * we need to keep the req object around. 
*/ + if (req->rq_state & RQ_LOCAL_ABORTED) + return; + if (req->rq_state & RQ_POSTPONED) { /* don't destroy the req object just yet, * but queue it for retry */ drbd_restart_request(req); - return 0; + return; } - return 1; + kref_put(&req->kref, drbd_req_destroy); } static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) @@ -518,12 +526,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (req->i.waiting) wake_up(&device->misc_wait); - if (c_put) { - if (drbd_req_put_completion_ref(req, m, c_put)) - kref_put(&req->kref, drbd_req_destroy); - } else { - kref_put(&req->kref, drbd_req_destroy); - } + drbd_req_put_completion_ref(req, m, c_put); + kref_put(&req->kref, drbd_req_destroy); } static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) @@ -1363,8 +1367,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request } out: - if (drbd_req_put_completion_ref(req, &m, 1)) - kref_put(&req->kref, drbd_req_destroy); + drbd_req_put_completion_ref(req, &m, 1); spin_unlock_irq(&resource->req_lock); /* Even though above is a kref_put(), this is safe. From e57f049b09ee5f3d0df898e941a8823a895f22c7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Apr 2017 12:15:10 +0900 Subject: [PATCH 314/465] um: Fix to call read_initrd after init_bootmem commit 5b4236e17cc1bd9fa14b2b0c7a4ae632d41f2e20 upstream. Since read_initrd() invokes alloc_bootmem() for allocating memory to load initrd image, it must be called after init_bootmem. This makes read_initrd() called directly from setup_arch() after init_bootmem() and mem_total_pages(). Fixes: b63236972e1 ("um: Setup physical memory in setup_arch()") Signed-off-by: Masami Hiramatsu Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/um/kernel/initrd.c | 4 +--- arch/um/kernel/um_arch.c | 6 ++++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 48bae81f8dca..6f6e7896e53f 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -14,7 +14,7 @@ static char *initrd __initdata = NULL; static int load_initrd(char *filename, void *buf, int size); -static int __init read_initrd(void) +int __init read_initrd(void) { void *area; long long size; @@ -46,8 +46,6 @@ static int __init read_initrd(void) return 0; } -__uml_postsetup(read_initrd); - static int __init uml_initrd_setup(char *line, int *add) { initrd = line; diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 4b85acd4020c..64a1fd06f3fd 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -338,11 +338,17 @@ int __init linux_main(int argc, char **argv) return start_uml(); } +int __init __weak read_initrd(void) +{ + return 0; +} + void __init setup_arch(char **cmdline_p) { stack_protections((unsigned long) &init_thread_info); setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); mem_total_pages(physmem_size, iomem_size, highmem); + read_initrd(); paging_init(); strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); From 18b4b0ab6577a2b23545dbe3d6ffa4181362d550 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 12:53:05 +0100 Subject: [PATCH 315/465] firmware: ti_sci: fix strncat length check commit 76cefef8e838304a71725a0b5007c375619d78fb upstream. 
gcc-7 notices that the length we pass to strncat is wrong: drivers/firmware/ti_sci.c: In function 'ti_sci_probe': drivers/firmware/ti_sci.c:204:32: error: specified bound 50 equals the size of the destination [-Werror=stringop-overflow=] Instead of the total length, we must pass the length of the remaining space here. Fixes: aa276781a64a ("firmware: Add basic support for TI System Control Interface (TI-SCI) protocol") Acked-by: Nishanth Menon Acked-by: Santosh Shilimkar Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/ti_sci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 874ff32db366..00cfed3c3e1a 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -202,7 +202,8 @@ static int ti_sci_debugfs_create(struct platform_device *pdev, info->debug_buffer[info->debug_region_size] = 0; info->d = debugfs_create_file(strncat(debug_name, dev_name(dev), - sizeof(debug_name)), + sizeof(debug_name) - + sizeof("ti_sci_debug@")), 0444, NULL, info, &ti_sci_debug_fops); if (IS_ERR(info->d)) return PTR_ERR(info->d); From 63ab09e39dd24438ec7908b3d8417219b0a75c7d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 May 2017 10:19:49 +0200 Subject: [PATCH 316/465] tracing/kprobes: Enforce kprobes teardown after testing commit 30e7d894c1478c88d50ce94ddcdbd7f9763d9cdd upstream. Enabling the tracer selftest triggers occasionally the warning in text_poke(), which warns when the to be modified page is not marked reserved. The reason is that the tracer selftest installs kprobes on functions marked __init for testing. These probes are removed after the tests, but that removal schedules the delayed kprobes_optimizer work, which will do the actual text poke. If the work is executed after the init text is freed, then the warning triggers. The bug can be reproduced reliably when the work delay is increased. Flush the optimizer work and wait for the optimizing/unoptimizing lists to become empty before returning from the kprobes tracer selftest. That ensures that all operations which were queued due to the probes removal have completed. 
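A minimal sketch of the ordering this change enforces for probes placed on __init text (illustrative only; the probed symbol is made up, and it assumes built-in code, since wait_for_kprobe_optimizer() is not exported to modules):

#include <linux/kprobes.h>

static struct kprobe test_kp = { .symbol_name = "some_init_function" };	/* hypothetical */

static void __init probe_teardown_sketch(void)
{
	unregister_kprobe(&test_kp);	/* queues the delayed (un)optimizer work */
	wait_for_kprobe_optimizer();	/* drain it before the __init text is freed */
}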
Link: http://lkml.kernel.org/r/20170516094802.76a468bb@gandalf.local.home Signed-off-by: Thomas Gleixner Acked-by: Masami Hiramatsu Fixes: 6274de498 ("kprobes: Support delayed unoptimizing") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- include/linux/kprobes.h | 3 +++ kernel/kprobes.c | 2 +- kernel/trace/trace_kprobe.c | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index c328e4f7dcad..d0bdb986f759 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -347,6 +347,9 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif +extern void wait_for_kprobe_optimizer(void); +#else +static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 699c5bc51a92..74522da4d7cd 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -598,7 +598,7 @@ static void kprobe_optimizer(struct work_struct *work) } /* Wait for completing optimization and unoptimization */ -static void wait_for_kprobe_optimizer(void) +void wait_for_kprobe_optimizer(void) { mutex_lock(&kprobe_mutex); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5f688cc724f0..49cdda852165 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1511,6 +1511,11 @@ static __init int kprobe_trace_self_tests_init(void) end: release_all_trace_kprobes(); + /* + * Wait for the optimizer work to finish. Otherwise it might fiddle + * with probes in already freed __init text. + */ + wait_for_kprobe_optimizer(); if (warn) pr_cont("NG: Some tests are failed. Please check them.\n"); else From d76f9188598f847c290ab073c9b9e2bff16ae097 Mon Sep 17 00:00:00 2001 From: Dawei Chien Date: Tue, 21 Feb 2017 20:26:52 +0800 Subject: [PATCH 317/465] thermal: mt8173: minor mtk_thermal.c cleanups commit 05d7839aa290901429d8edcd8f7974c9df2bcaa5 upstream. If a thermal bank has 4 sensors, the thermal driver should read TEMP_MSR3. However, the driver currently does not read TEMP_MSR3; since the mt8173 thermal driver only uses 3 sensors on each thermal bank at a time, this patch does not affect the reported temperature. Only if the mt8173 thermal driver used 4 sensors on any thermal bank would it read the third sensor twice and lose the value of the fourth sensor. Fixes: b7cf0053738c ("thermal: Add Mediatek thermal driver for mt2701.") Reviewed-by: Matthias Brugger Signed-off-by: Dawei Chien Signed-off-by: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/mtk_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index 1aff7fde54b1..7737f14846f9 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -191,7 +191,7 @@ static const int mt8173_bank_data[MT8173_NUM_ZONES][3] = { }; static const int mt8173_msr[MT8173_NUM_SENSORS_PER_ZONE] = { - TEMP_MSR0, TEMP_MSR1, TEMP_MSR2, TEMP_MSR2 + TEMP_MSR0, TEMP_MSR1, TEMP_MSR2, TEMP_MSR3 }; static const int mt8173_adcpnp[MT8173_NUM_SENSORS_PER_ZONE] = { From b4447dbb5cd1116b738f026b582ae93ab0490169 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 21 Apr 2017 11:42:54 -0500 Subject: [PATCH 318/465] PCI/ACPI: Tidy up MCFG quirk whitespace commit ced414a14f709fc0af60bd381ba8a566dc566869 upstream.
With no blank lines, it's not obvious where the macro definitions end and the uses begin. Add some blank lines and reorder the ThunderX definitions. No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/pci_mcfg.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 2944353253ed..65affd8f29c1 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -54,6 +54,7 @@ static struct mcfg_fixup mcfg_quirks[] = { #define QCOM_ECAM32(seg) \ { "QCOM ", "QDF2432 ", 1, seg, MCFG_BUS_ANY, &pci_32b_ops } + QCOM_ECAM32(0), QCOM_ECAM32(1), QCOM_ECAM32(2), @@ -68,6 +69,7 @@ static struct mcfg_fixup mcfg_quirks[] = { { "HISI ", table_id, 0, (seg) + 1, MCFG_BUS_ANY, ops }, \ { "HISI ", table_id, 0, (seg) + 2, MCFG_BUS_ANY, ops }, \ { "HISI ", table_id, 0, (seg) + 3, MCFG_BUS_ANY, ops } + HISI_QUAD_DOM("HIP05 ", 0, &hisi_pcie_ops), HISI_QUAD_DOM("HIP06 ", 0, &hisi_pcie_ops), HISI_QUAD_DOM("HIP07 ", 0, &hisi_pcie_ops), @@ -77,6 +79,7 @@ static struct mcfg_fixup mcfg_quirks[] = { #define THUNDER_PEM_RES(addr, node) \ DEFINE_RES_MEM((addr) + ((u64) (node) << 44), 0x39 * SZ_16M) + #define THUNDER_PEM_QUIRK(rev, node) \ { "CAVIUM", "THUNDERX", rev, 4 + (10 * (node)), MCFG_BUS_ANY, \ &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x88001f000000UL, node) }, \ @@ -90,13 +93,15 @@ static struct mcfg_fixup mcfg_quirks[] = { &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x894057000000UL, node) }, \ { "CAVIUM", "THUNDERX", rev, 9 + (10 * (node)), MCFG_BUS_ANY, \ &thunder_pem_ecam_ops, THUNDER_PEM_RES(0x89808f000000UL, node) } - /* SoC pass2.x */ - THUNDER_PEM_QUIRK(1, 0), - THUNDER_PEM_QUIRK(1, 1), #define THUNDER_ECAM_QUIRK(rev, seg) \ { "CAVIUM", "THUNDERX", rev, seg, MCFG_BUS_ANY, \ &pci_thunder_ecam_ops } + + /* SoC pass2.x */ + THUNDER_PEM_QUIRK(1, 0), + THUNDER_PEM_QUIRK(1, 1), + /* SoC pass1.x */ THUNDER_PEM_QUIRK(2, 0), /* off-chip devices */ THUNDER_PEM_QUIRK(2, 1), /* off-chip devices */ @@ -112,9 +117,11 @@ static struct mcfg_fixup mcfg_quirks[] = { #define XGENE_V1_ECAM_MCFG(rev, seg) \ {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \ &xgene_v1_pcie_ecam_ops } + #define XGENE_V2_ECAM_MCFG(rev, seg) \ {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \ &xgene_v2_pcie_ecam_ops } + /* X-Gene SoC with v1 PCIe controller */ XGENE_V1_ECAM_MCFG(1, 0), XGENE_V1_ECAM_MCFG(1, 1), From 569ed828defa070c4b38aca1986d460577e375d5 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Wed, 29 Mar 2017 14:16:13 +0200 Subject: [PATCH 319/465] PCI/ACPI: Add ThunderX pass2.x 2nd node MCFG quirk commit cd183740480f045600aa1fa38fe70809b5498f05 upstream. Currently SoCs pass2.x do not emulate EA headers for ACPI boot method at all. However, for pass2.x some devices (like EDAC) advertise incorrect base addresses in their BARs which results in driver probe failure during resource request. Since all problematic blocks are on 2nd NUMA node under domain 10 add necessary quirk entry to obtain BAR addresses correction using EA header emulation. 
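For reference, the one-line quirk added below expands, via the THUNDER_ECAM_QUIRK() macro shown in the previous patch, to an mcfg_quirks[] entry of roughly this shape:

	/* THUNDER_ECAM_QUIRK(1, 10): pass2.x (rev 1), domain 10 on the 2nd node */
	{ "CAVIUM", "THUNDERX", 1, 10, MCFG_BUS_ANY, &pci_thunder_ecam_ops },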
Fixes: 44f22bd91e88 ("PCI: Add MCFG quirks for Cavium ThunderX pass2.x host controller") Signed-off-by: Tomasz Nowicki Signed-off-by: Bjorn Helgaas Acked-by: Robert Richter Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/pci_mcfg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 65affd8f29c1..a4e8432fc2fb 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -101,6 +101,7 @@ static struct mcfg_fixup mcfg_quirks[] = { /* SoC pass2.x */ THUNDER_PEM_QUIRK(1, 0), THUNDER_PEM_QUIRK(1, 1), + THUNDER_ECAM_QUIRK(1, 10), /* SoC pass1.x */ THUNDER_PEM_QUIRK(2, 0), /* off-chip devices */ From 9dc0babcd208d1e68ba692f28c50b5f78f1f0798 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 24 Mar 2017 11:07:22 -0700 Subject: [PATCH 320/465] PCI: hv: Allocate interrupt descriptors with GFP_ATOMIC commit 59c58ceeea9cdc6144d7b0303753e6bd26d87455 upstream. The memory allocation here needs to be non-blocking. Fix the issue. Signed-off-by: K. Y. Srinivasan Signed-off-by: Bjorn Helgaas Reviewed-by: Long Li Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index ada98569b78e..51709320df89 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -876,7 +876,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) hv_int_desc_free(hpdev, int_desc); } - int_desc = kzalloc(sizeof(*int_desc), GFP_KERNEL); + int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; From 3537380a05bb9e35c9ababf66126c56af277e219 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 24 Mar 2017 11:07:21 -0700 Subject: [PATCH 321/465] PCI: hv: Specify CPU_AFFINITY_ALL for MSI affinity when >= 32 CPUs commit 433fcf6b7b31f1f233dd50aeb9d066a0f6ed4b9d upstream. When we have 32 or more CPUs in the affinity mask, we should use a special constant to specify that to the host. Fix this issue. Signed-off-by: K. Y. Srinivasan Signed-off-by: Bjorn Helgaas Reviewed-by: Long Li Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-hyperv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 51709320df89..85088a11d9a6 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -72,6 +72,7 @@ enum { PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1 }; +#define CPU_AFFINITY_ALL -1ULL #define PCI_CONFIG_MMIO_LENGTH 0x2000 #define CFG_PAGE_OFFSET 0x1000 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET) @@ -897,9 +898,13 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) * processors because Hyper-V only supports 64 in a guest. 
*/ affinity = irq_data_get_affinity_mask(data); - for_each_cpu_and(cpu, affinity, cpu_online_mask) { - int_pkt->int_desc.cpu_mask |= - (1ULL << vmbus_cpu_number_to_vp_number(cpu)); + if (cpumask_weight(affinity) >= 32) { + int_pkt->int_desc.cpu_mask = CPU_AFFINITY_ALL; + } else { + for_each_cpu_and(cpu, affinity, cpu_online_mask) { + int_pkt->int_desc.cpu_mask |= + (1ULL << vmbus_cpu_number_to_vp_number(cpu)); + } } ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt, From b87e133e3d71065f37884af40613730810dbb526 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:25:50 +0100 Subject: [PATCH 322/465] PCI: Fix pci_mmap_fits() for HAVE_PCI_RESOURCE_TO_USER platforms commit 6bccc7f426abd640f08d8c75fb22f99483f201b4 upstream. In the PCI_MMAP_PROCFS case when the address being passed by the user is a 'user visible' resource address based on the bus window, and not the actual contents of the resource, that's what we need to be checking it against. Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-sysfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 25d010d449a3..7ac258fd3c5c 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -985,15 +985,19 @@ void pci_remove_legacy_files(struct pci_bus *b) int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma, enum pci_mmap_api mmap_api) { - unsigned long nr, start, size, pci_start; + unsigned long nr, start, size; + resource_size_t pci_start = 0, pci_end; if (pci_resource_len(pdev, resno) == 0) return 0; nr = vma_pages(vma); start = vma->vm_pgoff; size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1; - pci_start = (mmap_api == PCI_MMAP_PROCFS) ? - pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0; + if (mmap_api == PCI_MMAP_PROCFS) { + pci_resource_to_user(pdev, resno, &pdev->resource[resno], + &pci_start, &pci_end); + pci_start >>= PAGE_SHIFT; + } if (start >= pci_start && start < pci_start + size && start + nr <= pci_start + size) return 1; From bee38f0f378d5d1a687afcf311515e3adedd2340 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:25:51 +0100 Subject: [PATCH 323/465] PCI: Fix another sanity check bug in /proc/pci mmap commit 17caf56731311c9596e7d38a70c88fcb6afa6a1b upstream. Don't match MMIO maps with I/O BARs and vice versa. 
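For context, the mmap_state tested by this fix and the previous one is chosen by userspace through ioctls on the /proc/bus/pci file before calling mmap(); a hypothetical userspace sketch (the device path and mapping length are assumptions):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/pci.h>

int main(void)
{
	int fd = open("/proc/bus/pci/00/02.0", O_RDWR);	/* hypothetical device */
	if (fd < 0)
		return 1;
	ioctl(fd, PCIIOC_MMAP_IS_MEM);	/* map MMIO, not port I/O */
	void *bar = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	return bar == MAP_FAILED;
}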
Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index f82710a8694d..62a0c3e788d3 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -231,14 +231,20 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) { struct pci_dev *dev = PDE_DATA(file_inode(file)); struct pci_filp_private *fpriv = file->private_data; - int i, ret, write_combine; + int i, ret, write_combine, res_bit; if (!capable(CAP_SYS_RAWIO)) return -EPERM; + if (fpriv->mmap_state == pci_mmap_io) + res_bit = IORESOURCE_IO; + else + res_bit = IORESOURCE_MEM; + /* Make sure the caller is mapping a real resource for this device */ for (i = 0; i < PCI_ROM_RESOURCE; i++) { - if (pci_mmap_fits(dev, i, vma, PCI_MMAP_PROCFS)) + if (dev->resource[i].flags & res_bit && + pci_mmap_fits(dev, i, vma, PCI_MMAP_PROCFS)) break; } From f21143a53ff89444e761508c76e71fa1f3c6890a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:25:52 +0100 Subject: [PATCH 324/465] PCI: Only allow WC mmap on prefetchable resources commit cef4d02305a06be581bb7f4353446717a1b319ec upstream. The /proc/bus/pci mmap interface allows the user to specify whether they want WC or not. Don't let them do so on non-prefetchable BARs. Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/proc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 62a0c3e788d3..dc8912e2d4a1 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -231,7 +231,7 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) { struct pci_dev *dev = PDE_DATA(file_inode(file)); struct pci_filp_private *fpriv = file->private_data; - int i, ret, write_combine, res_bit; + int i, ret, write_combine = 0, res_bit; if (!capable(CAP_SYS_RAWIO)) return -EPERM; @@ -251,10 +251,13 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) if (i >= PCI_ROM_RESOURCE) return -ENODEV; - if (fpriv->mmap_state == pci_mmap_mem) - write_combine = fpriv->write_combine; - else - write_combine = 0; + if (fpriv->mmap_state == pci_mmap_mem && + fpriv->write_combine) { + if (dev->resource[i].flags & IORESOURCE_PREFETCH) + write_combine = 1; + else + return -EINVAL; + } ret = pci_mmap_page_range(dev, vma, fpriv->mmap_state, write_combine); if (ret < 0) From 6003ac5842145bf865d4adc781a3fe25cb0d4640 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 18 Apr 2017 20:44:30 +0200 Subject: [PATCH 325/465] PCI: Freeze PME scan before suspending devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ea00353f36b64375518662a8ad15e39218a1f324 upstream. Laurent Pinchart reported that the Renesas R-Car H2 Lager board (r8a7790) crashes during suspend tests. Geert Uytterhoeven managed to reproduce the issue on an M2-W Koelsch board (r8a7791): It occurs when the PME scan runs, once per second. During PME scan, the PCI host bridge (rcar-pci) registers are accessed while its module clock has already been disabled, leading to the crash. 
One reproducer is to configure s2ram to use "s2idle" instead of "deep" suspend: # echo 0 > /sys/module/printk/parameters/console_suspend # echo s2idle > /sys/power/mem_sleep # echo mem > /sys/power/state Another reproducer is to write either "platform" or "processors" to /sys/power/pm_test. It does not (or is less likely) to happen during full system suspend ("core" or "none") because system suspend also disables timers, and thus the workqueue handling PME scans no longer runs. Geert believes the issue may still happen in the small window between disabling module clocks and disabling timers: # echo 0 > /sys/module/printk/parameters/console_suspend # echo platform > /sys/power/pm_test # Or "processors" # echo mem > /sys/power/state (Make sure CONFIG_PCI_RCAR_GEN2 and CONFIG_USB_OHCI_HCD_PCI are enabled.) Rafael Wysocki agrees that PME scans should be suspended before the host bridge registers become inaccessible. To that end, queue the task on a workqueue that gets frozen before devices suspend. Rafael notes however that as a result, some wakeup events may be missed if they are delivered via PME from a device without working IRQ (which hence must be polled) and occur after the workqueue has been frozen. If that turns out to be an issue in practice, it may be possible to solve it by calling pci_pme_list_scan() once directly from one of the host bridge's pm_ops callbacks. Stacktrace for posterity: PM: Syncing filesystems ... [ 38.566237] done. PM: Preparing system for sleep (mem) Freezing user space processes ... [ 38.579813] (elapsed 0.001 seconds) done. Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done. PM: Suspending system (mem) PM: suspend of devices complete after 152.456 msecs PM: late suspend of devices complete after 2.809 msecs PM: noirq suspend of devices complete after 29.863 msecs suspend debug: Waiting for 5 second(s). 
Unhandled fault: asynchronous external abort (0x1211) at 0x00000000 pgd = c0003000 [00000000] *pgd=80000040004003, *pmd=00000000 Internal error: : 1211 [#1] SMP ARM Modules linked in: CPU: 1 PID: 20 Comm: kworker/1:1 Not tainted 4.9.0-rc1-koelsch-00011-g68db9bc814362e7f #3383 Hardware name: Generic R8A7791 (Flattened Device Tree) Workqueue: events pci_pme_list_scan task: eb56e140 task.stack: eb58e000 PC is at pci_generic_config_read+0x64/0x6c LR is at rcar_pci_cfg_base+0x64/0x84 pc : [] lr : [] psr: 600d0093 sp : eb58fe98 ip : c041d750 fp : 00000008 r10: c0e2283c r9 : 00000000 r8 : 600d0013 r7 : 00000008 r6 : eb58fed6 r5 : 00000002 r4 : eb58feb4 r3 : 00000000 r2 : 00000044 r1 : 00000008 r0 : 00000000 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user Control: 30c5387d Table: 6a9f6c80 DAC: 55555555 Process kworker/1:1 (pid: 20, stack limit = 0xeb58e210) Stack: (0xeb58fe98 to 0xeb590000) fe80: 00000002 00000044 fea0: eb6f5800 c041d9b0 eb58feb4 00000008 00000044 00000000 eb78a000 eb78a000 fec0: 00000044 00000000 eb9aff00 c0424bf0 eb78a000 00000000 eb78a000 c0e22830 fee0: ea8a6fc0 c0424c5c eaae79c0 c0424ce0 eb55f380 c0e22838 eb9a9800 c0235fbc ff00: eb55f380 c0e22838 eb55f380 eb9a9800 eb9a9800 eb58e000 eb9a9824 c0e02100 ff20: eb55f398 c02366c4 eb56e140 eb5631c0 00000000 eb55f380 c023641c 00000000 ff40: 00000000 00000000 00000000 c023a928 cd105598 00000000 40506a34 eb55f380 ff60: 00000000 00000000 dead4ead ffffffff ffffffff eb58ff74 eb58ff74 00000000 ff80: 00000000 dead4ead ffffffff ffffffff eb58ff90 eb58ff90 eb58ffac eb5631c0 ffa0: c023a844 00000000 00000000 c0206d68 00000000 00000000 00000000 00000000 ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 3a81336c 10ccd1dd [] (pci_generic_config_read) from [] (pci_bus_read_config_word+0x58/0x80) [] (pci_bus_read_config_word) from [] (pci_check_pme_status+0x34/0x78) [] (pci_check_pme_status) from [] (pci_pme_wakeup+0x28/0x54) [] (pci_pme_wakeup) from [] (pci_pme_list_scan+0x58/0xb4) [] (pci_pme_list_scan) from [] (process_one_work+0x1bc/0x308) [] (process_one_work) from [] (worker_thread+0x2a8/0x3e0) [] (worker_thread) from [] (kthread+0xe4/0xfc) [] (kthread) from [] (ret_from_fork+0x14/0x2c) Code: ea000000 e5903000 f57ff04f e3a00000 (e5843000) ---[ end trace 667d43ba3aa9e589 ]--- Fixes: df17e62e5bff ("PCI: Add support for polling PME state on suspended legacy PCI devices") Reported-and-tested-by: Laurent Pinchart Reported-and-tested-by: Geert Uytterhoeven Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Laurent Pinchart Acked-by: Rafael J. 
Wysocki Cc: Mika Westerberg Cc: Niklas Söderlund Cc: Simon Horman Cc: Yinghai Lu Cc: Matthew Garrett Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7904d02ffdb9..d35c0162f9f2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1782,8 +1782,8 @@ static void pci_pme_list_scan(struct work_struct *work) } } if (!list_empty(&pci_pme_list)) - schedule_delayed_work(&pci_pme_work, - msecs_to_jiffies(PME_TIMEOUT)); + queue_delayed_work(system_freezable_wq, &pci_pme_work, + msecs_to_jiffies(PME_TIMEOUT)); mutex_unlock(&pci_pme_list_mutex); } @@ -1848,8 +1848,9 @@ void pci_pme_active(struct pci_dev *dev, bool enable) mutex_lock(&pci_pme_list_mutex); list_add(&pme_dev->list, &pci_pme_list); if (list_is_singular(&pci_pme_list)) - schedule_delayed_work(&pci_pme_work, - msecs_to_jiffies(PME_TIMEOUT)); + queue_delayed_work(system_freezable_wq, + &pci_pme_work, + msecs_to_jiffies(PME_TIMEOUT)); mutex_unlock(&pci_pme_list_mutex); } else { mutex_lock(&pci_pme_list_mutex); From 750d21357e36df3cff1b0419a432c084d13e6ff3 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Mon, 27 Mar 2017 20:02:07 +0200 Subject: [PATCH 326/465] mtd: nand: orion: fix clk handling commit 675b11d94ce9baa5eb365a51b35d2793f77c8ab8 upstream. The clk handling in orion_nand.c had two problems: - In the probe function, clk_put() was called for an enabled clock, which violates the API (see documentation for clk_put() in include/linux/clk.h) - In the error path of the probe function, clk_put() could be called twice for the same clock. In order to clean this up, use the managed function devm_clk_get() and store the pointer to the clk in the driver data. Fixes: baffab28b13120694fa3ebab08d3e99667a851d2 ('ARM: Orion: fix driver probe error handling with respect to clk') Signed-off-by: Simon Baatz Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/orion_nand.c | 42 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c index 4a91c5d000be..3acdc20485f1 100644 --- a/drivers/mtd/nand/orion_nand.c +++ b/drivers/mtd/nand/orion_nand.c @@ -23,6 +23,11 @@ #include #include +struct orion_nand_info { + struct nand_chip chip; + struct clk *clk; +}; + static void orion_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { struct nand_chip *nc = mtd_to_nand(mtd); @@ -75,20 +80,21 @@ static void orion_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) static int __init orion_nand_probe(struct platform_device *pdev) { + struct orion_nand_info *info; struct mtd_info *mtd; struct nand_chip *nc; struct orion_nand_data *board; struct resource *res; - struct clk *clk; void __iomem *io_base; int ret = 0; u32 val = 0; - nc = devm_kzalloc(&pdev->dev, - sizeof(struct nand_chip), + info = devm_kzalloc(&pdev->dev, + sizeof(struct orion_nand_info), GFP_KERNEL); - if (!nc) + if (!info) return -ENOMEM; + nc = &info->chip; mtd = nand_to_mtd(nc); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -145,15 +151,13 @@ static int __init orion_nand_probe(struct platform_device *pdev) if (board->dev_ready) nc->dev_ready = board->dev_ready; - platform_set_drvdata(pdev, mtd); + platform_set_drvdata(pdev, info); /* Not all platforms can gate the clock, so it is not an error if the clock does not exists. 
*/ - clk = clk_get(&pdev->dev, NULL); - if (!IS_ERR(clk)) { - clk_prepare_enable(clk); - clk_put(clk); - } + info->clk = devm_clk_get(&pdev->dev, NULL); + if (!IS_ERR(info->clk)) + clk_prepare_enable(info->clk); ret = nand_scan(mtd, 1); if (ret) @@ -169,26 +173,22 @@ static int __init orion_nand_probe(struct platform_device *pdev) return 0; no_dev: - if (!IS_ERR(clk)) { - clk_disable_unprepare(clk); - clk_put(clk); - } + if (!IS_ERR(info->clk)) + clk_disable_unprepare(info->clk); return ret; } static int orion_nand_remove(struct platform_device *pdev) { - struct mtd_info *mtd = platform_get_drvdata(pdev); - struct clk *clk; + struct orion_nand_info *info = platform_get_drvdata(pdev); + struct nand_chip *chip = &info->chip; + struct mtd_info *mtd = nand_to_mtd(chip); nand_release(mtd); - clk = clk_get(&pdev->dev, NULL); - if (!IS_ERR(clk)) { - clk_disable_unprepare(clk); - clk_put(clk); - } + if (!IS_ERR(info->clk)) + clk_disable_unprepare(info->clk); return 0; } From 2d3e8503734ac2d7ed780b250d86d046d9459659 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 30 Mar 2017 10:37:50 +0300 Subject: [PATCH 327/465] mtd: nand: omap2: Fix partition creation via cmdline mtdparts commit 2d283ede59869159f4bb84ae689258c5caffce54 upstream. commit c9711ec5250b ("mtd: nand: omap: Clean up device tree support") caused the parent device name to be changed from "omap2-nand.0" to ".nand" (e.g. 30000000.nand on omap3 platforms). This caused mtd->name to be changed as well. This breaks partition creation via mtdparts passed by u-boot as it uses "omap2-nand.0" for the mtd-id. Fix this by explicitly setting the mtd->name to "omap2-nand." if it isn't already set by nand_set_flash_node(). CS number is the NAND controller instance ID. Fixes: c9711ec5250b ("mtd: nand: omap: Clean up device tree support") Reported-by: Leto Enrico Reported-by: Adam Ford Suggested-by: Boris Brezillon Tested-by: Adam Ford Signed-off-by: Roger Quadros Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/omap2.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 2a52101120d4..084934a9f19c 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1856,6 +1856,15 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.priv = NULL; nand_set_flash_node(nand_chip, dev->of_node); + if (!mtd->name) { + mtd->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "omap2-nand.%d", info->gpmc_cs); + if (!mtd->name) { + dev_err(&pdev->dev, "Failed to set MTD name\n"); + return -ENOMEM; + } + } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); nand_chip->IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(nand_chip->IO_ADDR_R)) From 9c92bffbd8eba7dce1d30734ce02bac8bf7357b0 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Tue, 2 May 2017 12:19:00 +0200 Subject: [PATCH 328/465] mtd: nand: add ooblayout for old hamming layout commit 6a623e07694437ad09f382a13f76cffc32239a7f upstream. The old 1-bit hamming layout requires ECC data to be placed at a fixed offset, and not necessarily at the end of the OOB area. Add this old layout back in order to fix legacy setups. 
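A worked example of the layout being restored, under the assumption of a 2 KiB page with a 64-byte OOB and ecc->total = 12 (e.g. four 512-byte ECC steps of 3 bytes each):

/* 64-byte OOB, 1-bit Hamming, ecc->total = 12 (assumed):
 *   bytes  0..1   bad-block marker
 *   bytes  2..39  free (section 0: offset 2, length 38)
 *   bytes 40..51  ECC  (fixed offset 40, length ecc->total)
 *   bytes 52..63  free (section 1: offset 52, length 12)
 */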
Fixes: 41b207a70d3a ("mtd: nand: implement the default mtd_ooblayout_ops") Signed-off-by: Alexander Couzens Acked-by: Boris Brezillon Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 70 +++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index b0524f8accb6..44c0faf84426 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -139,6 +139,74 @@ const struct mtd_ooblayout_ops nand_ooblayout_lp_ops = { }; EXPORT_SYMBOL_GPL(nand_ooblayout_lp_ops); +/* + * Support the old "large page" layout used for 1-bit Hamming ECC where ECC + * are placed at a fixed offset. + */ +static int nand_ooblayout_ecc_lp_hamming(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct nand_ecc_ctrl *ecc = &chip->ecc; + + if (section) + return -ERANGE; + + switch (mtd->oobsize) { + case 64: + oobregion->offset = 40; + break; + case 128: + oobregion->offset = 80; + break; + default: + return -EINVAL; + } + + oobregion->length = ecc->total; + if (oobregion->offset + oobregion->length > mtd->oobsize) + return -ERANGE; + + return 0; +} + +static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct nand_ecc_ctrl *ecc = &chip->ecc; + int ecc_offset = 0; + + if (section < 0 || section > 1) + return -ERANGE; + + switch (mtd->oobsize) { + case 64: + ecc_offset = 40; + break; + case 128: + ecc_offset = 80; + break; + default: + return -EINVAL; + } + + if (section == 0) { + oobregion->offset = 2; + oobregion->length = ecc_offset - 2; + } else { + oobregion->offset = ecc_offset + ecc->total; + oobregion->length = mtd->oobsize - oobregion->offset; + } + + return 0; +} + +const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = { + .ecc = nand_ooblayout_ecc_lp_hamming, + .free = nand_ooblayout_free_lp_hamming, +}; + static int check_offs_len(struct mtd_info *mtd, loff_t ofs, uint64_t len) { @@ -4653,7 +4721,7 @@ int nand_scan_tail(struct mtd_info *mtd) break; case 64: case 128: - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); + mtd_set_ooblayout(mtd, &nand_ooblayout_lp_hamming_ops); break; default: WARN(1, "No oob scheme defined for oobsize %d\n", From acef1b812507e6eae0ddf4137ee44ddf6d7ffc39 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 21 Apr 2017 17:05:08 +0200 Subject: [PATCH 329/465] drm/edid: Add 10 bpc quirk for LGD 764 panel in HP zBook 17 G2 commit e345da82bd6bdfa8492f80b3ce4370acfd868d95 upstream. The builtin eDP panel in the HP zBook 17 G2 supports 10 bpc, as advertised by the Laptops product specs and verified via injecting a fixed edid + photometer measurements, but edid reports unknown depth, so drivers fall back to 6 bpc. Add a quirk to get the full 10 bpc. 
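Other panels with the same limitation could be whitelisted the same way; a hypothetical extra entry for the quirk table in drm_edid.c (vendor and product id are made up):

	/* Hypothetical panel that supports 10 bpc but reports unknown depth */
	{ "ABC", 0x1234, EDID_QUIRK_FORCE_10BPC },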
Signed-off-by: Mario Kleiner Acked-by: Harry Wentland Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1492787108-23959-1-git-send-email-mario.kleiner.de@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ba58f1b11d1e..883b5cfe9504 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -79,6 +79,8 @@ #define EDID_QUIRK_FORCE_12BPC (1 << 9) /* Force 6bpc */ #define EDID_QUIRK_FORCE_6BPC (1 << 10) +/* Force 10bpc */ +#define EDID_QUIRK_FORCE_10BPC (1 << 11) struct detailed_mode_closure { struct drm_connector *connector; @@ -121,6 +123,9 @@ static const struct edid_quirk { { "FCM", 13600, EDID_QUIRK_PREFER_LARGE_75 | EDID_QUIRK_DETAILED_IN_CM }, + /* LGD panel of HP zBook 17 G2, eDP 10 bpc, but reports unknown bpc */ + { "LGD", 764, EDID_QUIRK_FORCE_10BPC }, + /* LG Philips LCD LP154W01-A5 */ { "LPL", 0, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE }, { "LPL", 0x2a00, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE }, @@ -4174,6 +4179,9 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) if (quirks & EDID_QUIRK_FORCE_8BPC) connector->display_info.bpc = 8; + if (quirks & EDID_QUIRK_FORCE_10BPC) + connector->display_info.bpc = 10; + if (quirks & EDID_QUIRK_FORCE_12BPC) connector->display_info.bpc = 12; From e7ec98b137517bd95ff3a556f822236fde5dd6eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 15 Apr 2017 19:20:01 -0400 Subject: [PATCH 330/465] NFSv4: Fix a hang in OPEN related to server reboot commit 56e0d71ef12f026d96213e45a662bde6bbff4676 upstream. If the server fails to return the attributes as part of an OPEN reply, and then reboots, we can end up hanging. The reason is that the client attempts to send a GETATTR in order to pick up the missing OPEN call, but fails to release the slot first, causing reboot recovery to deadlock. Signed-off-by: Trond Myklebust Fixes: 2e80dbe7ac51a ("NFSv4.1: Close callback races for OPEN, LAYOUTGET...") Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 201ca3f2c4ba..8ba6c0d4d499 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2300,8 +2300,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) if (status != 0) return status; } - if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) + if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { + nfs4_sequence_free_slot(&o_res->seq_res); nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); + } return 0; } From 2aa6b5f2ac0f07da8ae8ddb556b3793cd3e7d419 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 14 Apr 2017 14:24:28 -0400 Subject: [PATCH 331/465] NFS: Fix use after free in write error path commit 1f84ccdf37d0db3a70714d02d51b0b6d45887fb8 upstream. 
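The fix is purely an ordering change; a sketch of the hazard (illustrative, not the driver code):

/*
 * nfs_release_request(req) may drop the last reference and free req, so a
 * later generic_error_remove_page(page_file_mapping(req->wb_page), ...) call
 * dereferences freed memory. Removing the page from the mapping first and
 * releasing the request last closes the use-after-free.
 */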
Signed-off-by: Fred Isaman Fixes: 0bcbf039f6b2b ("nfs: handle request add failure properly") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index abb2c8a3be42..3d1c2842bb9d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -548,9 +548,9 @@ static void nfs_write_error_remove_page(struct nfs_page *req) { nfs_unlock_request(req); nfs_end_page_writeback(req); - nfs_release_request(req); generic_error_remove_page(page_file_mapping(req->wb_page), req->wb_page); + nfs_release_request(req); } /* From bc38735f21cb9f6facd379931a29d33781fead4e Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 19 Apr 2017 10:11:33 -0400 Subject: [PATCH 332/465] NFS: Use GFP_NOIO for two allocations in writeback commit ae97aa524ef495b6276fd26f5d5449fb22975d7c upstream. Prevent a deadlock that can occur if we wait on allocations that try to write back our pages. Signed-off-by: Benjamin Coddington Fixes: 00bfa30abe869 ("NFS: Create a common pgio_alloc and pgio_release...") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6e629b856a00..cd6ec9bdd1c7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -29,13 +29,14 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; -static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) +static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount, + gfp_t gfp_flags) { p->npages = pagecount; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + p->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags); if (!p->pagevec) p->npages = 0; } @@ -681,6 +682,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, { struct nfs_pgio_mirror *new; int i; + gfp_t gfp_flags = GFP_KERNEL; desc->pg_moreio = 0; desc->pg_inode = inode; @@ -700,8 +702,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, if (pg_ops->pg_get_mirror_count) { /* until we have a request, we don't have an lseg and no * idea how many mirrors there will be */ + if (desc->pg_rw_ops->rw_mode == FMODE_WRITE) + gfp_flags = GFP_NOIO; new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, - sizeof(struct nfs_pgio_mirror), GFP_KERNEL); + sizeof(struct nfs_pgio_mirror), gfp_flags); desc->pg_mirrors_dynamic = new; desc->pg_mirrors = new; @@ -755,9 +759,12 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct list_head *head = &mirror->pg_list; struct nfs_commit_info cinfo; unsigned int pagecount, pageused; + gfp_t gfp_flags = GFP_KERNEL; pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); - if (!nfs_pgarray_set(&hdr->page_array, pagecount)) { + if (desc->pg_rw_ops->rw_mode == FMODE_WRITE) + gfp_flags = GFP_NOIO; + if (!nfs_pgarray_set(&hdr->page_array, pagecount, gfp_flags)) { nfs_pgio_error(hdr); desc->pg_error = -ENOMEM; return desc->pg_error; From 89207ffd222e860464804e90e325bbfdd92d0202 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Apr 2017 14:33:06 -0400 Subject: [PATCH 333/465] pNFS/flexfiles: Check the result of nfs4_pnfs_ds_connect commit 260f32adb88dadfaac29f47f761a088238ca164c upstream. The check in nfs4_ff_layout_prepare_ds() seems to be missing. 
Signed-off-by: Trond Myklebust Fixes: a33e4b036d461 ("pNFS: return status from nfs4_pnfs_ds_connect") Cc: Weston Andros Adamson Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 457cfeb1d5c1..9e0b24a192cc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -415,7 +415,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].minor_version); /* connect success, check rsize/wsize limit */ - if (ds->ds_clp) { + if (!status) { max_payload = nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), NULL); From af069f63c5614452625536c54bae3168f12b9e41 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 May 2017 13:02:42 -0400 Subject: [PATCH 334/465] NFSv4: Fix an rcu lock leak commit 2e84611b3f4fa50e1f4c12f2966fcc7fb955d944 upstream. The intention in the original patch was to release the lock when we put the inode, however something got screwed up. Reported-by: Jason Yan Fixes: 7b410d9ce460f ("pNFS: Delay getting the layout header in..") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/callback_proc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f073a6d2c6a5..d582d443c21a 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -131,10 +131,11 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, if (!inode) continue; if (!nfs_sb_active(inode->i_sb)) { - rcu_read_lock(); + rcu_read_unlock(); spin_unlock(&clp->cl_lock); iput(inode); spin_lock(&clp->cl_lock); + rcu_read_lock(); goto restart; } return inode; @@ -170,10 +171,11 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp, if (!inode) continue; if (!nfs_sb_active(inode->i_sb)) { - rcu_read_lock(); + rcu_read_unlock(); spin_unlock(&clp->cl_lock); iput(inode); spin_lock(&clp->cl_lock); + rcu_read_lock(); goto restart; } return inode; From 06cc61e8f9edb5d50156622c0940b32e8cca0f3a Mon Sep 17 00:00:00 2001 From: Ari Kauppi Date: Fri, 5 May 2017 16:07:55 -0400 Subject: [PATCH 335/465] nfsd: fix undefined behavior in nfsd4_layout_verify commit b550a32e60a4941994b437a8d662432a486235a5 upstream. UBSAN: Undefined behaviour in fs/nfsd/nfs4proc.c:1262:34 shift exponent 128 is too large for 32-bit type 'int' Depending on compiler+architecture, this may cause the check for layout_type to succeed for overly large values (which seems to be the case with amd64). The large value will be later used in de-referencing nfsd4_layout_ops for function pointers. Reported-by: Jani Tuovila Signed-off-by: Ari Kauppi [colin.king@canonical.com: use LAYOUT_TYPE_MAX instead of 32] Reviewed-by: Dan Carpenter Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d86031b6ad79..c453a1998e00 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1259,7 +1259,8 @@ nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type) return NULL; } - if (!(exp->ex_layout_types & (1 << layout_type))) { + if (layout_type >= LAYOUT_TYPE_MAX || + !(exp->ex_layout_types & (1 << layout_type))) { dprintk("%s: layout type %d not supported\n", __func__, layout_type); return NULL; From 9a4723626e1e83b107216b2f0bb4454c52a8de57 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 5 May 2017 16:17:57 -0400 Subject: [PATCH 336/465] nfsd: encoders mustn't use unitialized values in error cases commit f961e3f2acae94b727380c0b74e2d3954d0edf79 upstream. In error cases, lgp->lg_layout_type may be out of bounds; so we shouldn't be using it until after the check of nfserr. This was seen to crash nfsd threads when the server receives a LAYOUTGET request with a large layout type. GETDEVICEINFO has the same problem. Reported-by: Ari Kauppi Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 33017d652b1d..8e9652f37f1f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4119,8 +4119,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getdeviceinfo *gdev) { struct xdr_stream *xdr = &resp->xdr; - const struct nfsd4_layout_ops *ops = - nfsd4_layout_ops[gdev->gd_layout_type]; + const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; __be32 *p; @@ -4137,6 +4136,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, /* If maxcount is 0 then just update notifications */ if (gdev->gd_maxcount != 0) { + ops = nfsd4_layout_ops[gdev->gd_layout_type]; nfserr = ops->encode_getdeviceinfo(xdr, gdev); if (nfserr) { /* @@ -4189,8 +4189,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutget *lgp) { struct xdr_stream *xdr = &resp->xdr; - const struct nfsd4_layout_ops *ops = - nfsd4_layout_ops[lgp->lg_layout_type]; + const struct nfsd4_layout_ops *ops; __be32 *p; dprintk("%s: err %d\n", __func__, nfserr); @@ -4213,6 +4212,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(lgp->lg_seg.iomode); *p++ = cpu_to_be32(lgp->lg_layout_type); + ops = nfsd4_layout_ops[lgp->lg_layout_type]; nfserr = ops->encode_layoutget(xdr, lgp); out: kfree(lgp->lg_content); From 326842c0197ff146be6000e74d45b1c60b3faf5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 May 2017 16:24:59 -0400 Subject: [PATCH 337/465] nfsd: Fix up the "supattr_exclcreat" attributes commit b26b78cb726007533d81fdf90a62e915002ef5c8 upstream. If an NFSv4 client asks us for the supattr_exclcreat, then we must not return attributes that are unsupported by this minor version. Signed-off-by: Trond Myklebust Fixes: 75976de6556f ("NFSD: Return word2 bitmask if setting security..,") Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8e9652f37f1f..26780d53a6f9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2831,9 +2831,14 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } #endif /* CONFIG_NFSD_PNFS */ if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0, - NFSD_SUPPATTR_EXCLCREAT_WORD1, - NFSD_SUPPATTR_EXCLCREAT_WORD2); + u32 supp[3]; + + memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp)); + supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0; + supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1; + supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2; + + status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]); if (status) goto out; } From 846d6e12d0a07725298fcb4e2c403e2ead75d278 Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Fri, 12 May 2017 14:42:58 -0700 Subject: [PATCH 338/465] drivers: char: mem: Check for address space wraparound with mmap() commit b299cde245b0b76c977f4291162cf668e087b408 upstream. /dev/mem currently allows mmap() mappings that wrap around the end of the physical address space, which should probably be illegal. It circumvents the existing STRICT_DEVMEM permission check because the loop immediately terminates (as the start address is already higher than the end address). On the x86_64 architecture it will then cause a panic (from the BUG(start >= end) in arch/x86/mm/pat.c:reserve_memtype()). This patch adds an explicit check to make sure offset + size will not wrap around in the physical address type. Signed-off-by: Julius Werner Signed-off-by: Greg Kroah-Hartman --- drivers/char/mem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 7e4a9d1296bb..6e0cbe092220 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -340,6 +340,11 @@ static const struct vm_operations_struct mmap_mem_ops = { static int mmap_mem(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; + phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + + /* It's illegal to wrap around the end of the physical address space. */ + if (offset + (phys_addr_t)size < offset) + return -EINVAL; if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) return -EINVAL; From b7705859189a225120e7b26cd475ccb770917893 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 9 Nov 2016 10:39:05 +0000 Subject: [PATCH 339/465] drm/i915/gvt: Disable access to stolen memory as a guest commit 04a68a35ce6d7b54749989f943993020f48fed62 upstream. Explicitly disable stolen memory when running as a guest in a virtual machine, since the memory is not mediated between clients and reserved entirely for the host. The actual size should be reported as zero, but like every other quirk we want to tell the user what is happening. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99028 Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161109103905.17860-1-chris@chris-wilson.co.uk Reviewed-by: Zhenyu Wang Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9673bcc3b6ad..ed9a8e934d9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -410,6 +410,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) return 0; } + if (intel_vgpu_active(dev_priv)) { + DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + return 0; + } + #ifdef CONFIG_INTEL_IOMMU if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) { DRM_INFO("DMAR active, disabling use of stolen memory\n"); From 8d6d97abb875d320debfabc55b32bca1166b3386 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 20 Mar 2017 17:25:35 -0700 Subject: [PATCH 340/465] IB/hfi1: Protect the global dev_cntr_names and port_cntr_names commit 62eed66e98b4c2286fef2ce5911d8d75b7515f7b upstream. Protect the global dev_cntr_names and port_cntr_names with the global mutex as they are allocated and freed in a function called per device. Otherwise there is a danger of double free and memory leaks. Fixes: Commit b7481944b06e ("IB/hfi1: Show statistics counters under IB stats interface") Reviewed-by: Dennis Dalessandro Reviewed-by: Easwar Hariharan Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/verbs.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 16ef7b12b0b8..1c67745a04f6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1524,6 +1524,7 @@ static const char * const driver_cntr_names[] = { "DRIVER_EgrHdrFull" }; +static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ static const char **dev_cntr_names; static const char **port_cntr_names; static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); @@ -1578,6 +1579,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, { int i, err; + mutex_lock(&cntr_names_lock); if (!cntr_names_initialized) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); @@ -1586,8 +1588,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, num_driver_cntrs, &num_dev_cntrs, &dev_cntr_names); - if (err) + if (err) { + mutex_unlock(&cntr_names_lock); return NULL; + } for (i = 0; i < num_driver_cntrs; i++) dev_cntr_names[num_dev_cntrs + i] = @@ -1601,10 +1605,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, if (err) { kfree(dev_cntr_names); dev_cntr_names = NULL; + mutex_unlock(&cntr_names_lock); return NULL; } cntr_names_initialized = 1; } + mutex_unlock(&cntr_names_lock); if (!port_num) return rdma_alloc_hw_stats_struct( @@ -1823,9 +1829,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) del_timer_sync(&dev->mem_timer); verbs_txreq_exit(dev); + mutex_lock(&cntr_names_lock); kfree(dev_cntr_names); kfree(port_cntr_names); + dev_cntr_names = NULL; + port_cntr_names = NULL; cntr_names_initialized = 0; + mutex_unlock(&cntr_names_lock); } void hfi1_cnp_rcv(struct hfi1_packet *packet) From 
b43ae25b7026ac4c7a0d96cf073e700fbc43c3ce Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 25 May 2017 15:46:45 +0200 Subject: [PATCH 341/465] Linux 4.11.3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d7b64830a7b7..7bab1279d0b5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 11 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Fearless Coyote From db8ebc6da8cfd1057dc94e69fbd7a8c5ff34cef6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 May 2017 06:29:19 -0700 Subject: [PATCH 342/465] dccp/tcp: do not inherit mc_list from parent [ Upstream commit 657831ffc38e30092a2d5f03d385d710eb88b09a ] syzkaller found a way to trigger double frees from ip_mc_drop_socket(). It turns out that we leave a copy of the parent's mc_list in place at accept() time, which is very bad. Very similar to commit 8b485ce69876 ("tcp: do not inherit fastopen_req from parent") Initial report from Pray3r, completed by Andrey. Thanks a lot to them! Signed-off-by: Eric Dumazet Reported-by: Pray3r Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_connection_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5e313c1ac94f..1054d330bf9d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -794,6 +794,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, /* listeners have SOCK_RCU_FREE, not the children */ sock_reset_flag(newsk, SOCK_RCU_FREE); + inet_sk(newsk)->mc_list = NULL; + newsk->sk_mark = inet_rsk(req)->ir_mark; atomic64_set(&newsk->sk_cookie, atomic64_read(&inet_rsk(req)->ir_cookie)); From 03a275f5aa819b8824a94572ac1c18cf69cae3ca Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 9 May 2017 18:27:33 +0800 Subject: [PATCH 343/465] driver: vrf: Fix one possible use-after-free issue [ Upstream commit 1a4a5bf52a4adb477adb075e5afce925824ad132 ] The current code only handles the case where the skb is dropped; it can hit a use-after-free when NF_HOOK returns 0, which means the skb was stolen by a netfilter rule or hook. When a netfilter rule or hook steals the skb and returns NF_STOLEN, the skb now belongs to that rule and no other module may touch it again; the rule may queue the skb or free it directly. Use nf_hook() instead of NF_HOOK() so the netfilter verdict is visible, and check its return value: only when it equals 1 may the skb proceed, otherwise reset the skb pointer to NULL. Because vrf_rcv_finish() is an empty function, there is no need to invoke it even when nf_hook() returns 1, but vrf_rcv_finish() must be changed to handle the NF_STOLEN case. There are two ways an skb can be stolen. 1. The skb is stolen and freed directly; nothing needs to be done and vrf_rcv_finish() is not invoked. 2. The skb is queued and reinjected later; vrf_rcv_finish() is then invoked as the okfn, so it must free the skb. Signed-off-by: Gao Feng Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 7d909c8183e9..df74efcf237b 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -851,6 +851,7 @@ static u32 vrf_fib_table(const struct net_device *dev) static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + kfree_skb(skb); return 0; } @@ -860,7 +861,7 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook, { struct net *net = dev_net(dev); - if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0) + if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1) skb = NULL; /* kfree_skb(skb) handled by nf code */ return skb; From 8e929937f8813fb209a2d733ee1367db80b6f622 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 9 May 2017 16:59:54 -0700 Subject: [PATCH 344/465] ipv6/dccp: do not inherit ipv6_mc_list from parent [ Upstream commit 83eaddab4378db256d00d295bda6ca997cd13a52 ] Like commit 657831ffc38e ("dccp/tcp: do not inherit mc_list from parent") we should clear ipv6_mc_list etc. for IPv6 sockets too. Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv6.c | 6 ++++++ net/ipv6/tcp_ipv6.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d9b6a4e403e7..b6bbb71e713e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4c4afdca41ff..ff5f87641651 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1070,6 +1070,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; @@ -1139,6 +1140,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; From 41b81ff056a486cfc4ff9abc10085a17917dd102 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 10 May 2017 19:07:51 +0200 Subject: [PATCH 345/465] s390/qeth: handle sysfs error during initialization [ Upstream commit 9111e7880ccf419548c7b0887df020b08eadb075 ] When setting up the device from within the layer discipline's probe routine, creating the layer-specific sysfs attributes can fail. Report this error back to the caller, and handle it by releasing the layer discipline. Signed-off-by: Ursula Braun [jwi: updated commit msg, moved an OSN change to a subsequent patch] Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core_main.c | 4 +++- drivers/s390/net/qeth_core_sys.c | 2 ++ drivers/s390/net/qeth_l2_main.c | 5 ++++- drivers/s390/net/qeth_l3_main.c | 5 ++++- 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9a5f99ccb122..5c072b77e980 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5661,8 +5661,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) if (rc) goto err; rc = card->discipline->setup(card->gdev); - if (rc) + if (rc) { + qeth_core_free_discipline(card); goto err; + } } rc = card->discipline->set_online(gdev); err: diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 75b29fd2fcf4..412ff61891ac 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -426,6 +426,8 @@ static ssize_t qeth_dev_layer2_store(struct device *dev, goto out; rc = card->discipline->setup(card->gdev); + if (rc) + qeth_core_free_discipline(card); out: mutex_unlock(&card->discipline_mutex); return rc ? rc : count; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index af4e6a639fec..95d3ec8c3445 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1009,8 +1009,11 @@ static int qeth_l2_stop(struct net_device *dev) static int qeth_l2_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - qeth_l2_create_device_attributes(&gdev->dev); + rc = qeth_l2_create_device_attributes(&gdev->dev); + if (rc) + return rc; INIT_LIST_HEAD(&card->vid_list); hash_init(card->mac_htable); card->options.layer2 = 1; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 653f0fb76573..5fe37fa32b38 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3153,8 +3153,11 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) static int qeth_l3_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - qeth_l3_create_device_attributes(&gdev->dev); + rc = qeth_l3_create_device_attributes(&gdev->dev); + if (rc) + return rc; card->options.layer2 = 0; card->info.hwtrap = 0; return 0; From 9b1042aa59c46a0cc8c5aceb5c8c4441e4e3b0f5 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 10 May 2017 19:07:52 +0200 Subject: [PATCH 346/465] s390/qeth: unbreak OSM and OSN support [ Upstream commit 2d2ebb3ed0c6acfb014f98e427298673a5d07b82 ] commit b4d72c08b358 ("qeth: bridgeport support - basic control") broke the support for OSM and OSN devices as follows: As OSM and OSN are L2 only, qeth_core_probe_device() does an early setup by loading the l2 discipline and calling qeth_l2_probe_device(). In this context, adding the l2-specific bridgeport sysfs attributes via qeth_l2_create_device_attributes() hits a BUG_ON in fs/sysfs/group.c, since the basic sysfs infrastructure for the device hasn't been established yet. Note that OSN actually has its own unique sysfs attributes (qeth_osn_devtype), so the additional attributes shouldn't be created at all. For OSM, add a new qeth_l2_devtype that contains all the common and l2-specific sysfs attributes. When qeth_core_probe_device() does early setup for OSM or OSN, assign the corresponding devtype so that the ccwgroup probe code creates the full set of sysfs attributes. 
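For reference, the mechanism relied on here is that the driver core adds every attribute group listed in a device's type when the device is registered, and removes them again on unregistration. A minimal sketch of such a devtype, illustrative only and not the exact patch code; the example_* names are made up:

	static const struct attribute_group *example_l2_groups[] = {
		&qeth_device_attr_group,		/* common qeth attributes */
		&qeth_l2_bridgeport_attr_group,		/* l2-specific attributes */
		NULL,
	};

	static const struct device_type example_l2_devtype = {
		.name	= "qeth_layer2",
		.groups	= example_l2_groups,
	};

	/* in the probe path: gdev->dev.type = &example_l2_devtype;
	 * the core then creates and later removes all groups listed above
	 */
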
This allows us to skip qeth_l2_create_device_attributes() in case of an early setup. Any device that can't do early setup will initially have only the generic sysfs attributes, and when it's probed later qeth_l2_probe_device() adds the l2-specific attributes. If an early-setup device is removed (by calling ccwgroup_ungroup()), device_unregister() will - using the devtype - delete the l2-specific attributes before qeth_l2_remove_device() is called. So make sure to not remove them twice. What complicates the issue is that qeth_l2_probe_device() and qeth_l2_remove_device() is also called on a device when its layer2 attribute changes (ie. its layer mode is switched). For early-setup devices this wouldn't work properly - we wouldn't remove the l2-specific attributes when switching to L3. But switching the layer mode doesn't actually make any sense; we already decided that the device can only operate in L2! So just refuse to switch the layer mode on such devices. Note that OSN doesn't have a layer2 attribute, so we only need to special-case OSM. Based on an initial patch by Ursula Braun. Fixes: b4d72c08b358 ("qeth: bridgeport support - basic control") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core.h | 4 ++++ drivers/s390/net/qeth_core_main.c | 17 +++++++++-------- drivers/s390/net/qeth_core_sys.c | 22 ++++++++++++++-------- drivers/s390/net/qeth_l2.h | 2 ++ drivers/s390/net/qeth_l2_main.c | 17 +++++++++++++---- drivers/s390/net/qeth_l2_sys.c | 8 ++++++++ drivers/s390/net/qeth_l3_main.c | 1 + 7 files changed, 51 insertions(+), 20 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index d9561e39c3b2..a26b7e8c0d10 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -714,6 +714,7 @@ enum qeth_discipline_id { }; struct qeth_discipline { + const struct device_type *devtype; void (*start_poll)(struct ccw_device *, int, unsigned long); qdio_handler_t *input_handler; qdio_handler_t *output_handler; @@ -889,6 +890,9 @@ extern struct qeth_discipline qeth_l2_discipline; extern struct qeth_discipline qeth_l3_discipline; extern const struct attribute_group *qeth_generic_attr_groups[]; extern const struct attribute_group *qeth_osn_attr_groups[]; +extern const struct attribute_group qeth_device_attr_group; +extern const struct attribute_group qeth_device_blkt_group; +extern const struct device_type qeth_generic_devtype; extern struct workqueue_struct *qeth_wq; int qeth_card_hw_is_reachable(struct qeth_card *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 5c072b77e980..3a8ff756a025 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5460,10 +5460,12 @@ void qeth_core_free_discipline(struct qeth_card *card) card->discipline = NULL; } -static const struct device_type qeth_generic_devtype = { +const struct device_type qeth_generic_devtype = { .name = "qeth_generic", .groups = qeth_generic_attr_groups, }; +EXPORT_SYMBOL_GPL(qeth_generic_devtype); + static const struct device_type qeth_osn_devtype = { .name = "qeth_osn", .groups = qeth_osn_attr_groups, @@ -5589,23 +5591,22 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) goto err_card; } - if (card->info.type == QETH_CARD_TYPE_OSN) - gdev->dev.type = &qeth_osn_devtype; - else - gdev->dev.type = &qeth_generic_devtype; - switch (card->info.type) { case QETH_CARD_TYPE_OSN: case QETH_CARD_TYPE_OSM: rc = 
qeth_core_load_discipline(card, QETH_DISCIPLINE_LAYER2); if (rc) goto err_card; + + gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN) + ? card->discipline->devtype + : &qeth_osn_devtype; rc = card->discipline->setup(card->gdev); if (rc) goto err_disc; - case QETH_CARD_TYPE_OSD: - case QETH_CARD_TYPE_OSX: + break; default: + gdev->dev.type = &qeth_generic_devtype; break; } diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 412ff61891ac..db6a285d41e0 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -413,12 +413,16 @@ static ssize_t qeth_dev_layer2_store(struct device *dev, if (card->options.layer2 == newdis) goto out; - else { - card->info.mac_bits = 0; - if (card->discipline) { - card->discipline->remove(card->gdev); - qeth_core_free_discipline(card); - } + if (card->info.type == QETH_CARD_TYPE_OSM) { + /* fixed layer, can't switch */ + rc = -EOPNOTSUPP; + goto out; + } + + card->info.mac_bits = 0; + if (card->discipline) { + card->discipline->remove(card->gdev); + qeth_core_free_discipline(card); } rc = qeth_core_load_discipline(card, newdis); @@ -705,10 +709,11 @@ static struct attribute *qeth_blkt_device_attrs[] = { &dev_attr_inter_jumbo.attr, NULL, }; -static struct attribute_group qeth_device_blkt_group = { +const struct attribute_group qeth_device_blkt_group = { .name = "blkt", .attrs = qeth_blkt_device_attrs, }; +EXPORT_SYMBOL_GPL(qeth_device_blkt_group); static struct attribute *qeth_device_attrs[] = { &dev_attr_state.attr, @@ -728,9 +733,10 @@ static struct attribute *qeth_device_attrs[] = { &dev_attr_switch_attrs.attr, NULL, }; -static struct attribute_group qeth_device_attr_group = { +const struct attribute_group qeth_device_attr_group = { .attrs = qeth_device_attrs, }; +EXPORT_SYMBOL_GPL(qeth_device_attr_group); const struct attribute_group *qeth_generic_attr_groups[] = { &qeth_device_attr_group, diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h index 29d9fb3890ad..0d59f9a45ea9 100644 --- a/drivers/s390/net/qeth_l2.h +++ b/drivers/s390/net/qeth_l2.h @@ -8,6 +8,8 @@ #include "qeth_core.h" +extern const struct attribute_group *qeth_l2_attr_groups[]; + int qeth_l2_create_device_attributes(struct device *); void qeth_l2_remove_device_attributes(struct device *); void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 95d3ec8c3445..918e74f610ed 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1006,14 +1006,21 @@ static int qeth_l2_stop(struct net_device *dev) return 0; } +static const struct device_type qeth_l2_devtype = { + .name = "qeth_layer2", + .groups = qeth_l2_attr_groups, +}; + static int qeth_l2_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; - rc = qeth_l2_create_device_attributes(&gdev->dev); - if (rc) - return rc; + if (gdev->dev.type == &qeth_generic_devtype) { + rc = qeth_l2_create_device_attributes(&gdev->dev); + if (rc) + return rc; + } INIT_LIST_HEAD(&card->vid_list); hash_init(card->mac_htable); card->options.layer2 = 1; @@ -1025,7 +1032,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) { struct qeth_card *card = dev_get_drvdata(&cgdev->dev); - qeth_l2_remove_device_attributes(&cgdev->dev); + if (cgdev->dev.type == &qeth_generic_devtype) + qeth_l2_remove_device_attributes(&cgdev->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, 
qeth_threads_running(card, 0xffffffff) == 0); @@ -1409,6 +1417,7 @@ static int qeth_l2_control_event(struct qeth_card *card, } struct qeth_discipline qeth_l2_discipline = { + .devtype = &qeth_l2_devtype, .start_poll = qeth_qdio_start_poll, .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 692db49e3d2a..a48ed9e7e168 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -272,3 +272,11 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card) } else qeth_bridgeport_an_set(card, 0); } + +const struct attribute_group *qeth_l2_attr_groups[] = { + &qeth_device_attr_group, + &qeth_device_blkt_group, + /* l2 specific, see l2_{create,remove}_device_attributes(): */ + &qeth_l2_bridgeport_attr_group, + NULL, +}; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 5fe37fa32b38..aeeb67242b74 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3434,6 +3434,7 @@ static int qeth_l3_control_event(struct qeth_card *card, } struct qeth_discipline qeth_l3_discipline = { + .devtype = &qeth_generic_devtype, .start_poll = qeth_qdio_start_poll, .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, From b8a9a79c8913046ff2477c948ee8e69648afe81d Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 10 May 2017 19:07:53 +0200 Subject: [PATCH 347/465] s390/qeth: avoid null pointer dereference on OSN [ Upstream commit 25e2c341e7818a394da9abc403716278ee646014 ] Access card->dev only after checking whether's its valid. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l2_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 918e74f610ed..a2bb77d7e5bb 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1091,7 +1091,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) case QETH_CARD_TYPE_OSN: card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN, ether_setup); - card->dev->flags |= IFF_NOARP; break; default: card->dev = alloc_etherdev(0); @@ -1106,9 +1105,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->min_mtu = 64; card->dev->max_mtu = ETH_MAX_MTU; card->dev->netdev_ops = &qeth_l2_netdev_ops; - card->dev->ethtool_ops = - (card->info.type != QETH_CARD_TYPE_OSN) ? - &qeth_l2_ethtool_ops : &qeth_l2_osn_ops; + if (card->info.type == QETH_CARD_TYPE_OSN) { + card->dev->ethtool_ops = &qeth_l2_osn_ops; + card->dev->flags |= IFF_NOARP; + } else { + card->dev->ethtool_ops = &qeth_l2_ethtool_ops; + } card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->hw_features = NETIF_F_SG; From 617461cac6523a03fcac1d64962753d844dd23bb Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 10 May 2017 19:07:54 +0200 Subject: [PATCH 348/465] s390/qeth: add missing hash table initializations [ Upstream commit ebccc7397e4a49ff64c8f44a54895de9d32fe742 ] commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") added new hash tables, but missed to initialize them. 
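For context, hash_init() is what sets every bucket head of a DECLARE_HASHTABLE() table to an empty list before hash_add()/hash_for_each() are used on it. A minimal sketch of the pattern, with a made-up struct name and bucket sizes:

	#include <linux/hashtable.h>

	struct example_card {
		DECLARE_HASHTABLE(ip_htable, 8);	/* bucket counts are made up */
		DECLARE_HASHTABLE(ip_mc_htable, 4);
	};

	static void example_probe(struct example_card *card)
	{
		/* set all bucket heads to empty before the tables are used */
		hash_init(card->ip_htable);
		hash_init(card->ip_mc_htable);
	}
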
Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Ursula Braun Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l3_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index aeeb67242b74..d2fb50fd03dc 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3158,6 +3158,8 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) rc = qeth_l3_create_device_attributes(&gdev->dev); if (rc) return rc; + hash_init(card->ip_htable); + hash_init(card->ip_mc_htable); card->options.layer2 = 0; card->info.hwtrap = 0; return 0; From 3bfb04d10247c111acee0ed785600a837afadf9d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 11 May 2017 01:53:15 +0200 Subject: [PATCH 349/465] bpf, arm64: fix faulty emission of map access in tail calls [ Upstream commit d8b54110ee944de522ccd3531191f39986ec20f9 ] Shubham was recently asking on netdev why in arm64 JIT we don't multiply the index for accessing the tail call map by 8. That led me into testing out arm64 JIT wrt tail calls and it turned out I got a NULL pointer dereference on the tail call. The buggy access is at: prog = array->ptrs[index]; if (prog == NULL) goto out; [...] 00000060: d2800e0a mov x10, #0x70 // #112 00000064: f86a682a ldr x10, [x1,x10] 00000068: f862694b ldr x11, [x10,x2] 0000006c: b40000ab cbz x11, 0x00000080 [...] The code triggering the crash is f862694b. x1 at the time contains the address of the bpf array, x10 offsetof(struct bpf_array, ptrs). Meaning, above we load the pointer to the program at map slot 0 into x10. x10 can then be NULL if the slot is not occupied, which we later on try to access with a user given offset in x2 that is the map index. Fix this by emitting the following instead: [...] 00000060: d2800e0a mov x10, #0x70 // #112 00000064: 8b0a002a add x10, x1, x10 00000068: d37df04b lsl x11, x2, #3 0000006c: f86b694b ldr x11, [x10,x11] 00000070: b40000ab cbz x11, 0x00000084 [...] This basically adds the offset to ptrs to the base address of the bpf array we got and we later on access the map with an index * 8 offset relative to that. The tail call map itself is basically one large area with meta data at the head followed by the array of prog pointers. This makes tail calls working again, tested on Cavium ThunderX ARMv8. Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") Reported-by: Shubham Bansal Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- arch/arm64/net/bpf_jit_comp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ce8ab0409deb..4b0e0eda698b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -252,8 +252,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ off = offsetof(struct bpf_array, ptrs); emit_a64_mov_i64(tmp, off, ctx); - emit(A64_LDR64(tmp, r2, tmp), ctx); - emit(A64_LDR64(prg, tmp, r3), ctx); + emit(A64_ADD(1, tmp, r2, tmp), ctx); + emit(A64_LSL(1, prg, r3, 3), ctx); + emit(A64_LDR64(prg, tmp, prg), ctx); emit(A64_CBZ(1, prg, jmp_offset), ctx); /* goto *(prog->bpf_func + prologue_size); */ From e13cb6c25b555f75cef9eb44d348b361da1291d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 May 2017 15:24:41 -0700 Subject: [PATCH 350/465] netem: fix skb_orphan_partial() [ Upstream commit f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 ] I should have known that lowering skb->truesize was dangerous :/ In case packets are not leaving the host via a standard Ethernet device, but looped back to local sockets, bad things can happen, as reported by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 ) So instead of tweaking skb->truesize, lets change skb->destructor and keep a reference on the owner socket via its sk_refcnt. Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper") Signed-off-by: Eric Dumazet Reported-by: Michael Madsen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 2c4f574168fb..3a7260f7c2b4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1699,28 +1699,24 @@ EXPORT_SYMBOL(skb_set_owner_w); * delay queue. We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. * But we also want to keep skb->sk set because some packet schedulers - * rely on it (sch_fq for example). So we set skb->truesize to a small - * amount (1) and decrease sk_wmem_alloc accordingly. + * rely on it (sch_fq for example). */ void skb_orphan_partial(struct sk_buff *skb) { - /* If this skb is a TCP pure ACK or already went here, - * we have nothing to do. 2 is already a very small truesize. - */ - if (skb->truesize <= 2) + if (skb_is_tcp_pure_ack(skb)) return; - /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, - * so we do not completely orphan skb, but transfert all - * accounted bytes but one, to avoid unexpected reorders. 
- */ if (skb->destructor == sock_wfree #ifdef CONFIG_INET || skb->destructor == tcp_wfree #endif ) { - atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); - skb->truesize = 1; + struct sock *sk = skb->sk; + + if (atomic_inc_not_zero(&sk->sk_refcnt)) { + atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + skb->destructor = sock_efree; + } } else { skb_orphan(skb); } From 20d699e0ca98c9708aab910937c3f7235527036d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 May 2017 13:27:53 -0700 Subject: [PATCH 351/465] net: fix compile error in skb_orphan_partial() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9142e9007f2d7ab58a587a1e1d921b0064a339aa ] If CONFIG_INET is not set, net/core/sock.c can not compile : net/core/sock.c: In function ‘skb_orphan_partial’: net/core/sock.c:1810:2: error: implicit declaration of function ‘skb_is_tcp_pure_ack’ [-Werror=implicit-function-declaration] if (skb_is_tcp_pure_ack(skb)) ^ Fix this by always including Fixes: f6ba8d33cfbb ("netem: fix skb_orphan_partial()") Signed-off-by: Eric Dumazet Reported-by: Paul Gortmaker Reported-by: Randy Dunlap Reported-by: Stephen Rothwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 3a7260f7c2b4..59edc0e8c71a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -138,10 +138,7 @@ #include -#ifdef CONFIG_INET #include -#endif - #include static DEFINE_MUTEX(proto_list_mutex); From c91cd74b32ecbfa1a796e9f789e229024f9902f1 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 10 May 2017 17:01:27 -0700 Subject: [PATCH 352/465] tcp: avoid fragmenting peculiar skbs in SACK [ Upstream commit b451e5d24ba6687c6f0e7319c727a709a1846c06 ] This patch fixes a bug in splitting an SKB during SACK processing. Specifically if an skb contains multiple packets and is only partially sacked in the higher sequences, tcp_match_sack_to_skb() splits the skb and marks the second fragment as SACKed. The current code further attempts rounding up the first fragment to MSS boundaries. But it misses a boundary condition when the rounded-up fragment size (pkt_len) is exactly skb size. Spliting such an skb is pointless and causses a kernel warning and aborts the SACK processing. This patch universally checks such over-split before calling tcp_fragment to prevent these unnecessary warnings. Fixes: adb92db857ee ("tcp: Make SACK code to split only at mss boundaries") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3c6c8787b42e..b72b0611443d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1174,13 +1174,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, */ if (pkt_len > mss) { unsigned int new_len = (pkt_len / mss) * mss; - if (!in_sack && new_len < pkt_len) { + if (!in_sack && new_len < pkt_len) new_len += mss; - if (new_len >= skb->len) - return 0; - } pkt_len = new_len; } + + if (pkt_len >= skb->len && !in_sack) + return 0; + err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; From d9cb26d6a39f4d75dddf699033d5963b6633b98b Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 11 May 2017 20:28:15 +0200 Subject: [PATCH 353/465] tipc: make macro tipc_wait_for_cond() smp safe [ Upstream commit 844cf763fba654436d3a4279b6a672c196cf1901 ] The macro tipc_wait_for_cond() is embedding the macro sk_wait_event() to fulfil its task. The latter, in turn, is evaluating the stated condition outside the socket lock context. This is problematic if the condition is accessing non-trivial data structures which may be altered by incoming interrupts, as is the case with the cong_links() linked list, used by socket to keep track of the current set of congested links. We sometimes see crashes when this list is accessed by a condition function at the same time as a SOCK_WAKEUP interrupt is removing an element from the list. We fix this by expanding selected parts of sk_wait_event() into the outer macro, while ensuring that all evaluations of a given condition are performed under socket lock protection. Fixes: commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Reviewed-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index bdce99f9407a..599c69bca8b9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -361,25 +361,25 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) return 0; } -#define tipc_wait_for_cond(sock_, timeout_, condition_) \ -({ \ - int rc_ = 0; \ - int done_ = 0; \ - \ - while (!(condition_) && !done_) { \ - struct sock *sk_ = sock->sk; \ - DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ - \ - rc_ = tipc_sk_sock_err(sock_, timeout_); \ - if (rc_) \ - break; \ - prepare_to_wait(sk_sleep(sk_), &wait_, \ - TASK_INTERRUPTIBLE); \ - done_ = sk_wait_event(sk_, timeout_, \ - (condition_), &wait_); \ - remove_wait_queue(sk_sleep(sk_), &wait_); \ - } \ - rc_; \ +#define tipc_wait_for_cond(sock_, timeo_, condition_) \ +({ \ + struct sock *sk_; \ + int rc_; \ + \ + while ((rc_ = !(condition_))) { \ + DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ + sk_ = (sock_)->sk; \ + rc_ = tipc_sk_sock_err((sock_), timeo_); \ + if (rc_) \ + break; \ + prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \ + release_sock(sk_); \ + *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ + sched_annotate_sleep(); \ + lock_sock(sk_); \ + remove_wait_queue(sk_sleep(sk_), &wait_); \ + } \ + rc_; \ }) /** From dc9bf5513e0ffa01e5f0c7b8a183d24610e6cdaa Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 12 May 2017 14:39:52 +0800 Subject: [PATCH 354/465] sctp: fix src address selection if using secondary addresses for ipv6 [ Upstream commit dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 ] Commit 0ca50d12fe46 ("sctp: fix src address selection if using secondary addresses") has fixed a src address selection issue when using secondary addresses for ipv4. Now sctp ipv6 also has the similar issue. When using a secondary address, sctp_v6_get_dst tries to choose the saddr which has the most same bits with the daddr by sctp_v6_addr_match_len. It may make some cases not work as expected. hostA: [1] fd21:356b:459a:cf10::11 (eth1) [2] fd21:356b:459a:cf20::11 (eth2) hostB: [a] fd21:356b:459a:cf30::2 (eth1) [b] fd21:356b:459a:cf40::2 (eth2) route from hostA to hostB: fd21:356b:459a:cf30::/64 dev eth1 metric 1024 mtu 1500 The expected path should be: fd21:356b:459a:cf10::11 <-> fd21:356b:459a:cf30::2 But addr[2] matches addr[a] more bits than addr[1] does, according to sctp_v6_addr_match_len. It causes the path to be: fd21:356b:459a:cf20::11 <-> fd21:356b:459a:cf30::2 This patch is to fix it with the same way as Marcelo's fix for sctp ipv4. As no ip_dev_find for ipv6, this patch is to use ipv6_chk_addr to check if the saddr is in a dev instead. Note that for backwards compatibility, it will still do the addr_match_len check here when no optimal is found. Reported-by: Patrick Talbert Signed-off-by: Xin Long Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 961ee59f696a..142b70e959af 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; - union sctp_addr *baddr = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; __u8 matchlen = 0; - __u8 bmatchlen; sctp_scope_t scope; memset(fl6, 0, sizeof(struct flowi6)); @@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, */ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid) + struct dst_entry *bdst; + __u8 bmatchlen; + + if (!laddr->valid || + laddr->state != SCTP_ADDR_SRC || + laddr->a.sa.sa_family != AF_INET6 || + scope > sctp_scope(&laddr->a)) continue; - if ((laddr->state == SCTP_ADDR_SRC) && - (laddr->a.sa.sa_family == AF_INET6) && - (scope <= sctp_scope(&laddr->a))) { - bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); - if (!baddr || (matchlen < bmatchlen)) { - baddr = &laddr->a; - matchlen = bmatchlen; - } - } - } - if (baddr) { - fl6->saddr = baddr->v6.sin6_addr; - fl6->fl6_sport = baddr->v6.sin6_port; + + fl6->saddr = laddr->a.v6.sin6_addr; + fl6->fl6_sport = laddr->a.v6.sin6_port; final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p); + bdst = ip6_dst_lookup_flow(sk, fl6, final_p); + + if (!IS_ERR(bdst) && + ipv6_chk_addr(dev_net(bdst->dev), + &laddr->a.v6.sin6_addr, bdst->dev, 1)) { + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + break; + } + + bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); + if (matchlen > bmatchlen) + continue; + + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + matchlen = bmatchlen; } rcu_read_unlock(); From 703a20827411c3906b644713bc4462d4b3fb6a5f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 May 2017 07:16:40 -0700 Subject: [PATCH 355/465] sctp: do not inherit ipv6_{mc|ac|fl}_list from parent [ Upstream commit fdcee2cbb8438702ea1b328fb6e0ac5e9a40c7f8 ] SCTP needs fixes similar to 83eaddab4378 ("ipv6/dccp: do not inherit ipv6_mc_list from parent"), otherwise bad things can happen. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 142b70e959af..f5b45b8b8b16 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -677,6 +677,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; rcu_read_lock(); opt = rcu_dereference(np->opt); From 03c10a87f16fb27f65cb3eb54dc4a84753177720 Mon Sep 17 00:00:00 2001 From: Douglas Caetano dos Santos Date: Fri, 12 May 2017 15:19:15 -0300 Subject: [PATCH 356/465] net/packet: fix missing net_device reference release [ Upstream commit d19b183cdc1fa3d70d6abe2a4c369e748cd7ebb8 ] When using a TX ring buffer, if an error occurs processing a control message (e.g. invalid message), the net_device reference is not released. 
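The rule the fix below restores is that every error path taken after dev_get_by_index() has grabbed a reference must drop it again with dev_put(). A simplified, illustrative sketch of that ordering, not the actual tpacket_snd() code:

	static int example_send(struct sock *sk, struct msghdr *msg, int ifindex)
	{
		struct sockcm_cookie sockc = {};
		struct net_device *dev;
		int err;

		dev = dev_get_by_index(sock_net(sk), ifindex);
		if (unlikely(!dev))
			return -ENXIO;			/* no reference taken yet */

		err = sock_cmsg_send(sk, msg, &sockc);	/* parse control messages */
		if (unlikely(err))
			goto out_put;			/* must release the reference */

		/* ...build and transmit the frame... */
		err = 0;
	out_put:
		dev_put(dev);
		return err;
	}
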
Fixes c14ac9451c348 ("sock: enable timestamping using control messages") Signed-off-by: Douglas Caetano dos Santos Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ea81ccf3c7d6..b2d8e8c23e46 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2614,13 +2614,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); } - sockc.tsflags = po->sk.sk_tsflags; - if (msg->msg_controllen) { - err = sock_cmsg_send(&po->sk, msg, &sockc); - if (unlikely(err)) - goto out; - } - err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -2628,6 +2621,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + sockc.tsflags = po->sk.sk_tsflags; + if (msg->msg_controllen) { + err = sock_cmsg_send(&po->sk, msg, &sockc); + if (unlikely(err)) + goto out_put; + } + if (po->sk.sk_socket->type == SOCK_RAW) reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size From c8a52aa79a4b44a50cf4f213f6dfe3e3cfda5368 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 3 Apr 2017 15:11:22 +0300 Subject: [PATCH 357/465] net/mlx5e: Use the correct pause values for ethtool advertising [ Upstream commit b383b544f2666d67446b951a9a97af239dafed5d ] Query the operational pause from firmware (PFCC register) instead of always passing zeros. Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API") Signed-off-by: Gal Pressman Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index a004a5a1a4c2..69bfd8fecb41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -828,6 +828,8 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + u32 rx_pause = 0; + u32 tx_pause = 0; u32 eth_proto_cap; u32 eth_proto_admin; u32 eth_proto_lp; @@ -850,11 +852,13 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); an_status = MLX5_GET(ptys_reg, out, an_status); + mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); get_supported(eth_proto_cap, link_ksettings); - get_advertising(eth_proto_admin, 0, 0, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings); get_speed_duplex(netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; From cc1d2a620dd0aeb55e641f5a406bab631aa6842c Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 19 Apr 2017 14:35:15 +0300 Subject: [PATCH 358/465] net/mlx5e: Fix ethtool pause support and advertise reporting [ Upstream commit e3c19503712d6360239b19c14cded56dd63c40d7 ] Pause bit should set when RX pause is on, not TX pause. 
Also, setting Asym_Pause is incorrect, and should be turned off. Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API") Signed-off-by: Gal Pressman Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 69bfd8fecb41..949fbadd7817 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -773,7 +773,6 @@ static void get_supported(u32 eth_proto_cap, ptys2ethtool_supported_port(link_ksettings, eth_proto_cap); ptys2ethtool_supported_link(supported, eth_proto_cap); ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); - ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause); } static void get_advertising(u32 eth_proto_cap, u8 tx_pause, @@ -783,7 +782,7 @@ static void get_advertising(u32 eth_proto_cap, u8 tx_pause, unsigned long *advertising = link_ksettings->link_modes.advertising; ptys2ethtool_adver_link(advertising, eth_proto_cap); - if (tx_pause) + if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); if (tx_pause ^ rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); From d6cb41cf3023ac2026f149ce77864ea49cbb470b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 15 May 2017 17:05:47 -0400 Subject: [PATCH 359/465] tcp: eliminate negative reordering in tcp_clean_rtx_queue [ Upstream commit bafbb9c73241760023d8981191ddd30bb1c6dbac ] tcp_ack() can call tcp_fragment() which may dededuct the value tp->fackets_out when MSS changes. When prior_fackets is larger than tp->fackets_out, tcp_clean_rtx_queue() can invoke tcp_update_reordering() with negative values. This results in absurd tp->reodering values higher than sysctl_tcp_max_reordering. Note that tcp_update_reordering indeeds sets tp->reordering to min(sysctl_tcp_max_reordering, metric), but because the comparison is signed, a negative metric always wins. Fixes: c7caf8d3ed7a ("[TCP]: Fix reord detection due to snd_una covered holes") Reported-by: Rebecca Isaacs Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b72b0611443d..896a0458e578 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3189,7 +3189,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, int delta; /* Non-retransmitted hole got filled? That's reordering */ - if (reord < prior_fackets) + if (reord < prior_fackets && reord <= tp->fackets_out) tcp_update_reordering(sk, tp->fackets_out - reord, 0); delta = tcp_is_fack(tp) ? pkts_acked : From 516b3ed999efddfec7c49f06b9630ec55ebc9631 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 15 May 2017 17:33:37 +0200 Subject: [PATCH 360/465] smc: switch to usage of IB_PD_UNSAFE_GLOBAL_RKEY [ Upstream commit 263eec9b2a82e8697d064709414914b5b10ac538 ] Currently, SMC enables remote access to physical memory when a user has successfully configured and established an SMC-connection until ten minutes after the last SMC connection is closed. 
Because this is considered a security risk, drivers are supposed to use IB_PD_UNSAFE_GLOBAL_RKEY in such a case. This patch changes the current SMC code to use IB_PD_UNSAFE_GLOBAL_RKEY. This improves user awareness, but does not remove the security risk itself. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/smc_clc.c | 4 ++-- net/smc/smc_core.c | 16 +++------------- net/smc/smc_core.h | 2 +- net/smc/smc_ib.c | 21 ++------------------- net/smc/smc_ib.h | 2 -- 5 files changed, 8 insertions(+), 37 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index e41f594a1e1d..03ec058d18df 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock *smc) memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); cclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); @@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); aclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.qp_mtu = link->path_mtu; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 0eac633fb354..88cbb8a22d4a 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -613,19 +613,8 @@ int smc_rmb_create(struct smc_sock *smc) rmb_desc = NULL; continue; /* if mapping failed, try smaller one */ } - rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd, - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE, - &rmb_desc->mr_rx[SMC_SINGLE_LINK]); - if (rc) { - smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, rmb_desc, - DMA_FROM_DEVICE); - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); - rmb_desc = NULL; - continue; - } + rmb_desc->rkey[SMC_SINGLE_LINK] = + lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey; rmb_desc->used = 1; write_lock_bh(&lgr->rmbs_lock); list_add(&rmb_desc->list, @@ -668,6 +657,7 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && + (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && test_bit(i, lgr->rtokens_used_mask)) { conn->rtoken_idx = i; return 0; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 27eb38056a27..b013cb43a327 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -93,7 +93,7 @@ struct smc_buf_desc { u64 dma_addr[SMC_LINKS_PER_LGR_MAX]; /* mapped address of buffer */ void *cpu_addr; /* virtual address of buffer */ - struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + u32 rkey[SMC_LINKS_PER_LGR_MAX]; /* for rmb only: * rkey provided to peer */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e6743c008ac5..51e5f0124f31 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -37,24 +37,6 @@ u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET; /* unique system * identifier */ -int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, - struct ib_mr **mr) -{ - int rc; - - if (*mr) - return 0; /* already done */ 
- - /* obtain unique key - - * next invocation of get_dma_mr returns a different key! - */ - *mr = pd->device->get_dma_mr(pd, access_flags); - rc = PTR_ERR_OR_ZERO(*mr); - if (IS_ERR(*mr)) - *mr = NULL; - return rc; -} - static int smc_ib_modify_qp_init(struct smc_link *lnk) { struct ib_qp_attr qp_attr; @@ -213,7 +195,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) { int rc; - lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); + lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, + IB_PD_UNSAFE_GLOBAL_RKEY); rc = PTR_ERR_OR_ZERO(lnk->roce_pd); if (IS_ERR(lnk->roce_pd)) lnk->roce_pd = NULL; diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index a95f74bb5569..cde21263287c 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -60,8 +60,6 @@ void smc_ib_dealloc_protection_domain(struct smc_link *lnk); int smc_ib_create_protection_domain(struct smc_link *lnk); void smc_ib_destroy_queue_pair(struct smc_link *lnk); int smc_ib_create_queue_pair(struct smc_link *lnk); -int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, - struct ib_mr **mr); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); From a19f55f9bb374b24f3bca3bfd7e75752b9e6d372 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 May 2017 09:51:38 +0300 Subject: [PATCH 361/465] net/smc: Add warning about remote memory exposure [ Upstream commit 19a0f7e37c0761a0a1cbf550705a6063c9675223 ] The driver explicitly bypasses APIs to register all memory once a connection is made, and thus allows remote access to memory. Signed-off-by: Christoph Hellwig Signed-off-by: Leon Romanovsky Acked-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/smc/Kconfig b/net/smc/Kconfig index c717ef0896aa..33954852f3f8 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -8,6 +8,10 @@ config SMC The Linux implementation of the SMC-R solution is designed as a separate socket family SMC. + Warning: SMC will expose all memory for remote reads and writes + once a connection is established. Don't enable this option except + for tightly controlled lab environment. + Select this option if you want to run SMC socket applications config SMC_DIAG From df6342be40ff97a8dc35fa8713c90d21fb4f2f67 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 May 2017 23:19:17 -0700 Subject: [PATCH 362/465] net: Improve handling of failures on link and route dumps [ Upstream commit f6c5775ff0bfa62b072face6bf1d40f659f194b2 ] In general, rtnetlink dumps do not anticipate failure to dump a single object (e.g., link or route) on a single pass. As both route and link objects have grown via more attributes, that is no longer a given. netlink dumps can handle a failure if the dump function returns an error; specifically, netlink_dump adds the return code to the response if it is <= 0 so userspace is notified of the failure. The missing piece is the rtnetlink dump functions returning the error. Fix route and link dump functions to return the errors if no object is added to an skb (detected by skb->len != 0). IPv6 route dumps (rt6_dump_route) already return the error; this patch updates IPv4 and link dumps. Other dump functions may need to be ajusted as well. Reported-by: Jan Moskyto Matejka Signed-off-by: David Ahern Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 36 ++++++++++++++++++++++++------------ net/ipv4/fib_frontend.c | 15 +++++++++++---- net/ipv4/fib_trie.c | 26 ++++++++++++++------------ 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 69daf393cbe1..8d4a185a8143 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1620,13 +1620,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, 0, flags, ext_filter_mask); - /* If we ran out of room on the first message, - * we're in trouble - */ - WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err < 0) - goto out; + if (err < 0) { + if (likely(skb->len)) + goto out; + + goto out_err; + } nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: @@ -1634,10 +1634,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) } } out: + err = skb->len; +out_err: cb->args[1] = idx; cb->args[0] = h; - return skb->len; + return err; } int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len) @@ -3427,8 +3429,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) err = br_dev->netdev_ops->ndo_bridge_getlink( skb, portid, seq, dev, filter_mask, NLM_F_MULTI); - if (err < 0 && err != -EOPNOTSUPP) - break; + if (err < 0 && err != -EOPNOTSUPP) { + if (likely(skb->len)) + break; + + goto out_err; + } } idx++; } @@ -3439,16 +3445,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) seq, dev, filter_mask, NLM_F_MULTI); - if (err < 0 && err != -EOPNOTSUPP) - break; + if (err < 0 && err != -EOPNOTSUPP) { + if (likely(skb->len)) + break; + + goto out_err; + } } idx++; } } + err = skb->len; +out_err: rcu_read_unlock(); cb->args[0] = idx; - return skb->len; + return err; } static inline size_t bridge_nlmsg_size(void) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8f2133ffc2ff..a2f7de26670e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -760,7 +760,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_head *head; - int dumped = 0; + int dumped = 0, err; if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) @@ -780,20 +780,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (dumped) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); - if (fib_table_dump(tb, skb, cb) < 0) - goto out; + err = fib_table_dump(tb, skb, cb); + if (err < 0) { + if (likely(skb->len)) + goto out; + + goto out_err; + } dumped = 1; next: e++; } } out: + err = skb->len; +out_err: rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; - return skb->len; + return err; } /* Prepare and feed intra-kernel routing request. 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2f0d8233950f..08709fb62d2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2079,6 +2079,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, /* rcu_read_lock is hold by caller */ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + int err; + if (i < s_i) { i++; continue; @@ -2089,17 +2091,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->tb_id, - fa->fa_type, - xkey, - KEYLENGTH - fa->fa_slen, - fa->fa_tos, - fa->fa_info, NLM_F_MULTI) < 0) { + err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, + tb->tb_id, fa->fa_type, + xkey, KEYLENGTH - fa->fa_slen, + fa->fa_tos, fa->fa_info, NLM_F_MULTI); + if (err < 0) { cb->args[4] = i; - return -1; + return err; } i++; } @@ -2121,10 +2120,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, t_key key = cb->args[3]; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { + int err; + + err = fn_trie_dump_leaf(l, tb, skb, cb); + if (err < 0) { cb->args[3] = key; cb->args[2] = count; - return -1; + return err; } ++count; From 9909e4e4ff16e3f66b4e33e118621d7fe92fc6d4 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 16 May 2017 14:36:23 -0400 Subject: [PATCH 363/465] ipv6: Prevent overrun when parsing v6 header options [ Upstream commit 2423496af35d94a87156b063ea5cedffc10a70a1 ] The KASAN warning repoted below was discovered with a syzkaller program. The reproducer is basically: int s = socket(AF_INET6, SOCK_RAW, NEXTHDR_HOP); send(s, &one_byte_of_data, 1, MSG_MORE); send(s, &more_than_mtu_bytes_data, 2000, 0); The socket() call sets the nexthdr field of the v6 header to NEXTHDR_HOP, the first send call primes the payload with a non zero byte of data, and the second send call triggers the fragmentation path. The fragmentation code tries to parse the header options in order to figure out where to insert the fragment option. Since nexthdr points to an invalid option, the calculation of the size of the network header can made to be much larger than the linear section of the skb and data is read outside of it. This fix makes ip6_find_1stfrag return an error if it detects running out-of-bounds. [ 42.361487] ================================================================== [ 42.364412] BUG: KASAN: slab-out-of-bounds in ip6_fragment+0x11c8/0x3730 [ 42.365471] Read of size 840 at addr ffff88000969e798 by task ip6_fragment-oo/3789 [ 42.366469] [ 42.366696] CPU: 1 PID: 3789 Comm: ip6_fragment-oo Not tainted 4.11.0+ #41 [ 42.367628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014 [ 42.368824] Call Trace: [ 42.369183] dump_stack+0xb3/0x10b [ 42.369664] print_address_description+0x73/0x290 [ 42.370325] kasan_report+0x252/0x370 [ 42.370839] ? ip6_fragment+0x11c8/0x3730 [ 42.371396] check_memory_region+0x13c/0x1a0 [ 42.371978] memcpy+0x23/0x50 [ 42.372395] ip6_fragment+0x11c8/0x3730 [ 42.372920] ? nf_ct_expect_unregister_notifier+0x110/0x110 [ 42.373681] ? ip6_copy_metadata+0x7f0/0x7f0 [ 42.374263] ? ip6_forward+0x2e30/0x2e30 [ 42.374803] ip6_finish_output+0x584/0x990 [ 42.375350] ip6_output+0x1b7/0x690 [ 42.375836] ? ip6_finish_output+0x990/0x990 [ 42.376411] ? 
ip6_fragment+0x3730/0x3730 [ 42.376968] ip6_local_out+0x95/0x160 [ 42.377471] ip6_send_skb+0xa1/0x330 [ 42.377969] ip6_push_pending_frames+0xb3/0xe0 [ 42.378589] rawv6_sendmsg+0x2051/0x2db0 [ 42.379129] ? rawv6_bind+0x8b0/0x8b0 [ 42.379633] ? _copy_from_user+0x84/0xe0 [ 42.380193] ? debug_check_no_locks_freed+0x290/0x290 [ 42.380878] ? ___sys_sendmsg+0x162/0x930 [ 42.381427] ? rcu_read_lock_sched_held+0xa3/0x120 [ 42.382074] ? sock_has_perm+0x1f6/0x290 [ 42.382614] ? ___sys_sendmsg+0x167/0x930 [ 42.383173] ? lock_downgrade+0x660/0x660 [ 42.383727] inet_sendmsg+0x123/0x500 [ 42.384226] ? inet_sendmsg+0x123/0x500 [ 42.384748] ? inet_recvmsg+0x540/0x540 [ 42.385263] sock_sendmsg+0xca/0x110 [ 42.385758] SYSC_sendto+0x217/0x380 [ 42.386249] ? SYSC_connect+0x310/0x310 [ 42.386783] ? __might_fault+0x110/0x1d0 [ 42.387324] ? lock_downgrade+0x660/0x660 [ 42.387880] ? __fget_light+0xa1/0x1f0 [ 42.388403] ? __fdget+0x18/0x20 [ 42.388851] ? sock_common_setsockopt+0x95/0xd0 [ 42.389472] ? SyS_setsockopt+0x17f/0x260 [ 42.390021] ? entry_SYSCALL_64_fastpath+0x5/0xbe [ 42.390650] SyS_sendto+0x40/0x50 [ 42.391103] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.391731] RIP: 0033:0x7fbbb711e383 [ 42.392217] RSP: 002b:00007ffff4d34f28 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 42.393235] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fbbb711e383 [ 42.394195] RDX: 0000000000001000 RSI: 00007ffff4d34f60 RDI: 0000000000000003 [ 42.395145] RBP: 0000000000000046 R08: 00007ffff4d34f40 R09: 0000000000000018 [ 42.396056] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000400aad [ 42.396598] R13: 0000000000000066 R14: 00007ffff4d34ee0 R15: 00007fbbb717af00 [ 42.397257] [ 42.397411] Allocated by task 3789: [ 42.397702] save_stack_trace+0x16/0x20 [ 42.398005] save_stack+0x46/0xd0 [ 42.398267] kasan_kmalloc+0xad/0xe0 [ 42.398548] kasan_slab_alloc+0x12/0x20 [ 42.398848] __kmalloc_node_track_caller+0xcb/0x380 [ 42.399224] __kmalloc_reserve.isra.32+0x41/0xe0 [ 42.399654] __alloc_skb+0xf8/0x580 [ 42.400003] sock_wmalloc+0xab/0xf0 [ 42.400346] __ip6_append_data.isra.41+0x2472/0x33d0 [ 42.400813] ip6_append_data+0x1a8/0x2f0 [ 42.401122] rawv6_sendmsg+0x11ee/0x2db0 [ 42.401505] inet_sendmsg+0x123/0x500 [ 42.401860] sock_sendmsg+0xca/0x110 [ 42.402209] ___sys_sendmsg+0x7cb/0x930 [ 42.402582] __sys_sendmsg+0xd9/0x190 [ 42.402941] SyS_sendmsg+0x2d/0x50 [ 42.403273] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.403718] [ 42.403871] Freed by task 1794: [ 42.404146] save_stack_trace+0x16/0x20 [ 42.404515] save_stack+0x46/0xd0 [ 42.404827] kasan_slab_free+0x72/0xc0 [ 42.405167] kfree+0xe8/0x2b0 [ 42.405462] skb_free_head+0x74/0xb0 [ 42.405806] skb_release_data+0x30e/0x3a0 [ 42.406198] skb_release_all+0x4a/0x60 [ 42.406563] consume_skb+0x113/0x2e0 [ 42.406910] skb_free_datagram+0x1a/0xe0 [ 42.407288] netlink_recvmsg+0x60d/0xe40 [ 42.407667] sock_recvmsg+0xd7/0x110 [ 42.408022] ___sys_recvmsg+0x25c/0x580 [ 42.408395] __sys_recvmsg+0xd6/0x190 [ 42.408753] SyS_recvmsg+0x2d/0x50 [ 42.409086] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.409513] [ 42.409665] The buggy address belongs to the object at ffff88000969e780 [ 42.409665] which belongs to the cache kmalloc-512 of size 512 [ 42.410846] The buggy address is located 24 bytes inside of [ 42.410846] 512-byte region [ffff88000969e780, ffff88000969e980) [ 42.411941] The buggy address belongs to the page: [ 42.412405] page:ffffea000025a780 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 [ 42.413298] flags: 0x100000000008100(slab|head) [ 42.413729] raw: 0100000000008100 
0000000000000000 0000000000000000 00000001800c000c [ 42.414387] raw: ffffea00002a9500 0000000900000007 ffff88000c401280 0000000000000000 [ 42.415074] page dumped because: kasan: bad access detected [ 42.415604] [ 42.415757] Memory state around the buggy address: [ 42.416222] ffff88000969e880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 42.416904] ffff88000969e900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 42.417591] >ffff88000969e980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 42.418273] ^ [ 42.418588] ffff88000969ea00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 42.419273] ffff88000969ea80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 42.419882] ================================================================== Reported-by: Andrey Konovalov Signed-off-by: Craig Gallek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_offload.c | 2 ++ net/ipv6/ip6_output.c | 4 ++++ net/ipv6/output_core.c | 14 ++++++++------ net/ipv6/udp_offload.c | 2 ++ 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 93e58a5e1837..eab36abc9f22 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -117,6 +117,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (udpfrag) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (unfrag_ip6hlen < 0) + return ERR_PTR(unfrag_ip6hlen); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); if (skb->next) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 58f6288e9ba5..01deecda2f84 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -598,6 +598,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, u8 *prevhdr, nexthdr = 0; hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (hlen < 0) { + err = hlen; + goto fail; + } nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index cd4252346a32..e9065b8d3af8 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; - while (offset + 1 <= packet_len) { + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; switch (**nexthdr) { @@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; + if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) + return -EINVAL; + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; } - return offset; + return -EINVAL; } EXPORT_SYMBOL(ip6_find_1stfragopt); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index ac858c480f2f..b348cff47395 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -91,6 +91,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, * bytes to insert fragment header. 
*/ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (unfrag_ip6hlen < 0) + return ERR_PTR(unfrag_ip6hlen); nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + From 4b3a6fa35eed466bf6396427463bc8ae58e16b15 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 May 2017 22:54:11 -0400 Subject: [PATCH 364/465] ipv6: Check ip6_find_1stfragopt() return value properly. [ Upstream commit 7dd7eb9513bd02184d45f000ab69d78cb1fa1531 ] Do not use unsigned variables to see if it returns a negative error or not. Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options") Reported-by: Julia Lawall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_offload.c | 9 ++++----- net/ipv6/ip6_output.c | 7 +++---- net/ipv6/udp_offload.c | 8 +++++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index eab36abc9f22..280268f1dd7b 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, const struct net_offload *ops; int proto; struct frag_hdr *fptr; - unsigned int unfrag_ip6hlen; unsigned int payload_len; u8 *prevhdr; int offset = 0; @@ -116,10 +115,10 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, skb->network_header = (u8 *)ipv6h - skb->head; if (udpfrag) { - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (unfrag_ip6hlen < 0) - return ERR_PTR(unfrag_ip6hlen); - fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); + int err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + fptr = (struct frag_hdr *)((u8 *)ipv6h + err); fptr->frag_off = htons(offset); if (skb->next) fptr->frag_off |= htons(IP6_MF); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01deecda2f84..d4a31becbd25 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -597,11 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; - hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (hlen < 0) { - err = hlen; + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) goto fail; - } + hlen = err; nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b348cff47395..a2267f80febb 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u8 frag_hdr_sz = sizeof(struct frag_hdr); __wsum csum; int tnl_hlen; + int err; mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -90,9 +91,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Find the unfragmentable header and shift it left by frag_hdr_sz * bytes to insert fragment header. 
*/ - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (unfrag_ip6hlen < 0) - return ERR_PTR(unfrag_ip6hlen); + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + unfrag_ip6hlen = err; nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + From 7b570175b3da6cb7f6e99e46eb04b1f8eb1ad220 Mon Sep 17 00:00:00 2001 From: Tobias Jungel Date: Wed, 17 May 2017 09:29:12 +0200 Subject: [PATCH 365/465] bridge: netlink: check vlan_default_pvid range [ Upstream commit a285860211bf257b0e6d522dac6006794be348af ] Currently it is allowed to set the default pvid of a bridge to a value above VLAN_VID_MASK (0xfff). This patch adds a check to br_validate and returns -EINVAL in case the pvid is out of bounds. Reproduce by calling: [root@test ~]# ip l a type bridge [root@test ~]# ip l a type dummy [root@test ~]# ip l s bridge0 type bridge vlan_filtering 1 [root@test ~]# ip l s bridge0 type bridge vlan_default_pvid 9999 [root@test ~]# ip l s dummy0 master bridge0 [root@test ~]# bridge vlan port vlan ids bridge0 9999 PVID Egress Untagged dummy0 9999 PVID Egress Untagged Fixes: 0f963b7592ef ("bridge: netlink: add support for default_pvid") Acked-by: Nikolay Aleksandrov Signed-off-by: Tobias Jungel Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 225ef7d53701..0488c6735c46 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -828,6 +828,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return -EPROTONOSUPPORT; } } + + if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { + __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); + + if (defpvid >= VLAN_VID_MASK) + return -EINVAL; + } #endif return 0; From bf97c6bf24f83f8a2141ff22856aa4a1ca6e0dd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 17 May 2017 16:31:41 +0200 Subject: [PATCH 366/465] qmi_wwan: add another Lenovo EM74xx device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 486181bcb3248e2f1977f4e69387a898234a4e1e ] In their infinite wisdom, and never ending quest for end user frustration, Lenovo has decided to use a new USB device ID for the wwan modules in their 2017 laptops. The actual hardware is still the Sierra Wireless EM7455 or EM7430, depending on region. Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 4e34568db64f..87746c2bc3d3 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -902,6 +902,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx */ {QMI_FIXED_INTF(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ From 99c971a56d764f6f3bb7c6a2581b2d33cacc702a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 19 May 2017 22:20:29 +0800 Subject: [PATCH 367/465] bridge: start hello_timer when enabling KERNEL_STP in br_stp_start [ Upstream commit 6d18c732b95c0a9d35e9f978b4438bba15412284 ] Since commit 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers"), bridge would not start hello_timer if stp_enabled is not KERNEL_STP when br_dev_open. The problem is even if users set stp_enabled with KERNEL_STP later, the timer will still not be started. It causes that KERNEL_STP can not really work. Users have to re-ifup the bridge to avoid this. This patch is to fix it by starting br->hello_timer when enabling KERNEL_STP in br_stp_start. As an improvement, it's also to start hello_timer again only when br->stp_enabled is KERNEL_STP in br_hello_timer_expired, there is no reason to start the timer again when it's NO_STP. Fixes: 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers") Reported-by: Haidong Li Signed-off-by: Xin Long Acked-by: Nikolay Aleksandrov Reviewed-by: Ivan Vecera Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_stp_if.c | 1 + net/bridge/br_stp_timer.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 08341d2aa9c9..0db8102995a5 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -179,6 +179,7 @@ static void br_stp_start(struct net_bridge *br) br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); br_port_state_selection(br); } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index c98b3e5c140a..60b6fe277a8b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - if (br->stp_enabled != BR_USER_STP) + if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); } From 827624c3d1cfd1b569ec2c6593a6a50ab65c72bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 May 2017 14:17:48 -0700 Subject: [PATCH 368/465] ipv6: fix out of bound writes in __ip6_append_data() [ Upstream commit 232cd35d0804cc241eb887bb8d4d9b3b9881c64a ] Andrey Konovalov and idaifish@gmail.com reported crashes caused by one skb shared_info being overwritten from __ip6_append_data() Andrey program lead to following state : copy -4200 datalen 2000 fraglen 2040 maxfraglen 2040 alloclen 2048 transhdrlen 0 offset 0 fraggap 6200 The skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen, fraggap, 0); is overwriting skb->head and skb_shared_info Since we apparently detect this rare condition too late, move the code earlier to even avoid allocating skb and risking crashes. Once again, many thanks to Andrey and syzkaller team. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Reported-by: Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d4a31becbd25..bf8a58a1c32d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1466,6 +1466,11 @@ static int __ip6_append_data(struct sock *sk, */ alloclen += sizeof(struct frag_hdr); + copy = datalen - transhdrlen - fraggap; + if (copy < 0) { + err = -EINVAL; + goto error; + } if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, @@ -1515,13 +1520,9 @@ static int __ip6_append_data(struct sock *sk, data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; - - if (copy < 0) { - err = -EINVAL; - kfree_skb(skb); - goto error; - } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { + if (copy > 0 && + getfrag(from, data + transhdrlen, offset, + copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; From 1cdcbe7c706a6f6bee2d844bf8e7d5fc7c270fb7 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 19 May 2017 19:43:45 -0400 Subject: [PATCH 369/465] bonding: fix accounting of active ports in 3ad [ Upstream commit 751da2a69b7cc82d83dc310ed7606225f2d6e014 ] As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not removed from the aggregator when they are down, and the active slave count is NOT equal to number of ports in the aggregator, but rather the number of ports in the aggregator that are still enabled. 
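For reference, the active-port count the fix switches to amounts to walking the aggregator's member list and counting only ports that are still enabled. A simplified kernel-context sketch of that logic (it relies on the types in drivers/net/bonding/bond_3ad.h and is not a verbatim copy of __agg_active_ports()):

	static int agg_count_active_ports(const struct aggregator *agg)
	{
		const struct port *port;
		int active = 0;

		/* Count only members whose link is still enabled,
		 * instead of trusting aggregator->num_of_ports. */
		for (port = agg->lag_ports; port;
		     port = port->next_port_in_aggregator)
			if (port->is_enabled)
				active++;

		return active;
	}
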
The sysfs spew for bonding_show_ad_num_ports() has a comment that says "Show number of active 802.3ad ports.", but it's currently showing total number of ports, both active and inactive. Remedy it by using the same logic introduced in 0622cab0341c in __bond_3ad_get_active_agg_info(), so sysfs, procfs and netlink all report the number of active ports. Note that this means that IFLA_BOND_AD_INFO_NUM_PORTS really means NUM_ACTIVE_PORTS instead of NUM_PORTS, and thus perhaps should be renamed for clarity. Lightly tested on a dual i40e lacp bond, simulating link downs with an ip link set dev down, was able to produce the state where I could see both in the same aggregator, but a number of ports count of 1. MII Status: up Active Aggregator Info: Aggregator ID: 1 Number of ports: 2 <--- Slave Interface: ens10 MII Status: up <--- Aggregator ID: 1 Slave Interface: ens11 MII Status: up Aggregator ID: 1 MII Status: up Active Aggregator Info: Aggregator ID: 1 Number of ports: 1 <--- Slave Interface: ens10 MII Status: down <--- Aggregator ID: 1 Slave Interface: ens11 MII Status: up Aggregator ID: 1 CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_3ad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index edc70ffad660..6dcc42d79cab 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2573,7 +2573,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, return -1; ad_info->aggregator_id = aggregator->aggregator_identifier; - ad_info->ports = aggregator->num_of_ports; + ad_info->ports = __agg_active_ports(aggregator); ad_info->actor_key = aggregator->actor_oper_aggregator_key; ad_info->partner_key = aggregator->partner_oper_aggregator_key; ether_addr_copy(ad_info->partner_system, From bea278025cbc41556fd8e7edd22a64d84a60ca2f Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 23 Feb 2017 11:19:36 +0200 Subject: [PATCH 370/465] net/mlx5: Avoid using pending command interface slots [ Upstream commit 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 ] Currently when firmware command gets stuck or it takes long time to complete, the driver command will get timeout and the command slot is freed and can be used for new commands, and if the firmware receive new command on the old busy slot its behavior is unexpected and this could be harmful. To fix this when the driver command gets timeout we return failure, but we don't free the command slot and we wait for the firmware to explicitly respond to that command. Once all the entries are busy we will stop processing new firmware commands. 
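The slot lifecycle described above can be summarized with a small illustrative model; the names and types below are invented for the sketch and are not the driver's own:

	#include <stdbool.h>

	struct cmd_slot {
		bool busy;		/* occupied by an in-flight command */
		bool awaiting_fw;	/* timed out, still owned by firmware */
	};

	static void complete_slot(struct cmd_slot *s, bool forced)
	{
		if (forced) {
			/* driver-side timeout: the caller sees a failure,
			 * but the slot stays reserved for the firmware */
			s->awaiting_fw = true;
			return;
		}
		/* genuine firmware completion: the slot may be reused */
		s->busy = false;
		s->awaiting_fw = false;
	}
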
Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change') Signed-off-by: Mohamad Haj Yahia Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 41 ++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- .../net/ethernet/mellanox/mlx5/core/health.c | 2 +- include/linux/mlx5/driver.h | 7 +++- 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a380353a78c2..f95c869c7c29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -770,7 +770,7 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } static void cmd_work_handler(struct work_struct *work) @@ -800,6 +800,7 @@ static void cmd_work_handler(struct work_struct *work) } cmd->ent_arr[ent->idx] = ent; + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); @@ -821,6 +822,20 @@ static void cmd_work_handler(struct work_struct *work) if (ent->callback) schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + /* Skip sending command to fw if internal error */ + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + u8 status = 0; + u32 drv_synd; + + ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); + MLX5_SET(mbox_out, ent->out, status, status); + MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); + + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + return; + } + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -831,7 +846,7 @@ static void cmd_work_handler(struct work_struct *work) poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); } } @@ -875,7 +890,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } err = ent->ret; @@ -1371,7 +1386,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1391,6 +1406,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + + /* if we already completed the command, ignore it */ + if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, + &ent->state)) { + /* only real completion can free the cmd slot */ + if (!forced) { + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + free_ent(cmd, ent->idx); + } + continue; + } + if (ent->callback) cancel_delayed_work(&ent->cb_timeout_work); if 
(ent->page_queue) @@ -1413,7 +1441,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); } - free_ent(cmd, ent->idx); + + /* only real completion will free the entry slot */ + if (!forced) + free_ent(cmd, ent->idx); if (ent->callback) { ds = ent->ts2 - ent->ts1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea5d8d37a75c..33eae5ad2fb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); break; case MLX5_EVENT_TYPE_PORT_CHANGE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d0515391d33b..44f59b1d6f0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector); + mlx5_cmd_comp_handler(dev, vector, true); return; no_trig: diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2fcff6b4503f..c965d1165df6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -785,7 +785,12 @@ enum { typedef void (*mlx5_cmd_cbk_t)(int status, void *context); +enum { + MLX5_CMD_ENT_STATE_PENDING_COMP, +}; + struct mlx5_cmd_work_ent { + unsigned long state; struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; void *uout; @@ -979,7 +984,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, From f1cd4c6331e303f193604c095c71c4f391ea1219 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 23 May 2017 17:49:13 +0200 Subject: [PATCH 371/465] net: phy: marvell: Limit errata to 88m1101 [ Upstream commit f2899788353c13891412b273fdff5f02d49aa40f ] The 88m1101 has an errata when configuring autoneg. However, it was being applied to many other Marvell PHYs as well. Limit its scope to just the 88m1101. Fixes: 76884679c644 ("phylib: Add support for Marvell 88e1111S and 88e1145") Reported-by: Daniel Walker Signed-off-by: Andrew Lunn Acked-by: Harini Katakam Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/marvell.c | 66 ++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 272b051a0199..9097e42bec2e 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -255,34 +255,6 @@ static int marvell_config_aneg(struct phy_device *phydev) { int err; - /* The Marvell PHY has an errata which requires - * that certain registers get written in order - * to restart autonegotiation */ - err = phy_write(phydev, MII_BMCR, BMCR_RESET); - - if (err < 0) - return err; - - err = phy_write(phydev, 0x1d, 0x1f); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0x200c); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1d, 0x5); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0x100); - if (err < 0) - return err; - err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; @@ -316,6 +288,42 @@ static int marvell_config_aneg(struct phy_device *phydev) return 0; } +static int m88e1101_config_aneg(struct phy_device *phydev) +{ + int err; + + /* This Marvell PHY has an errata which requires + * that certain registers get written in order + * to restart autonegotiation + */ + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x1f); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x200c); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x5); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x100); + if (err < 0) + return err; + + return marvell_config_aneg(phydev); +} + static int m88e1111_config_aneg(struct phy_device *phydev) { int err; @@ -1892,7 +1900,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &marvell_config_init, - .config_aneg = &marvell_config_aneg, + .config_aneg = &m88e1101_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, From 423c1b43240d7369c0e06650bedb181d51cd8961 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:41 -0400 Subject: [PATCH 372/465] vlan: Fix tcp checksum offloads in Q-in-Q vlans [ Upstream commit 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff ] It appears that TCP checksum offloading has been broken for Q-in-Q vlans. The behavior was execerbated by the series commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'") that that enabled accleleration features on stacked vlans. However, event without that series, it is possible to trigger this issue. It just requires a lot more specialized configuration. The root cause is the interaction between how netdev_intersect_features() works, the features actually set on the vlan devices and HW having the ability to run checksum with longer headers. The issue starts when netdev_interesect_features() replaces NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM, if the HW advertises IP|IPV6 specific checksums. This happens for tagged and multi-tagged packets. However, HW that enables IP|IPV6 checksum offloading doesn't gurantee that packets with arbitrarily long headers can be checksummed. This patch disables IP|IPV6 checksums on the packet for multi-tagged packets. 
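To make the "arbitrarily long headers" point concrete, each stacked 802.1Q/802.1ad tag pushes the IP header 4 bytes deeper into the frame. A stand-alone illustration of the offsets involved (constants only, not driver code):

	#include <stdio.h>

	int main(void)
	{
		const int eth_hlen = 14;	/* dst MAC + src MAC + ethertype */
		const int vlan_hlen = 4;	/* TPID + TCI per tag */
		int tags;

		for (tags = 0; tags <= 2; tags++)
			printf("%d tag(s): IP header at offset %d\n",
			       tags, eth_hlen + tags * vlan_hlen);
		return 0;
	}

Hardware that assumes the IP header sits right after a plain 14-byte Ethernet header may no longer find the transport header once a second tag is present, which is why the fix keeps checksum offload only on devices advertising the protocol-agnostic NETIF_F_HW_CSUM.
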
CC: Toshiaki Makita CC: Michal Kubecek Signed-off-by: Vladislav Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/if_vlan.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8d5fcd6284ce..283dc2f5364d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, netdev_features_t features) { - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged_multi(skb)) { + /* In the case of multi-tagged packets, use a direct mask + * instead of using netdev_interesect_features(), to make + * sure that only devices supporting NETIF_F_HW_CSUM will + * have checksum offloading support. + */ + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + } return features; } From acc866e9b53eb04a397750d1461c34c3c9622490 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:42 -0400 Subject: [PATCH 373/465] be2net: Fix offload features for Q-in-Q packets [ Upstream commit cc6e9de62a7f84c9293a2ea41bc412b55bb46e85 ] At least some of the be2net cards do not seem to be capabled of performing checksum offload computions on Q-in-Q packets. In these case, the recevied checksum on the remote is invalid and TCP syn packets are dropped. This patch adds a call to check disbled acceleration features on Q-in-Q tagged traffic. CC: Sathya Perla CC: Ajit Khaparde CC: Sriharsha Basavapatna CC: Somnath Kotur Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 6be3b9aba8ed..ff617beb8502 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5027,9 +5027,11 @@ static netdev_features_t be_features_check(struct sk_buff *skb, struct be_adapter *adapter = netdev_priv(dev); u8 l4_hdr = 0; - /* The code below restricts offload features for some tunneled packets. + /* The code below restricts offload features for some tunneled and + * Q-in-Q packets. * Offload features for normal (non tunnel) packets are unchanged. */ + features = vlan_features_check(skb, features); if (!skb->encapsulation || !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)) return features; From 4dbbbaad64c5189cd3177bb4adcbe80fcc63b4c0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:43 -0400 Subject: [PATCH 374/465] virtio-net: enable TSO/checksum offloads for Q-in-Q vlans [ Upstream commit 2836b4f224d4fd7d1a2b23c3eecaf0f0ae199a74 ] Since virtio does not provide it's own ndo_features_check handler, TSO, and now checksum offload, are disabled for stacked vlans. Re-enable the support and let the host take care of it. This restores/improves Guest-to-Guest performance over Q-in-Q vlans. Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Vladislav Yasevich Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f36584616e7d..d9d8f4f43f90 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1894,6 +1894,7 @@ static const struct net_device_ops virtnet_netdev = { .ndo_poll_controller = virtnet_netpoll, #endif .ndo_xdp = virtnet_xdp, + .ndo_features_check = passthru_features_check, }; static void virtnet_config_changed_work(struct work_struct *work) From 1642394fffd5cbcaef65b166708aa4a0443b5e8a Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Tue, 23 May 2017 18:37:27 -0400 Subject: [PATCH 375/465] geneve: fix fill_info when using collect_metadata [ Upstream commit 11387fe4a98f75d1f4cdb3efe3b42b19205c9df5 ] Since 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") fill_info does not return UDP_ZERO_CSUM6_RX when using COLLECT_METADATA. This is because it uses ip_tunnel_info_af() with the device level info, which is not valid for COLLECT_METADATA. Fix by checking for the presence of the actual sockets. Fixes: 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index dec5d563ab19..959fd12d2e67 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1293,7 +1293,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) goto nla_put_failure; - if (ip_tunnel_info_af(info) == AF_INET) { + if (rtnl_dereference(geneve->sock4)) { if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, info->key.u.ipv4.dst)) goto nla_put_failure; @@ -1302,8 +1302,10 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(info->key.tun_flags & TUNNEL_CSUM))) goto nla_put_failure; + } + #if IS_ENABLED(CONFIG_IPV6) - } else { + if (rtnl_dereference(geneve->sock6)) { if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, &info->key.u.ipv6.dst)) goto nla_put_failure; @@ -1315,8 +1317,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, !geneve->use_udp6_rx_checksums)) goto nla_put_failure; -#endif } +#endif if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || From 0236d8c44eb8961365f6d9ed98b8aac06b2c23b1 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 24 May 2017 09:59:31 -0700 Subject: [PATCH 376/465] tcp: avoid fastopen API to be used on AF_UNSPEC [ Upstream commit ba615f675281d76fd19aa03558777f81fb6b6084 ] Fastopen API should be used to perform fastopen operations on the TCP socket. It does not make sense to use fastopen API to perform disconnect by calling it with AF_UNSPEC. The fastopen data path is also prone to race conditions and bugs when using with AF_UNSPEC. 
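The call pattern being closed off can be reproduced from userspace in a few lines; a sketch only (expect EOPNOTSUPP once the fix is applied, and the exact behaviour also depends on the net.ipv4.tcp_fastopen sysctl):

	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>

	#ifndef MSG_FASTOPEN
	#define MSG_FASTOPEN 0x20000000
	#endif

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		struct sockaddr sa;
		const char buf[] = "hi";

		if (fd < 0)
			return 1;
		memset(&sa, 0, sizeof(sa));
		sa.sa_family = AF_UNSPEC;

		if (sendto(fd, buf, sizeof(buf) - 1, MSG_FASTOPEN,
			   &sa, sizeof(sa)) < 0)
			perror("sendto");
		return 0;
	}
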
One issue reported and analyzed by Vegard Nossum is as follows: +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Thread A: Thread B: ------------------------------------------------------------------------ sendto() - tcp_sendmsg() - sk_stream_memory_free() = 0 - goto wait_for_sndbuf - sk_stream_wait_memory() - sk_wait_event() // sleep | sendto(flags=MSG_FASTOPEN, dest_addr=AF_UNSPEC) | - tcp_sendmsg() | - tcp_sendmsg_fastopen() | - __inet_stream_connect() | - tcp_disconnect() //because of AF_UNSPEC | - tcp_transmit_skb()// send RST | - return 0; // no reconnect! | - sk_stream_wait_connect() | - sock_error() | - xchg(&sk->sk_err, 0) | - return -ECONNRESET - ... // wake up, see sk->sk_err == 0 - skb_entail() on TCP_CLOSE socket If the connection is reopened then we will send a brand new SYN packet after thread A has already queued a buffer. At this point I think the socket internal state (sequence numbers etc.) becomes messed up. When the new connection is closed, the FIN-ACK is rejected because the sequence number is outside the window. The other side tries to retransmit, but __tcp_retransmit_skb() calls tcp_trim_head() on an empty skb which corrupts the skb data length and hits a BUG() in copy_and_csum_bits(). +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Hence, this patch adds a check for AF_UNSPEC in the fastopen data path and return EOPNOTSUPP to user if such case happens. Fixes: cf60af03ca4e7 ("tcp: Fast Open client - sendmsg(MSG_FASTOPEN)") Reported-by: Vegard Nossum Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2dc7fcf60bf3..651f1f058a64 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); + struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) + if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && + uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ @@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, } } flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; - err = __inet_stream_connect(sk->sk_socket, msg->msg_name, + err = __inet_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, flags, 1); /* fastopen_req could already be freed in __inet_stream_connect * if the connection times out or gets rst From 90e7c3322d0cd536b29c7ea70af906283317dc42 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 25 May 2017 19:14:56 +0200 Subject: [PATCH 377/465] sctp: fix ICMP processing if skb is non-linear [ Upstream commit 804ec7ebe8ea003999ca8d1bfc499edc6a9e07df ] sometimes ICMP replies to INIT chunks are ignored by the client, even if the encapsulated SCTP headers match an open socket. This happens when the ICMP packet is carried by a paged skb: use skb_header_pointer() to read packet contents beyond the SCTP header, so that chunk header and initiate tag are validated correctly. v2: - don't use skb_header_pointer() to read the transport header, since icmp_socket_deliver() already puts these 8 bytes in the linear area. - change commit message to make specific reference to INIT chunks. 
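The non-linear safe access pattern the fix switches to looks roughly like the sketch below (in-tree kernel code only, it will not build stand-alone; offsets and struct names follow the diff):

	#include <linux/skbuff.h>
	#include <linux/sctp.h>

	/* Read the first chunk header even when the skb payload is paged:
	 * skb_header_pointer() copies into 'buf' when needed and returns
	 * NULL if the packet is too short, instead of blindly
	 * dereferencing skb->data. */
	static const struct sctp_chunkhdr *
	peek_first_chunk(const struct sk_buff *skb, struct sctp_chunkhdr *buf)
	{
		int off = skb_transport_offset(skb) + sizeof(struct sctphdr);

		return skb_header_pointer(skb, off, sizeof(*buf), buf);
	}
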
Signed-off-by: Davide Caratti Acked-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/input.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 0e06a278d2a9..ba9ad32fc447 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctp_association **app, struct sctp_transport **tpp) { + struct sctp_init_chunk *chunkhdr, _chunkhdr; union sctp_addr saddr; union sctp_addr daddr; struct sctp_af *af; struct sock *sk = NULL; struct sctp_association *asoc; struct sctp_transport *transport = NULL; - struct sctp_init_chunk *chunkhdr; __u32 vtag = ntohl(sctphdr->vtag); - int len = skb->len - ((void *)sctphdr - (void *)skb->data); *app = NULL; *tpp = NULL; @@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * discard the packet. */ if (vtag == 0) { - chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); - if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) - + sizeof(__be32) || + /* chunk header + first 4 octects of init header */ + chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr), + sizeof(struct sctp_chunkhdr) + + sizeof(__be32), &_chunkhdr); + if (!chunkhdr || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || - ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) { + ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) goto out; - } + } else if (vtag != asoc->c.peer_vtag) { goto out; } From d3edf403e279401947de9b274537ffbfe5310c28 Mon Sep 17 00:00:00 2001 From: Peter Dawson Date: Fri, 26 May 2017 06:35:18 +1000 Subject: [PATCH 378/465] ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets [ Upstream commit 0e9a709560dbcfbace8bf4019dc5298619235891 ] This fix addresses two problems in the way the DSCP field is formulated on the encapsulating header of IPv6 tunnels. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195661 1) The IPv6 tunneling code was manipulating the DSCP field of the encapsulating packet using the 32b flowlabel. Since the flowlabel is only the lower 20b it was incorrect to assume that the upper 12b containing the DSCP and ECN fields would remain intact when formulating the encapsulating header. This fix handles the 'inherit' and 'fixed-value' DSCP cases explicitly using the extant dsfield u8 variable. 2) The use of INET_ECN_encapsulate(0, dsfield) in ip6_tnl_xmit was incorrect and resulted in the DSCP value always being set to 0. Commit 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class is non-0") caused the regression by masking out the flowlabel which exposed the incorrect handling of the DSCP portion of the flowlabel in ip6_tunnel and ip6_gre. Fixes: 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class is non-0") Signed-off-by: Peter Dawson Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 13 +++++++------ net/ipv6/ip6_tunnel.c | 21 +++++++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6fcb7cb49bb2..4d60164c17e2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -537,11 +537,10 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - dsfield = ipv4_get_dsfield(iph); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; @@ -596,9 +595,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a9692ec0cd6d..15ff33934f79 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1196,7 +1196,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_flow_hdr(ipv6h, dsfield, ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; @@ -1231,8 +1231,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (tproto != IPPROTO_IPIP && tproto != 0) return -1; - dsfield = ipv4_get_dsfield(iph); - if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -1246,6 +1244,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; + dsfield = ip6_tclass(key->label); } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; @@ -1254,8 +1253,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; } @@ -1265,6 +1265,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); + skb_set_inner_ipproto(skb, IPPROTO_IPIP); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1298,8 +1300,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ip6_tnl_addr_conflict(t, ipv6h)) return -1; - dsfield = ipv6_get_dsfield(ipv6h); - if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -1313,6 +1313,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; fl6.daddr = 
key->u.ipv6.dst; fl6.flowlabel = key->label; + dsfield = ip6_tclass(key->label); } else { offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ @@ -1335,7 +1336,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) @@ -1347,6 +1350,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + skb_set_inner_ipproto(skb, IPPROTO_IPV6); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, From 3b69d6516e1c2aac93fb1f24826ac6a8760156e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 May 2017 14:27:35 -0700 Subject: [PATCH 379/465] ipv4: add reference counting to metrics [ Upstream commit 3fb07daff8e99243366a081e5129560734de4ada ] Andrey Konovalov reported crashes in ipv4_mtu() I could reproduce the issue with KASAN kernels, between 10.246.7.151 and 10.246.7.152 : 1) 20 concurrent netperf -t TCP_RR -H 10.246.7.152 -l 1000 & 2) At the same time run following loop : while : do ip ro add 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 ip ro del 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 done Cong Wang attempted to add back rt->fi in commit 82486aa6f1b9 ("ipv4: restore rt->fi for reference counting") but this proved to add some issues that were complex to solve. Instead, I suggested to add a refcount to the metrics themselves, being a standalone object (in particular, no reference to other objects) I tried to make this patch as small as possible to ease its backport, instead of being super clean. Note that we believe that only ipv4 dst need to take care of the metric refcount. But if this is wrong, this patch adds the basic infrastructure to extend this to other families. Many thanks to Julian Anastasov for reviewing this patch, and Cong Wang for his efforts on this problem. Fixes: 2860583fe840 ("ipv4: Kill rt->fi") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Reviewed-by: Julian Anastasov Acked-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/dst.h | 8 +++++++- include/net/ip_fib.h | 10 +++++----- net/core/dst.c | 23 ++++++++++++++--------- net/ipv4/fib_semantics.c | 17 ++++++++++------- net/ipv4/route.c | 10 +++++++++- 5 files changed, 45 insertions(+), 23 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 049af33da3b6..cfc043784166 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -107,10 +107,16 @@ struct dst_entry { }; }; +struct dst_metrics { + u32 metrics[RTAX_MAX]; + atomic_t refcnt; +}; +extern const struct dst_metrics dst_default_metrics; + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL +#define DST_METRICS_REFCOUNTED 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 368bb4024b78..892f8dccc835 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -114,11 +114,11 @@ struct fib_info { __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; - u32 *fib_metrics; -#define fib_mtu fib_metrics[RTAX_MTU-1] -#define fib_window fib_metrics[RTAX_WINDOW-1] -#define fib_rtt fib_metrics[RTAX_RTT-1] -#define fib_advmss fib_metrics[RTAX_ADVMSS-1] + struct dst_metrics *fib_metrics; +#define fib_mtu fib_metrics->metrics[RTAX_MTU-1] +#define fib_window fib_metrics->metrics[RTAX_WINDOW-1] +#define fib_rtt fib_metrics->metrics[RTAX_RTT-1] +#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; diff --git a/net/core/dst.c b/net/core/dst.c index 960e503b5a52..6192f11beec9 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard_out); -const u32 dst_default_metrics[RTAX_MAX + 1] = { +const struct dst_metrics dst_default_metrics = { /* This initializer is needed to force linker to place this variable * into const section. Otherwise it might end into bss section. * We really want to avoid false sharing on this variable, and catch * any writes on it. 
*/ - [RTAX_MAX] = 0xdeadbeef, + .refcnt = ATOMIC_INIT(1), }; void dst_init(struct dst_entry *dst, struct dst_ops *ops, @@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, if (dev) dev_hold(dev); dst->ops = ops; - dst_init_metrics(dst, dst_default_metrics, true); + dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; dst->path = dst; dst->from = NULL; @@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release); u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) { - u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p) { - u32 *old_p = __DST_METRICS_PTR(old); + struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); unsigned long prev, new; - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + atomic_set(&p->refcnt, 1); + memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); new = (unsigned long) p; prev = cmpxchg(&dst->_metrics, old, new); if (prev != old) { kfree(p); - p = __DST_METRICS_PTR(prev); + p = (struct dst_metrics *)__DST_METRICS_PTR(prev); if (prev & DST_METRICS_READ_ONLY) p = NULL; + } else if (prev & DST_METRICS_REFCOUNTED) { + if (atomic_dec_and_test(&old_p->refcnt)) + kfree(old_p); } } - return p; + BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); + return (u32 *)p; } EXPORT_SYMBOL(dst_cow_metrics_generic); @@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) { unsigned long prev, new; - new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY; + new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; prev = cmpxchg(&dst->_metrics, old, new); if (prev == old) kfree(__DST_METRICS_PTR(old)); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 317026a39cfa..6cf74de06467 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -204,6 +204,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + struct dst_metrics *m; change_nexthops(fi) { if (nexthop_nh->nh_dev) @@ -214,8 +215,9 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - if (fi->fib_metrics != (u32 *) dst_default_metrics) - kfree(fi->fib_metrics); + m = fi->fib_metrics; + if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) + kfree(m); kfree(fi); } @@ -975,11 +977,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; - fi->fib_metrics[type - 1] = val; + fi->fib_metrics->metrics[type - 1] = val; } if (ecn_ca) - fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; return 0; } @@ -1037,11 +1039,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; if (cfg->fc_mx) { - fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); if (!fi->fib_metrics) goto failure; + atomic_set(&fi->fib_metrics->refcnt, 1); } else - fi->fib_metrics = (u32 *) dst_default_metrics; + fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; @@ -1242,7 +1245,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if 
(fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) goto nla_put_failure; - if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) + if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) goto nla_put_failure; if (fi->fib_prefsrc && diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d9724889ff09..e0cc6c1fe69d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1389,8 +1389,12 @@ static void rt_add_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct dst_entry *dst) { + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *) dst; + if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) + kfree(p); + if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1442,7 +1446,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; rt->rt_uses_gateway = 1; } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); + dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); + if (fi->fib_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + atomic_inc(&fi->fib_metrics->refcnt); + } #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif From d6d2860eeefbbf574288b556bb3b17df3ad2f824 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:07 +0200 Subject: [PATCH 380/465] bpf: add bpf_clone_redirect to bpf_helper_changes_pkt_data [ Upstream commit 41703a731066fde79c3e5ccf3391cf77a98aeda5 ] The bpf_clone_redirect() still needs to be listed in bpf_helper_changes_pkt_data() since we call into bpf_try_make_head_writable() from there, thus we need to invalidate prior pkt regs as well. Fixes: 36bbef52c7eb ("bpf: direct packet write and access for helpers for clsact progs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index ebaeaf2e46e8..6ca3b05eb627 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2266,6 +2266,7 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_skb_change_head || func == bpf_skb_change_tail || func == bpf_skb_pull_data || + func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || func == bpf_xdp_adjust_head) From 87cebd0f198b3f1153e5aa8d63acf18c4d61af87 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:08 +0200 Subject: [PATCH 381/465] bpf: fix wrong exposure of map_flags into fdinfo for lpm [ Upstream commit a316338cb71a3260201490e615f2f6d5c0d8fb2c ] trie_alloc() always needs to have BPF_F_NO_PREALLOC passed in via attr->map_flags, since it does not support preallocation yet. We check the flag, but we never copy the flag into trie->map.map_flags, which is later on exposed into fdinfo and used by loaders such as iproute2. Latter uses this in bpf_map_selfcheck_pinned() to test whether a pinned map has the same spec as the one from the BPF obj file and if not, bails out, which is currently the case for lpm since it exposes always 0 as flags. Also copy over flags in array_map_alloc() and stack_map_alloc(). They always have to be 0 right now, but we should make sure to not miss to copy them over at a later point in time when we add actual flags for them to use. 
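The fdinfo interface the loader relies on can be inspected from userspace; a minimal sketch (needs root and a recent UAPI linux/bpf.h, error handling trimmed):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	int main(void)
	{
		union bpf_attr attr;
		char path[64], line[128];
		FILE *f;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.map_type    = BPF_MAP_TYPE_LPM_TRIE;
		attr.key_size    = 8;	/* 4-byte prefixlen + 4-byte IPv4 data */
		attr.value_size  = 4;
		attr.max_entries = 16;
		attr.map_flags   = BPF_F_NO_PREALLOC;	/* required for LPM tries */

		fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
		if (fd < 0)
			return 1;

		snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
		f = fopen(path, "r");
		while (f && fgets(line, sizeof(line), f))
			if (strstr(line, "map_flags"))
				fputs(line, stdout);	/* 0x0 before the fix, 0x1 after */
		if (f)
			fclose(f);
		return 0;
	}
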
Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Jarno Rajahalme Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/arraymap.c | 1 + kernel/bpf/lpm_trie.c | 1 + kernel/bpf/stackmap.c | 1 + 3 files changed, 3 insertions(+) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 6b6f41f0b211..892f47c28377 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; + array->map.map_flags = attr->map_flags; array->elem_size = elem_size; if (!percpu) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index b37bd9ab7f57..7ab9e42180ab 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) trie->map.key_size = attr->key_size; trie->map.value_size = attr->value_size; trie->map.max_entries = attr->max_entries; + trie->map.map_flags = attr->map_flags; trie->data_size = attr->key_size - offsetof(struct bpf_lpm_trie_key, data); trie->max_prefixlen = trie->data_size * 8; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 22aa45cd0324..96f604317685 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) smap->map.key_size = attr->key_size; smap->map.value_size = value_size; smap->map.max_entries = attr->max_entries; + smap->map.map_flags = attr->map_flags; smap->n_buckets = n_buckets; smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; From 21dccb0f7cb4915845ac41fd39772c287e9b8777 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 May 2017 03:00:06 +0200 Subject: [PATCH 382/465] bpf: adjust verifier heuristics [ Upstream commit 3c2ce60bdd3d57051bf85615deec04a694473840 ] Current limits with regards to processing program paths do not really reflect today's needs anymore due to programs becoming more complex and verifier smarter, keeping track of more data such as const ALU operations, alignment tracking, spilling of PTR_TO_MAP_VALUE_ADJ registers, and other features allowing for smarter matching of what LLVM generates. This also comes with the side-effect that we result in fewer opportunities to prune search states and thus often need to do more work to prove safety than in the past due to different register states and stack layout where we mismatch. Generally, it's quite hard to determine what caused a sudden increase in complexity, it could be caused by something as trivial as a single branch somewhere at the beginning of the program where LLVM assigned a stack slot that is marked differently throughout other branches and thus causing a mismatch, where verifier then needs to prove safety for the whole rest of the program. Subsequently, programs with even less than half the insn size limit can get rejected. We noticed that while some programs load fine under pre 4.11, they get rejected due to hitting limits on more recent kernels. We saw that in the vast majority of cases (90+%) pruning failed due to register mismatches. In case of stack mismatches, majority of cases failed due to different stack slot types (invalid, spill, misc) rather than differences in spilled registers. 
This patch makes pruning more aggressive by also adding markers that sit at conditional jumps as well. Currently, we only mark jump targets for pruning. For example in direct packet access, these are usually error paths where we bail out. We found that adding these markers, it can reduce number of processed insns by up to 30%. Another option is to ignore reg->id in probing PTR_TO_MAP_VALUE_OR_NULL registers, which can help pruning slightly as well by up to 7% observed complexity reduction as stand-alone. Meaning, if a previous path with register type PTR_TO_MAP_VALUE_OR_NULL for map X was found to be safe, then in the current state a PTR_TO_MAP_VALUE_OR_NULL register for the same map X must be safe as well. Last but not least the patch also adds a scheduling point and bumps the current limit for instructions to be processed to a more adequate value. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6fd78d4c4164..904decd32783 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -140,7 +140,7 @@ struct bpf_verifier_stack_elem { struct bpf_verifier_stack_elem *next; }; -#define BPF_COMPLEXITY_LIMIT_INSNS 65536 +#define BPF_COMPLEXITY_LIMIT_INSNS 98304 #define BPF_COMPLEXITY_LIMIT_STACK 1024 struct bpf_call_arg_meta { @@ -2546,6 +2546,7 @@ static int check_cfg(struct bpf_verifier_env *env) env->explored_states[t + 1] = STATE_LIST_MARK; } else { /* conditional jump with two edges */ + env->explored_states[t] = STATE_LIST_MARK; ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret == 1) goto peek_stack; @@ -2704,6 +2705,12 @@ static bool states_equal(struct bpf_verifier_env *env, rcur->type != NOT_INIT)) continue; + /* Don't care about the reg->id in this case. */ + if (rold->type == PTR_TO_MAP_VALUE_OR_NULL && + rcur->type == PTR_TO_MAP_VALUE_OR_NULL && + rold->map_ptr == rcur->map_ptr) + continue; + if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && compare_ptrs_to_packet(rold, rcur)) continue; @@ -2838,6 +2845,9 @@ static int do_check(struct bpf_verifier_env *env) goto process_bpf_exit; } + if (need_resched()) + cond_resched(); + if (log_level && do_print_state) { verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); print_verifier_state(&env->cur_state); From e346489fac4cf0dfc947ebc295292e2444a7cb8f Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Mon, 15 May 2017 16:28:17 -0700 Subject: [PATCH 383/465] sparc64: Fix mapping of 64k pages with MAP_FIXED [ Upstream commit b6c41cb050d5debc7e4eaa0a81cbdbad72588891 ] An incorrect huge page alignment check caused mmap failure for 64K pages when MAP_FIXED is used with address not aligned to HPAGE_SIZE. Orabug: 25885991 Fixes: dcd1912d21a0 ("sparc64: Add 64K page size support") Signed-off-by: Nitin Gupta Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/hugetlb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index dcbf985ab243..d1f837dc77a4 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -24,9 +24,11 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - if (len & ~HPAGE_MASK) + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) return -EINVAL; - if (addr & ~HPAGE_MASK) + if (addr & ~huge_page_mask(h)) return -EINVAL; return 0; } From 76037bf9e12e1ed1885d3f552a16fadcdaa370f0 Mon Sep 17 00:00:00 2001 From: Orlando Arias Date: Tue, 16 May 2017 15:34:00 -0400 Subject: [PATCH 384/465] sparc: Fix -Wstringop-overflow warning [ Upstream commit deba804c90642c8ed0f15ac1083663976d578f54 ] Greetings, GCC 7 introduced the -Wstringop-overflow flag to detect buffer overflows in calls to string handling functions [1][2]. Due to the way ``empty_zero_page'' is declared in arch/sparc/include/setup.h, this causes a warning to trigger at compile time in the function mem_init(), which is subsequently converted to an error. The ensuing patch fixes this issue and aligns the declaration of empty_zero_page to that of other architectures. Thank you. Cheers, Orlando. [1] https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02308.html [2] https://gcc.gnu.org/gcc-7/changes.html Signed-off-by: Orlando Arias -------------------------------------------------------------------------------- Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/pgtable_32.h | 4 ++-- arch/sparc/include/asm/setup.h | 2 +- arch/sparc/mm/init_32.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index ce6f56980aef..cf190728360b 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -91,9 +91,9 @@ extern unsigned long pfn_base; * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern unsigned long empty_zero_page; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page)) +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) /* * In general all page table modifications should use the V8 atomic diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 478bf6bb4598..3fae200dd251 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -16,7 +16,7 @@ extern char reboot_command[]; */ extern unsigned char boot_cpu_id; -extern unsigned long empty_zero_page; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern int serial_console; static inline int con_is_present(void) diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index c6afe98de4d9..3bd0d513bddb 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -290,7 +290,7 @@ void __init mem_init(void) /* Saves us work later. */ - memset((void *)&empty_zero_page, 0, PAGE_SIZE); + memset((void *)empty_zero_page, 0, PAGE_SIZE); i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5); i += 1; From f351b1226d10b7e2b924016287aaa10c1e6af789 Mon Sep 17 00:00:00 2001 From: "Liam R. 
Howlett" Date: Wed, 17 May 2017 11:47:00 -0400 Subject: [PATCH 385/465] sparc/ftrace: Fix ftrace graph time measurement [ Upstream commit 48078d2dac0a26f84f5f3ec704f24f7c832cce14 ] The ftrace function_graph time measurements of a given function is not accurate according to those recorded by ftrace using the function filters. This change pulls the x86_64 fix from 'commit 722b3c746953 ("ftrace/graph: Trace function entry before updating index")' into the sparc specific prepare_ftrace_return which stops ftrace from counting interrupted tasks in the time measurement. Example measurements for select_task_rq_fair running "hackbench 100 process 1000": | tracing/trace_stat/function0 | function_graph Before patch | 2.802 us | 4.255 us After patch | 2.749 us | 3.094 us Signed-off-by: Liam R. Howlett Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/ftrace.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 6bcff698069b..cec54dc4ab81 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -130,17 +130,16 @@ unsigned long prepare_ftrace_return(unsigned long parent, if (unlikely(atomic_read(¤t->tracing_graph_pause))) return parent + 8UL; - if (ftrace_push_return_trace(parent, self_addr, &trace.depth, - frame_pointer, NULL) == -EBUSY) - return parent + 8UL; - trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; + if (!ftrace_graph_entry(&trace)) + return parent + 8UL; + + if (ftrace_push_return_trace(parent, self_addr, &trace.depth, + frame_pointer, NULL) == -EBUSY) return parent + 8UL; - } return return_hooker; } From 72351ac5cdcb09186469f2a27e5aaccda044c61f Mon Sep 17 00:00:00 2001 From: Richard Narron Date: Sun, 4 Jun 2017 16:23:18 -0700 Subject: [PATCH 386/465] fs/ufs: Set UFS default maximum bytes per file commit 239e250e4acbc0104d514307029c0839e834a51a upstream. This fixes a problem with reading files larger than 2GB from a UFS-2 file system: https://bugzilla.kernel.org/show_bug.cgi?id=195721 The incorrect UFS s_maxsize limit became a problem as of commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") which started using s_maxbytes to avoid a page index overflow in do_generic_file_read(). That caused files to be truncated on UFS-2 file systems because the default maximum file size is 2GB (MAX_NON_LFS) and UFS didn't update it. Here I simply increase the default to a common value used by other file systems. Signed-off-by: Richard Narron Cc: Al Viro Cc: Will B Cc: Theodore Ts'o Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ufs/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 131b2b77c818..29ecaf739449 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -812,9 +812,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_dirblksize = UFS_SECTOR_SIZE; super_block_offset=UFS_SBLOCK; - /* Keep 2Gig file limit. 
Some UFS variants need to override - this but as I don't know which I'll let those in the know loosen - the rules */ + sb->s_maxbytes = MAX_LFS_FILESIZE; + switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) { case UFS_MOUNT_UFSTYPE_44BSD: UFSD("ufstype=44bsd\n"); From 919c7173e08ac070794873decb8ce1335219503f Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 24 May 2017 17:03:26 +1000 Subject: [PATCH 387/465] powerpc: Fix booting P9 hash with CONFIG_PPC_RADIX_MMU=N commit d957fb4d173647640a2b83e7c7e56a580e7fc7e7 upstream. Currently if you disable CONFIG_PPC_RADIX_MMU you'll crash on boot on a P9. This is because we still set MMU_FTR_TYPE_RADIX via ibm,pa-features and MMU_FTR_TYPE_RADIX is what's used for code patching in much of the asm code (ie. slb_miss_realmode) This patch fixes the problem by stopping MMU_FTR_TYPE_RADIX from being set from ibm.pa-features. We may eventually end up removing the CONFIG_PPC_RADIX_MMU option completely but until then this fixes the issue. Fixes: 17a3dd2f5fc7 ("powerpc/mm/radix: Use firmware feature to enable Radix MMU") Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/prom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f5d399e46193..8d2d98f9ad02 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -161,7 +161,9 @@ static struct ibm_pa_feature { { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, +#ifdef CONFIG_PPC_RADIX_MMU { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, +#endif { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, From baa4d4112e6f8f449056b7e32d55a30102976b54 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 24 May 2017 16:49:59 +1000 Subject: [PATCH 388/465] powerpc/spufs: Fix hash faults for kernel regions commit d75e4919cc0b6fbcbc8d6654ef66d87a9dbf1526 upstream. Commit ac29c64089b7 ("powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED") swapped _PAGE_USER for _PAGE_PRIVILEGED, and introduced check_pte_access() which denied kernel access to non-_PAGE_PRIVILEGED pages. However, it didn't add _PAGE_PRIVILEGED to the hash fault handler for spufs' kernel accesses, so the DMAs required to establish SPE memory no longer work. This change adds _PAGE_PRIVILEGED to the hash fault handler for kernel accesses. 
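As a rough illustration of the rule being enforced (bit values and helper below are simplified placeholders, not the real Book3S page-table code): a PTE marked privileged is only usable when the fault's access flags also assert the privileged bit, which is exactly what the spufs kernel-region path was missing.

#define SK_PRESENT      0x1UL   /* placeholder bits, not the real layout */
#define SK_READ         0x2UL
#define SK_PRIVILEGED   0x4UL

static int sketch_pte_access_ok(unsigned long pte, unsigned long access)
{
        /* privileged page, but the request did not claim to be privileged */
        if ((pte & SK_PRIVILEGED) && !(access & SK_PRIVILEGED))
                return 0;
        return (pte & access & (SK_PRESENT | SK_READ)) != 0;
}

/* Before the fix, the spufs kernel-region fault passed the equivalent of
 * SK_PRESENT | SK_READ only; the fix amounts to adding SK_PRIVILEGED to
 * that access mask so the check above succeeds for kernel mappings. */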
Fixes: ac29c64089b7 ("powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED") Signed-off-by: Jeremy Kerr Reported-by: Sombat Tragolgosol Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/cell/spu_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 96c2b8a40630..0c45cdbac4cf 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -197,7 +197,9 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr); + ret = hash_page(ea, + _PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED, + 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { From 427fa8e39177c07d4fbbb2595241a0bef5b6d751 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 11 Apr 2017 19:07:28 +0200 Subject: [PATCH 389/465] Revert "tty_port: register tty ports with serdev bus" commit d3ba126a226a6b6da021ebfea444a2a807cde945 upstream. This reverts commit 8ee3fde047589dc9c201251f07d0ca1dc776feca. The new serdev bus hooked into the tty layer in tty_port_register_device() by registering a serdev controller instead of a tty device whenever a serdev client is present, and by deregistering the controller in the tty-port destructor. This is broken in several ways: Firstly, it leads to a NULL-pointer dereference whenever a tty driver later deregisters its devices as no corresponding character device will exist. Secondly, far from every tty driver uses tty-port refcounting (e.g. serial core) so the serdev devices might never be deregistered or deallocated. Thirdly, deregistering at tty-port destruction is too late as the underlying device and structures may be long gone by then. A port is not released before an open tty device is closed, something which a registered serdev client can prevent from ever happening. A driver callback while the device is gone typically also leads to crashes. Many tty drivers even keep their ports around until the driver is unloaded (e.g. serial core), something which even if a late callback never happens, leads to leaks if a device is unbound from its driver and is later rebound. The right solution here is to add a new tty_port_unregister_device() helper and to never call tty_device_unregister() whenever the port has been claimed by serdev, but since this requires modifying just about every tty driver (and multiple subsystems) it will need to be done incrementally. Reverting the offending patch is the first step in fixing the broken lifetime assumptions. A follow-up patch will add a new pair of tty-device registration helpers, which a vetted tty driver can use to support serdev (initially serial core). When every tty driver uses the serdev helpers (at least for deregistration), we can add serdev registration to tty_port_register_device() again. Note that this also fixes another issue with serdev, which currently allocates and registers a serdev controller for every tty device registered using tty_port_device_register() only to immediately deregister and deallocate it when the corresponding OF node or serdev child node is missing. This should be addressed before enabling serdev for hot-pluggable buses. 
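As a sketch of the pairing the changelog plans (the unregister helper named below does not exist yet at this point and is only an assumption about the follow-up work), a vetted tty driver would register and deregister through port-aware helpers so that a serdev-claimed port never sees an unmatched tty_unregister_device():

#include <linux/tty.h>

static struct device *example_add_port(struct tty_driver *drv,
                                       struct tty_port *port,
                                       unsigned int index,
                                       struct device *parent)
{
        /* registers either a character device or, later, a serdev controller */
        return tty_port_register_device(port, drv, index, parent);
}

static void example_remove_port(struct tty_driver *drv, struct tty_port *port,
                                unsigned int index)
{
        /* planned counterpart that knows whether serdev claimed the port */
        tty_port_unregister_device(port, drv, index);
}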
Signed-off-by: Johan Hovold Reviewed-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 1d21a9c1d33e..0c880f17d27e 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -16,7 +16,6 @@ #include #include #include -#include static int tty_port_default_receive_buf(struct tty_port *port, const unsigned char *p, @@ -129,15 +128,7 @@ struct device *tty_port_register_device_attr(struct tty_port *port, struct device *device, void *drvdata, const struct attribute_group **attr_grp) { - struct device *dev; - tty_port_link_device(port, driver, index); - - dev = serdev_tty_port_register(port, device, driver, index); - if (PTR_ERR(dev) != -ENODEV) - /* Skip creating cdev if we registered a serdev device */ - return dev; - return tty_register_device_attr(driver, index, device, drvdata, attr_grp); } @@ -189,9 +180,6 @@ static void tty_port_destructor(struct kref *kref) /* check if last port ref was dropped before tty release */ if (WARN_ON(port->itty)) return; - - serdev_tty_port_unregister(port); - if (port->xmit_buf) free_page((unsigned long)port->xmit_buf); tty_port_destroy(port); From 84ac7693f46fde81e30387b0bf6d3931df4f5420 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 11 Apr 2017 19:07:29 +0200 Subject: [PATCH 390/465] serdev: fix tty-port client deregistration commit aee5da7838787f8ed47f825dbe09e2812acdf97b upstream. The port client data must be set when registering the serdev controller or client deregistration will fail (and the serdev devices are left registered and allocated) if the port was never opened in between. Make sure to clear the port client data on any probe errors to avoid a use-after-free when the client is later deregistered unconditionally (e.g. in a tty-port deregistration helper). Also move port client operation initialisation to registration. Note that the client ops must be restored on failed probe. 
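The required error handling is the usual save/override/restore idiom; a condensed sketch with hypothetical types (not the driver's actual structures) looks like this:

struct client_ops;                      /* opaque for the sketch */

struct sketch_port {
        const struct client_ops *client_ops;
        void *client_data;
};

static int sketch_attach(struct sketch_port *port,
                         const struct client_ops *new_ops, void *ctrl,
                         int (*add)(void *ctrl))
{
        const struct client_ops *old_ops = port->client_ops;
        int ret;

        /* take over the port before the controller becomes visible */
        port->client_ops = new_ops;
        port->client_data = ctrl;

        ret = add(ctrl);
        if (ret) {
                /* failed probe: restore everything we overrode */
                port->client_data = NULL;
                port->client_ops = old_ops;
        }
        return ret;
}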
Fixes: bed35c6dfa6a ("serdev: add a tty port controller driver") Signed-off-by: Johan Hovold Reviewed-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/serdev-ttyport.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index d05393594f15..06e17faa6dfb 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -101,9 +101,6 @@ static int ttyport_open(struct serdev_controller *ctrl) return PTR_ERR(tty); serport->tty = tty; - serport->port->client_ops = &client_ops; - serport->port->client_data = ctrl; - if (tty->ops->open) tty->ops->open(serport->tty, NULL); else @@ -181,6 +178,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx) { + const struct tty_port_client_operations *old_ops; struct serdev_controller *ctrl; struct serport *serport; int ret; @@ -199,15 +197,22 @@ struct device *serdev_tty_port_register(struct tty_port *port, ctrl->ops = &ctrl_ops; + old_ops = port->client_ops; + port->client_ops = &client_ops; + port->client_data = ctrl; + ret = serdev_controller_add(ctrl); if (ret) - goto err_controller_put; + goto err_reset_data; dev_info(&ctrl->dev, "tty port %s%d registered\n", drv->name, idx); return &ctrl->dev; -err_controller_put: +err_reset_data: + port->client_data = NULL; + port->client_ops = old_ops; serdev_controller_put(ctrl); + return ERR_PTR(ret); } From e4bab31cf957f3b5534e7137ffa6e1267a1e9c00 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 18 May 2017 12:29:55 +0100 Subject: [PATCH 391/465] drivers/tty: 8250: only call fintek_8250_probe when doing port I/O commit 4c4fc90964b1cf205a67df566cc82ea1731bcb00 upstream. Commit fa01e2ca9f53 ("serial: 8250: Integrate Fintek into 8250_base") modified the probing logic for PNP0501 devices, to remove a collision between the generic 16550A driver and the Fintek driver, which reused the same ACPI _HID. The Fintek device probe is now incorporated into the common 8250 probe path, and gets called for all discovered 16550A compatible devices, including ones that are MMIO mapped rather than IO mapped. However, the Fintek driver assumes the port base is a I/O address, and proceeds to probe some arbitrary offsets above it. This is generally a wrong thing to do, but on ARM systems (having no native port I/O), this may result in faulting accesses of completely unrelated MMIO regions in the PCI I/O space. Given that this is at serial probe time, this results in hard to diagnose crashes at boot. So let's restrict the Fintek probe to devices that we know are using port I/O in the first place. 
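The distinction the fix relies on is how the port was described to serial core: only UPIO_PORT ports are backed by real inb()/outb() cycles, so probing offsets beyond the base is only meaningful for them. A rough sketch of the two descriptions, with made-up example addresses:

#include <linux/serial_8250.h>

static void sketch_describe_ports(void)
{
        struct uart_8250_port legacy = { };
        struct uart_8250_port soc = { };

        /* classic PC-style UART: true port I/O, quirk probing is safe */
        legacy.port.iotype = UPIO_PORT;
        legacy.port.iobase = 0x3f8;

        /* memory-mapped UART, typical on ARM: no port I/O behind it, so
         * poking SuperIO config offsets would hit unrelated address space */
        soc.port.iotype  = UPIO_MEM;
        soc.port.mapbase = 0x10000000;          /* example physical address */
}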
Fixes: fa01e2ca9f53 ("serial: 8250: Integrate Fintek into 8250_base") Suggested-by: Arnd Bergmann Reviewed-by: Ricardo Ribalda Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 6119516ef5fc..4c26d15ad7d9 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1337,7 +1337,7 @@ static void autoconfig(struct uart_8250_port *up) /* * Check if the device is a Fintek F81216A */ - if (port->type == PORT_16550A) + if (port->type == PORT_16550A && port->iotype == UPIO_PORT) fintek_8250_probe(up); if (up->capabilities != old_capabilities) { From 2da7518890d83a7fc94de41f1bcf7d24b9e13fa9 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 5 May 2017 11:06:50 +0200 Subject: [PATCH 392/465] i2c: i2c-tiny-usb: fix buffer not being DMA capable commit 5165da5923d6c7df6f2927b0113b2e4d9288661e upstream. Since v4.9 i2c-tiny-usb generates the below call trace and longer works, since it can't communicate with the USB device. The reason is, that since v4.9 the USB stack checks, that the buffer it should transfer is DMA capable. This was a requirement since v2.2 days, but it usually worked nevertheless. [ 17.504959] ------------[ cut here ]------------ [ 17.505488] WARNING: CPU: 0 PID: 93 at drivers/usb/core/hcd.c:1587 usb_hcd_map_urb_for_dma+0x37c/0x570 [ 17.506545] transfer buffer not dma capable [ 17.507022] Modules linked in: [ 17.507370] CPU: 0 PID: 93 Comm: i2cdetect Not tainted 4.11.0-rc8+ #10 [ 17.508103] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 17.509039] Call Trace: [ 17.509320] ? dump_stack+0x5c/0x78 [ 17.509714] ? __warn+0xbe/0xe0 [ 17.510073] ? warn_slowpath_fmt+0x5a/0x80 [ 17.510532] ? nommu_map_sg+0xb0/0xb0 [ 17.510949] ? usb_hcd_map_urb_for_dma+0x37c/0x570 [ 17.511482] ? usb_hcd_submit_urb+0x336/0xab0 [ 17.511976] ? wait_for_completion_timeout+0x12f/0x1a0 [ 17.512549] ? wait_for_completion_timeout+0x65/0x1a0 [ 17.513125] ? usb_start_wait_urb+0x65/0x160 [ 17.513604] ? usb_control_msg+0xdc/0x130 [ 17.514061] ? usb_xfer+0xa4/0x2a0 [ 17.514445] ? __i2c_transfer+0x108/0x3c0 [ 17.514899] ? i2c_transfer+0x57/0xb0 [ 17.515310] ? i2c_smbus_xfer_emulated+0x12f/0x590 [ 17.515851] ? _raw_spin_unlock_irqrestore+0x11/0x20 [ 17.516408] ? i2c_smbus_xfer+0x125/0x330 [ 17.516876] ? i2c_smbus_xfer+0x125/0x330 [ 17.517329] ? i2cdev_ioctl_smbus+0x1c1/0x2b0 [ 17.517824] ? i2cdev_ioctl+0x75/0x1c0 [ 17.518248] ? do_vfs_ioctl+0x9f/0x600 [ 17.518671] ? vfs_write+0x144/0x190 [ 17.519078] ? SyS_ioctl+0x74/0x80 [ 17.519463] ? 
entry_SYSCALL_64_fastpath+0x1e/0xad [ 17.519959] ---[ end trace d047c04982f5ac50 ]--- Signed-off-by: Sebastian Reichel Reviewed-by: Greg Kroah-Hartman Acked-by: Till Harbaum Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-tiny-usb.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index 0ed77eeff31e..a2e3dd715380 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -178,22 +178,39 @@ static int usb_read(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; + void *dmadata = kmalloc(len, GFP_KERNEL); + int ret; + + if (!dmadata) + return -ENOMEM; /* do control transfer */ - return usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), + ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | - USB_DIR_IN, value, index, data, len, 2000); + USB_DIR_IN, value, index, dmadata, len, 2000); + + memcpy(data, dmadata, len); + kfree(dmadata); + return ret; } static int usb_write(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; + void *dmadata = kmemdup(data, len, GFP_KERNEL); + int ret; + + if (!dmadata) + return -ENOMEM; /* do control transfer */ - return usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), + ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE, - value, index, data, len, 2000); + value, index, dmadata, len, 2000); + + kfree(dmadata); + return ret; } static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev) From f5eef8d2458bb569ca521b3c2b0a19af62536745 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 10 May 2017 03:48:23 +0800 Subject: [PATCH 393/465] crypto: skcipher - Add missing API setkey checks commit 9933e113c2e87a9f46a40fde8dafbf801dca1ab9 upstream. The API setkey checks for key sizes and alignment went AWOL during the skcipher conversion. This patch restores them. 
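With the checks back in place, a key-size violation is again rejected at setkey time rather than being handed to the algorithm. A minimal kernel-context sketch of how that surfaces through the public API (illustrative only, error handling trimmed):

#include <crypto/skcipher.h>
#include <linux/err.h>

static int sketch_reject_short_key(void)
{
        struct crypto_skcipher *tfm;
        static const u8 short_key[4];   /* far too short for AES */
        int ret;

        tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        /* cbc(aes) wants 16/24/32 byte keys, so with the restored checks
         * this fails with -EINVAL instead of reaching the algorithm */
        ret = crypto_skcipher_setkey(tfm, short_key, sizeof(short_key));

        crypto_free_skcipher(tfm);
        return ret;
}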
Fixes: 4e6c3df4d729 ("crypto: skcipher - Add low-level skcipher...") Reported-by: Baozeng Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/skcipher.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 014af741fc6a..4faa0fd53b0c 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -764,6 +764,44 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm) return 0; } +static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + unsigned long alignmask = crypto_skcipher_alignmask(tfm); + struct skcipher_alg *cipher = crypto_skcipher_alg(tfm); + u8 *buffer, *alignbuffer; + unsigned long absize; + int ret; + + absize = keylen + alignmask; + buffer = kmalloc(absize, GFP_ATOMIC); + if (!buffer) + return -ENOMEM; + + alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + memcpy(alignbuffer, key, keylen); + ret = cipher->setkey(tfm, alignbuffer, keylen); + kzfree(buffer); + return ret; +} + +static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct skcipher_alg *cipher = crypto_skcipher_alg(tfm); + unsigned long alignmask = crypto_skcipher_alignmask(tfm); + + if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) { + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + if ((unsigned long)key & alignmask) + return skcipher_setkey_unaligned(tfm, key, keylen); + + return cipher->setkey(tfm, key, keylen); +} + static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm) { struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); @@ -784,7 +822,7 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) tfm->__crt_alg->cra_type == &crypto_givcipher_type) return crypto_init_skcipher_ops_ablkcipher(tfm); - skcipher->setkey = alg->setkey; + skcipher->setkey = skcipher_setkey; skcipher->encrypt = alg->encrypt; skcipher->decrypt = alg->decrypt; skcipher->ivsize = alg->ivsize; From 8f8dca3c86b3b7748968c38ec5b2bc40f85b018b Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 9 May 2017 15:02:22 +0800 Subject: [PATCH 394/465] Revert "ACPI / button: Remove lid_init_state=method mode" commit f369fdf4f661322b73f3307e9f3cd55fb3a20123 upstream. This reverts commit ecb10b694b72ca5ea51b3c90a71ff2a11963425a. The only expected ACPI control method lid device's usage model is 1. Listen to the lid notification, 2. Evaluate _LID after being notified by BIOS, 3. Suspend the system (if users configure to do so) after seeing "close". It's not ensured that BIOS will notify OS after boot/resume, and it's not ensured that BIOS will always generate "open" event upon opening the lid. But there are 2 wrong usage models: 1. When the lid device is responsible for suspend/resume the system, userspace requires to see "open" event to be paired with "close" after the system is resumed, or it will suspend the system again. 2. When an external monitor connects to the laptop attached docks, userspace requires to see "close" event after the system is resumed so that it can determine whether the internal display should remain dark and the external display should be lit on. After we made default kernel behavior to be suitable for usage model 1, users of usage model 2 start to report regressions for such behavior change. 
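Both usage models consume the lid state the button driver exposes; a quick way to see what a given lid_init_state setting reports right after boot or resume is to read the driver's proc node, as in the small user-space sketch below (the LID0 device name is firmware-dependent and only an assumption):

#include <stdio.h>

int main(void)
{
        char line[64];
        /* path assumes the common "LID0" device name */
        FILE *f = fopen("/proc/acpi/button/lid/LID0/state", "r");

        if (!f) {
                perror("lid state");
                return 1;
        }
        if (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* e.g. "state:      open" */
        fclose(f);
        return 0;
}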
Reversion of button.lid_init_state=method doesn't actually reverts to old default behavior as doing so can enter a regression loop, but facilitates users to work the reported regressions around with button.lid_init_state=method. Fixes: ecb10b694b72 (ACPI / button: Remove lid_init_state=method mode) Link: https://bugzilla.kernel.org/show_bug.cgi?id=195455 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1430259 Tested-by: Steffen Weber Tested-by: Julian Wiedmann Reported-by: Joachim Frieben Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- Documentation/acpi/acpi-lid.txt | 16 ++++++++++++---- drivers/acpi/button.c | 9 +++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt index 22cb3091f297..effe7af3a5af 100644 --- a/Documentation/acpi/acpi-lid.txt +++ b/Documentation/acpi/acpi-lid.txt @@ -59,20 +59,28 @@ button driver uses the following 3 modes in order not to trigger issues. If the userspace hasn't been prepared to ignore the unreliable "opened" events and the unreliable initial state notification, Linux users can use the following kernel parameters to handle the possible issues: -A. button.lid_init_state=open: +A. button.lid_init_state=method: + When this option is specified, the ACPI button driver reports the + initial lid state using the returning value of the _LID control method + and whether the "opened"/"closed" events are paired fully relies on the + firmware implementation. + This option can be used to fix some platforms where the returning value + of the _LID control method is reliable but the initial lid state + notification is missing. + This option is the default behavior during the period the userspace + isn't ready to handle the buggy AML tables. +B. button.lid_init_state=open: When this option is specified, the ACPI button driver always reports the initial lid state as "opened" and whether the "opened"/"closed" events are paired fully relies on the firmware implementation. This may fix some platforms where the returning value of the _LID control method is not reliable and the initial lid state notification is missing. - This option is the default behavior during the period the userspace - isn't ready to handle the buggy AML tables. If the userspace has been prepared to ignore the unreliable "opened" events and the unreliable initial state notification, Linux users should always use the following kernel parameter: -B. button.lid_init_state=ignore: +C. 
button.lid_init_state=ignore: When this option is specified, the ACPI button driver never reports the initial lid state and there is a compensation mechanism implemented to ensure that the reliable "closed" notifications can always be delievered diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 668137e4a069..6d5a8c1d3132 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -57,6 +57,7 @@ #define ACPI_BUTTON_LID_INIT_IGNORE 0x00 #define ACPI_BUTTON_LID_INIT_OPEN 0x01 +#define ACPI_BUTTON_LID_INIT_METHOD 0x02 #define _COMPONENT ACPI_BUTTON_COMPONENT ACPI_MODULE_NAME("button"); @@ -376,6 +377,9 @@ static void acpi_lid_initialize_state(struct acpi_device *device) case ACPI_BUTTON_LID_INIT_OPEN: (void)acpi_lid_notify_state(device, 1); break; + case ACPI_BUTTON_LID_INIT_METHOD: + (void)acpi_lid_update_state(device); + break; case ACPI_BUTTON_LID_INIT_IGNORE: default: break; @@ -559,6 +563,9 @@ static int param_set_lid_init_state(const char *val, struct kernel_param *kp) if (!strncmp(val, "open", sizeof("open") - 1)) { lid_init_state = ACPI_BUTTON_LID_INIT_OPEN; pr_info("Notify initial lid state as open\n"); + } else if (!strncmp(val, "method", sizeof("method") - 1)) { + lid_init_state = ACPI_BUTTON_LID_INIT_METHOD; + pr_info("Notify initial lid state with _LID return value\n"); } else if (!strncmp(val, "ignore", sizeof("ignore") - 1)) { lid_init_state = ACPI_BUTTON_LID_INIT_IGNORE; pr_info("Do not notify initial lid state\n"); @@ -572,6 +579,8 @@ static int param_get_lid_init_state(char *buffer, struct kernel_param *kp) switch (lid_init_state) { case ACPI_BUTTON_LID_INIT_OPEN: return sprintf(buffer, "open"); + case ACPI_BUTTON_LID_INIT_METHOD: + return sprintf(buffer, "method"); case ACPI_BUTTON_LID_INIT_IGNORE: return sprintf(buffer, "ignore"); default: From 9183980a9e8a8ca4540c8367434dfe3c97177ce0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 19 May 2017 11:39:09 +0200 Subject: [PATCH 395/465] x86/MCE: Export memory_error() commit 2d1f406139ec20320bf38bcd2461aa8e358084b5 upstream. Export the function which checks whether an MCE is a memory error to other users so that we can reuse the logic. Drop the boot_cpu_data use, while at it, as mce.cpuvendor already has the CPU vendor in there. Integrate a piece from a patch from Vishal Verma to export it for modules (nfit). The main reason we're exporting it is that the nfit handler nfit_handle_mce() needs to detect a memory error properly before doing its recovery actions. 
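As a usage sketch, any modular consumer on the MCE decode chain can now filter for memory errors the same way nfit intends to; the fragment below is illustrative only, with registration via mce_register_decode_chain() left to module init code:

#include <linux/notifier.h>
#include <linux/printk.h>
#include <asm/mce.h>

static int sketch_mce_notify(struct notifier_block *nb, unsigned long val,
                             void *data)
{
        struct mce *m = data;

        /* only memory errors are interesting to this consumer */
        if (!m || !mce_is_memory_error(m))
                return NOTIFY_DONE;

        pr_info("memory error: status=%llx addr=%llx cpu=%u\n",
                (unsigned long long)m->status, (unsigned long long)m->addr,
                m->extcpu);
        return NOTIFY_OK;
}

static struct notifier_block sketch_mce_nb = {
        .notifier_call = sketch_mce_notify,
};
/* registered at module init with mce_register_decode_chain(&sketch_mce_nb) */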
Signed-off-by: Borislav Petkov Cc: Tony Luck Cc: Vishal Verma Link: http://lkml.kernel.org/r/20170519093915.15413-2-bp@alien8.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index e63873683d4a..f1f68c720675 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -264,6 +264,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s #endif int mce_available(struct cpuinfo_x86 *c); +bool mce_is_memory_error(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index af44ebeb593f..104cda1f9073 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -643,16 +643,14 @@ static void mce_read_aux(struct mce *m, int i) } } -static bool memory_error(struct mce *m) +bool mce_is_memory_error(struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - if (c->x86_vendor == X86_VENDOR_AMD) { + if (m->cpuvendor == X86_VENDOR_AMD) { /* ErrCodeExt[20:16] */ u8 xec = (m->status >> 16) & 0x1f; return (xec == 0x0 || xec == 0x8); - } else if (c->x86_vendor == X86_VENDOR_INTEL) { + } else if (m->cpuvendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes * @@ -673,6 +671,7 @@ static bool memory_error(struct mce *m) return false; } +EXPORT_SYMBOL_GPL(mce_is_memory_error); DEFINE_PER_CPU(unsigned, mce_poll_count); @@ -734,7 +733,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); - if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) + if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m)) if (m.status & MCI_STATUS_ADDRV) m.severity = severity; From 93da4e6c45f4189ee4b034a0f09e90a4285e6ee1 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Fri, 19 May 2017 11:39:10 +0200 Subject: [PATCH 396/465] acpi, nfit: Fix the memory error check in nfit_handle_mce() commit fc08a4703a418a398bbb575ac311d36d110ac786 upstream. The check for an MCE being a memory error in the NFIT mce handler was bogus. Use the new mce_is_memory_error() helper to detect the error properly. Reported-by: Tony Luck Signed-off-by: Vishal Verma Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170519093915.15413-3-bp@alien8.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index 3ba1c3472cf9..fd86bec98dea 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -26,7 +26,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, struct nfit_spa *nfit_spa; /* We only care about memory errors */ - if (!(mce->status & MCACOD)) + if (!mce_is_memory_error(mce)) return NOTIFY_DONE; /* From 34211cbf94b5e9bb5d52fdc7b95389fdcaa01221 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 25 Apr 2017 12:58:31 -0700 Subject: [PATCH 397/465] ACPI / sysfs: fix acpi_get_table() leak / acpi-sysfs denial of service commit 0de0e198bc7191a0e46cf71f66fec4d07ca91396 upstream. 
Reading an ACPI table through the /sys/firmware/acpi/tables interface more than 65,536 times leads to the following log message: ACPI Error: Table ffff88033595eaa8, Validation count is zero after increment (20170119/tbutils-423) ...and the table being unavailable until the next reboot. Add the missing acpi_put_table() so the table ->validation_count is decremented after each read. Reported-by: Anush Seetharaman Fixes: 174cc7187e6f "ACPICA: Tables: Back port acpi_get_table_with_size() ..." Signed-off-by: Dan Williams Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index cf05ae973381..5180fef9eb49 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -333,14 +333,17 @@ static ssize_t acpi_table_show(struct file *filp, struct kobject *kobj, container_of(bin_attr, struct acpi_table_attr, attr); struct acpi_table_header *table_header = NULL; acpi_status status; + ssize_t rc; status = acpi_get_table(table_attr->name, table_attr->instance, &table_header); if (ACPI_FAILURE(status)) return -ENODEV; - return memory_read_from_buffer(buf, count, &offset, - table_header, table_header->length); + rc = memory_read_from_buffer(buf, count, &offset, table_header, + table_header->length); + acpi_put_table(table_header); + return rc; } static int acpi_table_attr_init(struct kobject *tables_obj, From 5d0e4205ea6c29a8ed72bf333662e6f3f34162d4 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 9 May 2017 13:57:31 +0800 Subject: [PATCH 398/465] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling commit 2ea65321b83539afc1d45c1bea39c55ab42af62b upstream. In the Linux kernel, acpi_get_table() "clones" haven't been fully balanced by acpi_put_table() invocations. In upstream ACPICA, due to the design change, there are also unbalanced acpi_get_table_by_index() invocations requiring special care. acpi_get_table() reference counting mismatches may occor due to that and printing error messages related to them is not useful at this point. The strict balanced validation count check should only be enabled after confirming that all invocations are safe and aligned with their designed purposes. Thus this patch removes the error value returned by acpi_tb_get_table() in that case along with the accompanying error message to fix the issue. Fixes: 174cc7187e6f (ACPICA: Tables: Back port acpi_get_table_with_size() and early_acpi_os_unmap_memory() from Linux kernel) Reported-by: Anush Seetharaman Reported-by: Dan Williams Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/tbutils.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 5a968a78652b..7abe66505739 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -418,11 +418,7 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc, table_desc->validation_count++; if (table_desc->validation_count == 0) { - ACPI_ERROR((AE_INFO, - "Table %p, Validation count is zero after increment\n", - table_desc)); table_desc->validation_count--; - return_ACPI_STATUS(AE_LIMIT); } *out_table = table_desc->pointer; From 4c5681afdf984b5eec41b280d43b9934af0f3144 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 10 May 2017 18:12:40 +0200 Subject: [PATCH 399/465] Revert "ACPI / button: Change default behavior to lid_init_state=open" commit 878d8db039daac0938238e9a40a5bd6e50ee3c9b upstream. Revert commit 77e9a4aa9de1 (ACPI / button: Change default behavior to lid_init_state=open) which changed the kernel's behavior on laptops that boot with closed lids and expect the lid switch state to be reported accurately by the kernel. If you boot or resume your laptop with the lid closed on a docking station while using an external monitor connected to it, both internal and external displays will light on, while only the external should. There is a design choice in gdm to only provide the greeter on the internal display when lit on, so users only see a gray area on the external monitor. Also, the cursor will not show up as it's by default on the internal display too. To "fix" that, users have to open the laptop once and close it once again to sync the state of the switch with the hardware state. Even if the "method" operation mode implementation can be buggy on some platforms, the "open" choice is worse. It breaks docking stations basically and there is no way to have a user-space hwdb to fix that. On the contrary, it's rather easy in user-space to have a hwdb with the problematic platforms. Then, libinput (1.7.0+) can fix the state of the lid switch for us: you need to set the udev property LIBINPUT_ATTR_LID_SWITCH_RELIABILITY to 'write_open'. When libinput detects internal keyboard events, it will overwrite the state of the switch to open, making it reliable again. Given that logind only checks the lid switch value after a timeout, we can assume the user will use the internal keyboard before this timeout expires. For example, such a hwdb entry is: libinput:name:*Lid Switch*:dmi:*svnMicrosoftCorporation:pnSurface3:* LIBINPUT_ATTR_LID_SWITCH_RELIABILITY=write_open Link: https://bugzilla.gnome.org/show_bug.cgi?id=782380 Signed-off-by: Benjamin Tissoires Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/button.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 6d5a8c1d3132..e19f530f1083 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -113,7 +113,7 @@ struct acpi_button { static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); static struct acpi_device *lid_device; -static u8 lid_init_state = ACPI_BUTTON_LID_INIT_OPEN; +static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD; static unsigned long lid_report_interval __read_mostly = 500; module_param(lid_report_interval, ulong, 0644); From 21f8aa4cfc764b64df81247d0c5e59eb7a5c9ead Mon Sep 17 00:00:00 2001 From: Srinath Mannam Date: Thu, 18 May 2017 22:27:40 +0530 Subject: [PATCH 400/465] mmc: sdhci-iproc: suppress spurious interrupt with Multiblock read commit f5f968f2371ccdebb8a365487649673c9af68d09 upstream. The stingray SDHCI hardware supports ACMD12 and automatically issues after multi block transfer completed. If ACMD12 in SDHCI is disabled, spurious tx done interrupts are seen on multi block read command with below error message: Got data interrupt 0x00000002 even though no data operation was in progress. This patch uses SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 to enable ACM12 support in SDHCI hardware and suppress spurious interrupt. Signed-off-by: Srinath Mannam Reviewed-by: Ray Jui Reviewed-by: Scott Branden Acked-by: Adrian Hunter Fixes: b580c52d58d9 ("mmc: sdhci-iproc: add IPROC SDHCI driver") Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-iproc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 3275d4995812..61666d269771 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -187,7 +187,8 @@ static const struct sdhci_iproc_data iproc_cygnus_data = { }; static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = { - .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | + SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN, .ops = &sdhci_iproc_ops, }; From a168ac5b24bf7724b2fd49ec5362c4c0ff169d94 Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 18 May 2017 15:40:05 -0700 Subject: [PATCH 401/465] scsi: zero per-cmd private driver data for each MQ I/O commit 1bad6c4a57efda0d5f5bf8a2403b21b1ed24875c upstream. In lower layer driver's (LLD) scsi_host_template, the driver may optionally ask SCSI to allocate its private driver memory for each command, by specifying cmd_size. This memory is allocated at the end of scsi_cmnd by SCSI. Later when SCSI queues a command, the LLD can use scsi_cmd_priv to get to its private data. Some LLD, e.g. hv_storvsc, doesn't clear its private data before use. In this case, the LLD may get to stale or uninitialized data in its private driver memory. This may result in unexpected driver and hardware behavior. Fix this problem by also zeroing the private driver memory before passing them to LLD. Signed-off-by: Long Li Reviewed-by: Bart Van Assche Reviewed-by: KY Srinivasan Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 15c9fe766071..464e7a989cc5 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1850,7 +1850,7 @@ static int scsi_mq_prep_fn(struct request *req) /* zero out the cmd, except for the embedded scsi_request */ memset((char *)cmd + sizeof(cmd->req), 0, - sizeof(*cmd) - sizeof(cmd->req)); + sizeof(*cmd) - sizeof(cmd->req) + shost->hostt->cmd_size); req->special = cmd; From c732f3088710cbde225370473893c5f127fd1aca Mon Sep 17 00:00:00 2001 From: Jiang Yi Date: Tue, 16 May 2017 17:57:55 +0800 Subject: [PATCH 402/465] iscsi-target: Always wait for kthread_should_stop() before kthread exit commit 5e0cf5e6c43b9e19fc0284f69e5cd2b4a47523b0 upstream. There are three timing problems in the kthread usages of iscsi_target_mod: - np_thread of struct iscsi_np - rx_thread and tx_thread of struct iscsi_conn In iscsit_close_connection(), it calls send_sig(SIGINT, conn->tx_thread, 1); kthread_stop(conn->tx_thread); In conn->tx_thread, which is iscsi_target_tx_thread(), when it receive SIGINT the kthread will exit without checking the return value of kthread_should_stop(). So if iscsi_target_tx_thread() exit right between send_sig(SIGINT...) and kthread_stop(...), the kthread_stop() will try to stop an already stopped kthread. This is invalid according to the documentation of kthread_stop(). (Fix -ECONNRESET logout handling in iscsi_target_tx_thread and early iscsi_target_rx_thread failure case - nab) Signed-off-by: Jiang Yi Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 30 ++++++++++++++++++----- drivers/target/iscsi/iscsi_target_erl0.c | 6 ++++- drivers/target/iscsi/iscsi_target_erl0.h | 2 +- drivers/target/iscsi/iscsi_target_login.c | 4 +++ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 8beed3451346..fd45b48480cb 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3810,6 +3810,8 @@ int iscsi_target_tx_thread(void *arg) { int ret = 0; struct iscsi_conn *conn = arg; + bool conn_freed = false; + /* * Allow ourselves to be interrupted by SIGINT so that a * connection recovery / failure event can be triggered externally. @@ -3835,12 +3837,14 @@ int iscsi_target_tx_thread(void *arg) goto transport_err; ret = iscsit_handle_response_queue(conn); - if (ret == 1) + if (ret == 1) { goto get_immediate; - else if (ret == -ECONNRESET) + } else if (ret == -ECONNRESET) { + conn_freed = true; goto out; - else if (ret < 0) + } else if (ret < 0) { goto transport_err; + } } transport_err: @@ -3850,8 +3854,13 @@ int iscsi_target_tx_thread(void *arg) * responsible for cleaning up the early connection failure. 
*/ if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN) - iscsit_take_action_for_connection_exit(conn); + iscsit_take_action_for_connection_exit(conn, &conn_freed); out: + if (!conn_freed) { + while (!kthread_should_stop()) { + msleep(100); + } + } return 0; } @@ -4024,6 +4033,7 @@ int iscsi_target_rx_thread(void *arg) { int rc; struct iscsi_conn *conn = arg; + bool conn_freed = false; /* * Allow ourselves to be interrupted by SIGINT so that a @@ -4036,7 +4046,7 @@ int iscsi_target_rx_thread(void *arg) */ rc = wait_for_completion_interruptible(&conn->rx_login_comp); if (rc < 0 || iscsi_target_check_conn_state(conn)) - return 0; + goto out; if (!conn->conn_transport->iscsit_get_rx_pdu) return 0; @@ -4045,7 +4055,15 @@ int iscsi_target_rx_thread(void *arg) if (!signal_pending(current)) atomic_set(&conn->transport_failed, 1); - iscsit_take_action_for_connection_exit(conn); + iscsit_take_action_for_connection_exit(conn, &conn_freed); + +out: + if (!conn_freed) { + while (!kthread_should_stop()) { + msleep(100); + } + } + return 0; } diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 9a96e17bf7cd..7fe2aa73cff6 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -930,8 +930,10 @@ static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn) } } -void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) +void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn, bool *conn_freed) { + *conn_freed = false; + spin_lock_bh(&conn->state_lock); if (atomic_read(&conn->connection_exit)) { spin_unlock_bh(&conn->state_lock); @@ -942,6 +944,7 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) { spin_unlock_bh(&conn->state_lock); iscsit_close_connection(conn); + *conn_freed = true; return; } @@ -955,4 +958,5 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) spin_unlock_bh(&conn->state_lock); iscsit_handle_connection_cleanup(conn); + *conn_freed = true; } diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h index 60e69e2af6ed..3822d9cd1230 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.h +++ b/drivers/target/iscsi/iscsi_target_erl0.h @@ -15,6 +15,6 @@ extern int iscsit_stop_time2retain_timer(struct iscsi_session *); extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *); extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); extern void iscsit_fall_back_to_erl0(struct iscsi_session *); -extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *); +extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *, bool *); #endif /*** ISCSI_TARGET_ERL0_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 66238477137b..92b96b51d506 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1464,5 +1464,9 @@ int iscsi_target_login_thread(void *arg) break; } + while (!kthread_should_stop()) { + msleep(100); + } + return 0; } From 14ba78937e8c3fa1b3d1b75206f884ace830caec Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 24 May 2017 21:47:09 -0700 Subject: [PATCH 403/465] iscsi-target: Fix initial login PDU asynchronous socket close OOPs commit 25cdda95fda78d22d44157da15aa7ea34be3c804 upstream. 
This patch fixes an OOPs originally introduced by: commit bb048357dad6d604520c91586334c9c230366a14 Author: Nicholas Bellinger Date: Thu Sep 5 14:54:04 2013 -0700 iscsi-target: Add sk->sk_state_change to cleanup after TCP failure which would trigger a NULL pointer dereference when a TCP connection was closed asynchronously via iscsi_target_sk_state_change(), but only when the initial PDU processing in iscsi_target_do_login() from iscsi_np process context was blocked waiting for backend I/O to complete. To address this issue, this patch makes the following changes. First, it introduces some common helper functions used for checking socket closing state, checking login_flags, and atomically checking socket closing state + setting login_flags. Second, it introduces a LOGIN_FLAGS_INITIAL_PDU bit to know when a TCP connection has dropped via iscsi_target_sk_state_change(), but the initial PDU processing within iscsi_target_do_login() in iscsi_np context is still running. For this case, it sets LOGIN_FLAGS_CLOSED, but doesn't invoke schedule_delayed_work(). The original NULL pointer dereference case reported by MNC is now handled by iscsi_target_do_login() doing an iscsi_target_sk_check_close() before transitioning to FFP to determine when the socket has already closed, or iscsi_target_start_negotiation() if the login needs to exchange more PDUs (e.g. iscsi_target_do_login returned 0) but the socket has closed. For both of these cases, the cleanup of remaining connection resources will occur in iscsi_target_start_negotiation() from iscsi_np process context once the failure is detected. Finally, to handle the case where iscsi_target_sk_state_change() is called after the initial PDU processing is complete, it now invokes conn->login_work -> iscsi_target_do_login_rx() to perform cleanup once existing iscsi_target_sk_check_close() checks detect connection failure. For this case, the cleanup of remaining connection resources will occur in iscsi_target_do_login_rx() from delayed workqueue process context once the failure is detected. 
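All of the new helpers follow one locking rule: socket state and login_flags are only sampled together under sk_callback_lock, so ->sk_state_change() and the iscsi_np/login-work contexts never observe half-updated state. A stripped-down sketch of that rule (simplified, not the driver's exact helpers):

#include <linux/bitops.h>
#include <net/sock.h>
#include <net/tcp_states.h>

static bool sketch_conn_closing(struct socket *sock,
                                unsigned long *login_flags,
                                unsigned int closed_bit)
{
        struct sock *sk = sock->sk;
        bool closing;

        read_lock_bh(&sk->sk_callback_lock);
        closing = sk->sk_state == TCP_CLOSE ||
                  sk->sk_state == TCP_CLOSE_WAIT ||
                  test_bit(closed_bit, login_flags);
        read_unlock_bh(&sk->sk_callback_lock);

        return closing;
}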
Reported-by: Mike Christie Reviewed-by: Mike Christie Tested-by: Mike Christie Cc: Mike Christie Reported-by: Hannes Reinecke Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Varun Prakash Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_nego.c | 194 +++++++++++++++-------- include/target/iscsi/iscsi_target_core.h | 1 + 2 files changed, 133 insertions(+), 62 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 7ccc9c1cbfd1..6f88b31242b0 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -493,14 +493,60 @@ static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn) static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *); -static bool iscsi_target_sk_state_check(struct sock *sk) +static bool __iscsi_target_sk_check_close(struct sock *sk) { if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { - pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE," + pr_debug("__iscsi_target_sk_check_close: TCP_CLOSE_WAIT|TCP_CLOSE," "returning FALSE\n"); - return false; + return true; } - return true; + return false; +} + +static bool iscsi_target_sk_check_close(struct iscsi_conn *conn) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_flag(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = test_bit(flag, &conn->login_flags); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + if (!state) + clear_bit(flag, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + return state; } static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) @@ -540,6 +586,20 @@ static void iscsi_target_do_login_rx(struct work_struct *work) pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", conn, current->comm, current->pid); + /* + * If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready() + * before initial PDU processing in iscsi_target_start_negotiation() + * has completed, go ahead and retry until it's cleared. + * + * Otherwise if the TCP connection drops while this is occuring, + * iscsi_target_start_negotiation() will detect the failure, call + * cancel_delayed_work_sync(&conn->login_work), and cleanup the + * remaining iscsi connection resources from iscsi_np process context. 
+ */ + if (iscsi_target_sk_check_flag(conn, LOGIN_FLAGS_INITIAL_PDU)) { + schedule_delayed_work(&conn->login_work, msecs_to_jiffies(10)); + return; + } spin_lock(&tpg->tpg_state_lock); state = (tpg->tpg_state == TPG_STATE_ACTIVE); @@ -547,26 +607,12 @@ static void iscsi_target_do_login_rx(struct work_struct *work) if (!state) { pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; + goto err; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (iscsi_target_sk_check_close(conn)) { + pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); + goto err; } conn->login_kworker = current; @@ -584,34 +630,29 @@ static void iscsi_target_do_login_rx(struct work_struct *work) flush_signals(current); conn->login_kworker = NULL; - if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (rc < 0) + goto err; pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n", conn, current->comm, current->pid); rc = iscsi_target_do_login(conn, login); if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); + goto err; } else if (!rc) { - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } + if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE)) + goto err; } else if (rc == 1) { iscsi_target_nego_release(conn); iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } + return; + +err: + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); } static void iscsi_target_do_cleanup(struct work_struct *work) @@ -659,31 +700,54 @@ static void iscsi_target_sk_state_change(struct sock *sk) orig_state_change(sk); return; } + state = __iscsi_target_sk_check_close(sk); + pr_debug("__iscsi_target_sk_close_change: state: %d\n", state); + if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change" " conn: %p\n", conn); + if (state) + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } - if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n", conn); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } + /* + * If the TCP connection has dropped, go ahead and set LOGIN_FLAGS_CLOSED, + * but only queue conn->login_work -> iscsi_target_do_login_rx() + * processing if LOGIN_FLAGS_INITIAL_PDU has already been cleared. + * + * When iscsi_target_do_login_rx() runs, iscsi_target_sk_check_close() + * will detect the dropped TCP connection from delayed workqueue context. 
+ * + * If LOGIN_FLAGS_INITIAL_PDU is still set, which means the initial + * iscsi_target_start_negotiation() is running, iscsi_target_do_login() + * via iscsi_target_sk_check_close() or iscsi_target_start_negotiation() + * via iscsi_target_sk_check_and_clear() is responsible for detecting the + * dropped TCP connection in iscsi_np process context, and cleaning up + * the remaining iscsi connection resources. + */ + if (state) { + pr_debug("iscsi_target_sk_state_change got failed state\n"); + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); + state = test_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - write_unlock_bh(&sk->sk_callback_lock); - - pr_debug("iscsi_target_sk_state_change: state: %d\n", state); + orig_state_change(sk); - if (!state) { - pr_debug("iscsi_target_sk_state_change got failed state\n"); - schedule_delayed_work(&conn->login_cleanup_work, 0); + if (!state) + schedule_delayed_work(&conn->login_work, 0); return; } + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); } @@ -946,6 +1010,15 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo if (iscsi_target_handle_csg_one(conn, login) < 0) return -1; if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { + /* + * Check to make sure the TCP connection has not + * dropped asynchronously while session reinstatement + * was occuring in this kthread context, before + * transitioning to full feature phase operation. + */ + if (iscsi_target_sk_check_close(conn)) + return -1; + login->tsih = conn->sess->tsih; login->login_complete = 1; iscsi_target_restore_sock_callbacks(conn); @@ -972,21 +1045,6 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo break; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - bool state; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login() failed state for" - " conn: %p\n", conn); - return -1; - } - } - return 0; } @@ -1255,10 +1313,22 @@ int iscsi_target_start_negotiation( write_lock_bh(&sk->sk_callback_lock); set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + set_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); write_unlock_bh(&sk->sk_callback_lock); } - + /* + * If iscsi_target_do_login returns zero to signal more PDU + * exchanges are required to complete the login, go ahead and + * clear LOGIN_FLAGS_INITIAL_PDU but only if the TCP connection + * is still active. + * + * Otherwise if TCP connection dropped asynchronously, go ahead + * and perform connection cleanup now. 
+ */ ret = iscsi_target_do_login(conn, login); + if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU)) + ret = -1; + if (ret < 0) { cancel_delayed_work_sync(&conn->login_work); cancel_delayed_work_sync(&conn->login_cleanup_work); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 275581d483dd..5f17fb770477 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -557,6 +557,7 @@ struct iscsi_conn { #define LOGIN_FLAGS_READ_ACTIVE 1 #define LOGIN_FLAGS_CLOSED 2 #define LOGIN_FLAGS_READY 4 +#define LOGIN_FLAGS_INITIAL_PDU 8 unsigned long login_flags; struct delayed_work login_work; struct delayed_work login_cleanup_work; From 1fb66c6aad5fe7c5e62cc0cd27cc3c6936ef8601 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Sat, 20 May 2017 09:58:10 +0200 Subject: [PATCH 404/465] scsi: scsi_dh_rdac: Use ctlr directly in rdac_failover_get() commit 0648a07c9b22acc33ead0645cf8f607b0c9c7e32 upstream. rdac_failover_get references struct rdac_controller as ctlr->ms_sdev->handler_data->ctlr for no apparent reason. Besides being inefficient this also introduces a null-pointer dereference as send_mode_select() sets ctlr->ms_sdev to NULL before calling rdac_failover_get(): [ 18.432550] device-mapper: multipath service-time: version 0.3.0 loaded [ 18.436124] BUG: unable to handle kernel NULL pointer dereference at 0000000000000790 [ 18.436129] IP: send_mode_select+0xca/0x560 [ 18.436129] PGD 0 [ 18.436130] P4D 0 [ 18.436130] [ 18.436132] Oops: 0000 [#1] SMP [ 18.436133] Modules linked in: dm_service_time sd_mod dm_multipath amdkfd amd_iommu_v2 radeon(+) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qla2xxx drm serio_raw scsi_transport_fc bnx2 i2c_core dm_mirror dm_region_hash dm_log dm_mod [ 18.436143] CPU: 4 PID: 443 Comm: kworker/u16:2 Not tainted 4.12.0-rc1.1.el7.test.x86_64 #1 [ 18.436144] Hardware name: IBM BladeCenter LS22 -[79013SG]-/Server Blade, BIOS -[L8E164AUS-1.07]- 05/25/2011 [ 18.436145] Workqueue: kmpath_rdacd send_mode_select [ 18.436146] task: ffff880225116a40 task.stack: ffffc90002bd8000 [ 18.436148] RIP: 0010:send_mode_select+0xca/0x560 [ 18.436148] RSP: 0018:ffffc90002bdbda8 EFLAGS: 00010246 [ 18.436149] RAX: 0000000000000000 RBX: ffffc90002bdbe08 RCX: ffff88017ef04a80 [ 18.436150] RDX: ffffc90002bdbe08 RSI: ffff88017ef04a80 RDI: ffff8802248e4388 [ 18.436151] RBP: ffffc90002bdbe48 R08: 0000000000000000 R09: ffffffff81c104c0 [ 18.436151] R10: 00000000000001ff R11: 000000000000035a R12: ffffc90002bdbdd8 [ 18.436152] R13: ffff8802248e4390 R14: ffff880225152800 R15: ffff8802248e4400 [ 18.436153] FS: 0000000000000000(0000) GS:ffff880227d00000(0000) knlGS:0000000000000000 [ 18.436154] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 18.436154] CR2: 0000000000000790 CR3: 000000042535b000 CR4: 00000000000006e0 [ 18.436155] Call Trace: [ 18.436159] ? rdac_activate+0x14e/0x150 [ 18.436161] ? refcount_dec_and_test+0x11/0x20 [ 18.436162] ? kobject_put+0x1c/0x50 [ 18.436165] ? scsi_dh_activate+0x6f/0xd0 [ 18.436168] process_one_work+0x149/0x360 [ 18.436170] worker_thread+0x4d/0x3c0 [ 18.436172] kthread+0x109/0x140 [ 18.436173] ? rescuer_thread+0x380/0x380 [ 18.436174] ? 
kthread_park+0x60/0x60 [ 18.436176] ret_from_fork+0x2c/0x40 [ 18.436177] Code: 49 c7 46 20 00 00 00 00 4c 89 ef c6 07 00 0f 1f 40 00 45 31 ed c7 45 b0 05 00 00 00 44 89 6d b4 4d 89 f5 4c 8b 75 a8 49 8b 45 20 <48> 8b b0 90 07 00 00 48 8b 56 10 8b 42 10 48 8d 7a 28 85 c0 0f [ 18.436192] RIP: send_mode_select+0xca/0x560 RSP: ffffc90002bdbda8 [ 18.436192] CR2: 0000000000000790 [ 18.436198] ---[ end trace 40f3e4dca1ffabdd ]--- [ 18.436199] Kernel panic - not syncing: Fatal exception [ 18.436222] Kernel Offset: disabled [-- MARK -- Thu May 18 11:45:00 2017] Fixes: 327825574132 scsi_dh_rdac: switch to scsi_execute_req_flags() Signed-off-by: Artem Savkov Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/device_handler/scsi_dh_rdac.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 3cbab8710e58..2ceff585f189 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -265,18 +265,16 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, struct list_head *list, unsigned char *cdb) { - struct scsi_device *sdev = ctlr->ms_sdev; - struct rdac_dh_data *h = sdev->handler_data; struct rdac_mode_common *common; unsigned data_size; struct rdac_queue_data *qdata; u8 *lun_table; - if (h->ctlr->use_ms10) { + if (ctlr->use_ms10) { struct rdac_pg_expanded *rdac_pg; data_size = sizeof(struct rdac_pg_expanded); - rdac_pg = &h->ctlr->mode_select.expanded; + rdac_pg = &ctlr->mode_select.expanded; memset(rdac_pg, 0, data_size); common = &rdac_pg->common; rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40; @@ -288,7 +286,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, struct rdac_pg_legacy *rdac_pg; data_size = sizeof(struct rdac_pg_legacy); - rdac_pg = &h->ctlr->mode_select.legacy; + rdac_pg = &ctlr->mode_select.legacy; memset(rdac_pg, 0, data_size); common = &rdac_pg->common; rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER; @@ -304,7 +302,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, } /* Prepare the command. */ - if (h->ctlr->use_ms10) { + if (ctlr->use_ms10) { cdb[0] = MODE_SELECT_10; cdb[7] = data_size >> 8; cdb[8] = data_size & 0xff; From e920be8367e2cbc52d26969450544f993a3e7a5d Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Tue, 9 May 2017 11:50:26 -0500 Subject: [PATCH 405/465] ibmvscsis: Clear left-over abort_cmd pointers commit 98883f1b5415ea9dce60d5178877d15f4faa10b8 upstream. With the addition of ibmvscsis->abort_cmd pointer within commit 25e78531268e ("ibmvscsis: Do not send aborted task response"), make sure to explicitly NULL these pointers when clearing DELAY_SEND flag. Do this for two cases, when getting the new new ibmvscsis descriptor in ibmvscsis_get_free_cmd() and before posting the response completion in ibmvscsis_send_messages(). Signed-off-by: Bryant G. 
Ly Reviewed-by: Michael Cyr Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index d390325c99ec..ee64241865e6 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1170,6 +1170,8 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi) cmd = list_first_entry_or_null(&vscsi->free_cmd, struct ibmvscsis_cmd, list); if (cmd) { + if (cmd->abort_cmd) + cmd->abort_cmd = NULL; cmd->flags &= ~(DELAY_SEND); list_del(&cmd->list); cmd->iue = iue; @@ -1774,6 +1776,7 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi) if (cmd->abort_cmd) { retry = true; cmd->abort_cmd->flags &= ~(DELAY_SEND); + cmd->abort_cmd = NULL; } /* From 8d975ebd0a928b7033ac343a1692b253700391c1 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Wed, 10 May 2017 14:35:47 -0500 Subject: [PATCH 406/465] ibmvscsis: Fix the incorrect req_lim_delta commit 75dbf2d36f6b122ad3c1070fe4bf95f71bbff321 upstream. The current code is not correctly calculating the req_lim_delta. We want to make sure vscsi->credit is always incremented when we do not send a response for the scsi op. Thus for the case where there is a successfully aborted task we need to make sure the vscsi->credit is incremented. v2 - Moves the original location of the vscsi->credit increment to a better spot. Since if we increment credit, the next command we send back will have increased req_lim_delta. But we probably shouldn't be doing that until the aborted cmd is actually released. Otherwise the client will think that it can send a new command, and we could find ourselves short of command elements. Not likely, but could happen. This patch depends on both: commit 25e78531268e ("ibmvscsis: Do not send aborted task response") commit 98883f1b5415 ("ibmvscsis: Clear left-over abort_cmd pointers") Signed-off-by: Bryant G. Ly Reviewed-by: Michael Cyr Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index ee64241865e6..abf6026645dd 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1791,6 +1791,25 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi) list_del(&cmd->list); ibmvscsis_free_cmd_resources(vscsi, cmd); + /* + * With a successfully aborted op + * through LIO we want to increment the + * the vscsi credit so that when we dont + * send a rsp to the original scsi abort + * op (h_send_crq), but the tm rsp to + * the abort is sent, the credit is + * correctly sent with the abort tm rsp. + * We would need 1 for the abort tm rsp + * and 1 credit for the aborted scsi op. + * Thus we need to increment here. + * Also we want to increment the credit + * here because we want to make sure + * cmd is actually released first + * otherwise the client will think it + * it can send a new cmd, and we could + * find ourselves short of cmd elements. 
+ */ + vscsi->credit += 1; } else { iue = cmd->iue; @@ -2965,10 +2984,7 @@ static long srp_build_response(struct scsi_info *vscsi, rsp->opcode = SRP_RSP; - if (vscsi->credit > 0 && vscsi->state == SRP_PROCESSING) - rsp->req_lim_delta = cpu_to_be32(vscsi->credit); - else - rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit); + rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit); rsp->tag = cmd->rsp.tag; rsp->flags = 0; From f88d3d6e4f8407d3a09390eb8db7ffb37e46bb95 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 25 Apr 2017 11:29:56 -0700 Subject: [PATCH 407/465] HID: wacom: Have wacom_tpc_irq guard against possible NULL dereference commit 2ac97f0f6654da14312d125005c77a6010e0ea38 upstream. The following Smatch complaint was generated in response to commit 2a6cdbd ("HID: wacom: Introduce new 'touch_input' device"): drivers/hid/wacom_wac.c:1586 wacom_tpc_irq() error: we previously assumed 'wacom->touch_input' could be null (see line 1577) The 'touch_input' and 'pen_input' variables point to the 'struct input_dev' used for relaying touch and pen events to userspace, respectively. If a device does not have a touch interface or pen interface, the associated input variable is NULL. The 'wacom_tpc_irq()' function is responsible for forwarding input reports to a more-specific IRQ handler function. An unknown report could theoretically be mistaken as e.g. a touch report on a device which does not have a touch interface. This can be prevented by only calling the pen/touch functions are called when the pen/touch pointers are valid. Fixes: 2a6cdbd ("HID: wacom: Introduce new 'touch_input' device") Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 45 +++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index c68ac65db7ff..4859926332c1 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1571,37 +1571,38 @@ static int wacom_tpc_irq(struct wacom_wac *wacom, size_t len) { unsigned char *data = wacom->data; - if (wacom->pen_input) + if (wacom->pen_input) { dev_dbg(wacom->pen_input->dev.parent, "%s: received report #%d\n", __func__, data[0]); - else if (wacom->touch_input) + + if (len == WACOM_PKGLEN_PENABLED || + data[0] == WACOM_REPORT_PENABLED) + return wacom_tpc_pen(wacom); + } + else if (wacom->touch_input) { dev_dbg(wacom->touch_input->dev.parent, "%s: received report #%d\n", __func__, data[0]); - switch (len) { - case WACOM_PKGLEN_TPC1FG: - return wacom_tpc_single_touch(wacom, len); + switch (len) { + case WACOM_PKGLEN_TPC1FG: + return wacom_tpc_single_touch(wacom, len); - case WACOM_PKGLEN_TPC2FG: - return wacom_tpc_mt_touch(wacom); + case WACOM_PKGLEN_TPC2FG: + return wacom_tpc_mt_touch(wacom); - case WACOM_PKGLEN_PENABLED: - return wacom_tpc_pen(wacom); + default: + switch (data[0]) { + case WACOM_REPORT_TPC1FG: + case WACOM_REPORT_TPCHID: + case WACOM_REPORT_TPCST: + case WACOM_REPORT_TPC1FGE: + return wacom_tpc_single_touch(wacom, len); - default: - switch (data[0]) { - case WACOM_REPORT_TPC1FG: - case WACOM_REPORT_TPCHID: - case WACOM_REPORT_TPCST: - case WACOM_REPORT_TPC1FGE: - return wacom_tpc_single_touch(wacom, len); - - case WACOM_REPORT_TPCMT: - case WACOM_REPORT_TPCMT2: - return wacom_mt_touch(wacom); + case WACOM_REPORT_TPCMT: + case WACOM_REPORT_TPCMT2: + return wacom_mt_touch(wacom); - case WACOM_REPORT_PENABLED: - return wacom_tpc_pen(wacom); + } } } From 
0fe9c5519553ee65b1e8ab137d23c07ee4479d05 Mon Sep 17 00:00:00 2001 From: Marta Rybczynska Date: Mon, 10 Apr 2017 17:12:34 +0200 Subject: [PATCH 408/465] nvme-rdma: support devices with queue size < 32 commit 0544f5494a03b8846db74e02be5685d1f32b06c9 upstream. In the case of small NVMe-oF queue size (<32) we may enter a deadlock caused by the fact that the IB completions aren't sent waiting for 32 and the send queue will fill up. The error is seen as (using mlx5): [ 2048.693355] mlx5_0:mlx5_ib_post_send:3765:(pid 7273): [ 2048.693360] nvme nvme1: nvme_rdma_post_send failed with error code -12 This patch changes the way the signaling is done so that it depends on the queue depth now. The magic define has been removed completely. Signed-off-by: Marta Rybczynska Signed-off-by: Samuel Jones Acked-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/rdma.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 16f84eb0b95e..0aba367ae465 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1029,6 +1029,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } +static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) +{ + int sig_limit; + + /* + * We signal completion every queue depth/2 and also handle the + * degenerated case of a device with queue_depth=1, where we + * would need to signal every message. + */ + sig_limit = max(queue->queue_size / 2, 1); + return (++queue->sig_count % sig_limit) == 0; +} + static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, struct ib_send_wr *first, bool flush) @@ -1056,9 +1069,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * Would have been way to obvious to handle this in hardware or * at least the RDMA stack.. * - * This messy and racy code sniplet is copy and pasted from the iSER - * initiator, and the magic '32' comes from there as well. - * * Always signal the flushes. The magic request used for the flush * sequencer is not allocated in our driver's tagset and it's * triggered to be freed by blk_cleanup_queue(). So we need to @@ -1066,7 +1076,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * embedded in request's payload, is not freed when __ib_process_cq() * calls wr_cqe->done(). */ - if ((++queue->sig_count % 32) == 0 || flush) + if (nvme_rdma_queue_sig_limit(queue) || flush) wr.send_flags |= IB_SEND_SIGNALED; if (first) From 20c03f455c786c47f64fc402e698e5c6a7878e33 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:03 +0800 Subject: [PATCH 409/465] nvme: use blk_mq_start_hw_queues() in nvme_kill_queues() commit 806f026f9b901eaf1a6baeb48b5da18d6a4f818e upstream. Inside nvme_kill_queues(), we have to start hw queues for draining requests in sw queues, .dispatch list and requeue list, so use blk_mq_start_hw_queues() instead of blk_mq_start_stopped_hw_queues() which only run queues if queues are stopped, but the queues may have been started already, for example nvme_start_queues() is called in reset work function. blk_mq_start_hw_queues() run hw queues in current context, instead of running asynchronously like before. Given nvme_kill_queues() is run from either remove context or reset worker context, both are fine to run hw queue directly. 
And the mutex of namespaces_mutex isn't a problem too becasue nvme_start_freeze() runs hw queue in this way already. Reported-by: Zhang Yi Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index eeb409c287b8..c20bbb4c78c2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2345,7 +2345,13 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); blk_mq_abort_requeue_list(ns->queue); - blk_mq_start_stopped_hw_queues(ns->queue, true); + + /* + * Forcibly start all queues to avoid having stuck requests. + * Note that we must ensure the queues are not stopped + * when the final removal happens. + */ + blk_mq_start_hw_queues(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } From a8aa8a0c107a7bcf3380ca3633debec2607bb6fb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:04 +0800 Subject: [PATCH 410/465] nvme: avoid to use blk_mq_abort_requeue_list() commit 986f75c876dbafed98eba7cb516c5118f155db23 upstream. NVMe may add request into requeue list simply and not kick off the requeue if hw queues are stopped. Then blk_mq_abort_requeue_list() is called in both nvme_kill_queues() and nvme_ns_remove() for dealing with this issue. Unfortunately blk_mq_abort_requeue_list() is absolutely a race maker, for example, one request may be requeued during the aborting. So this patch just calls blk_mq_kick_requeue_list() in nvme_kill_queues() to handle this issue like what nvme_start_queues() does. Now all requests in requeue list when queues are stopped will be handled by blk_mq_kick_requeue_list() when queues are restarted, either in nvme_start_queues() or in nvme_kill_queues(). Reported-by: Zhang Yi Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c20bbb4c78c2..b3d3c5c2c92f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2006,7 +2006,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (ns->ndev) nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); - blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); } @@ -2344,7 +2343,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) continue; revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); - blk_mq_abort_requeue_list(ns->queue); /* * Forcibly start all queues to avoid having stuck requests. @@ -2352,6 +2350,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) * when the final removal happens. */ blk_mq_start_hw_queues(ns->queue); + + /* draining requests in requeue list */ + blk_mq_kick_requeue_list(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } From 662dbfcc66ba8d2d210257f9f1aa737c01fa5858 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:46:12 -0400 Subject: [PATCH 411/465] drm/amd/powerplay/smu7: add vblank check for mclk switching (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 09be4a5219610a6fae3215d4f51f948d6f5d2609 upstream. Check to make sure the vblank period is long enough to support mclk switching. 
v2: drop needless initial assignment (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Acked-by: Christian König Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index f75ee33ec5bb..c794132014a9 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2656,6 +2656,28 @@ static int smu7_get_power_state_size(struct pp_hwmgr *hwmgr) return sizeof(struct smu7_power_state); } +static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr, + uint32_t vblank_time_us) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t switch_limit_us; + + switch (hwmgr->chip_id) { + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + switch_limit_us = data->is_memory_gddr5 ? 190 : 150; + break; + default: + switch_limit_us = data->is_memory_gddr5 ? 450 : 150; + break; + } + + if (vblank_time_us < switch_limit_us) + return true; + else + return false; +} static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct pp_power_state *request_ps, @@ -2670,6 +2692,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, bool disable_mclk_switching; bool disable_mclk_switching_for_frame_lock; struct cgs_display_info info = {0}; + struct cgs_mode_info mode_info = {0}; const struct phm_clock_and_voltage_limits *max_limits; uint32_t i; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -2678,6 +2701,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, int32_t count; int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0; + info.mode_info = &mode_info; data->battery_state = (PP_StateUILabel_Battery == request_ps->classification.ui_label); @@ -2704,8 +2728,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, cgs_get_active_displays_info(hwmgr->device, &info); - /*TO DO result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/ - minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock; minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; @@ -2770,8 +2792,9 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); - disable_mclk_switching = (1 < info.display_count) || - disable_mclk_switching_for_frame_lock; + disable_mclk_switching = ((1 < info.display_count) || + disable_mclk_switching_for_frame_lock || + smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; From d6ba1a4407beb8c82d7b2a31b878ff1673cb4a37 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:57:41 -0400 Subject: [PATCH 412/465] drm/amd/powerplay/smu7: disable mclk switching for high refresh rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2275a3a2fe9914ba6d76c8ea490da3c08342bd19 upstream. Even if the vblank period would allow it, it still seems to be problematic on some cards. 
bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c794132014a9..98bfa5e41e3b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2794,7 +2794,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, disable_mclk_switching = ((1 < info.display_count) || disable_mclk_switching_for_frame_lock || - smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us)); + smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) || + (mode_info.refresh_rate > 120)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; From 6b693bbf9cd900e0e94b9b11711a15e61f84022f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:14:14 -0400 Subject: [PATCH 413/465] drm/radeon/ci: disable mclk switching for high refresh rates (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 58d7e3e427db1bd68f33025519a9468140280a75 upstream. Even if the vblank period would allow it, it still seems to be problematic on some cards. v2: fix logic inversion (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/ci_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 7ba450832e6b..ea36dc4dd5d2 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -776,6 +776,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev) u32 vblank_time = r600_dpm_get_vblank_time(rdev); u32 switch_limit = pi->mem_gddr5 ? 450 : 300; + /* disable mclk switching if the refresh is >120Hz, even if the + * blanking period would allow it + */ + if (r600_dpm_get_vrefresh(rdev) > 120) + return true; + if (vblank_time < switch_limit) return true; else From 2609770993c186efef6f52ca3ed3d8bbb7d41061 Mon Sep 17 00:00:00 2001 From: Lyude Date: Thu, 11 May 2017 19:31:12 -0400 Subject: [PATCH 414/465] drm/radeon: Unbreak HPD handling for r600+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3d18e33735a02b1a90aecf14410bf3edbfd4d3dc upstream. We end up reading the interrupt register for HPD5, and then writing it to HPD6 which on systems without anything using HPD5 results in permanently disabling hotplug on one of the display outputs after the first time we acknowledge a hotplug interrupt from the GPU. This code is really bad. But for now, let's just fix this. I will hopefully have a large patch series to refactor all of this soon. 
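[Editor's illustration, not part of the upstream patch] The four hunks that follow all correct the same copy-paste slip in the HPD6 acknowledgment path; a minimal sketch of the pattern, using only the register names and accessors that appear in the diff:

	/* before: the ack value is read from HPD5's control register... */
	tmp = RREG32(DC_HPD5_INT_CONTROL);
	tmp |= DC_HPDx_INT_ACK;
	WREG32(DC_HPD6_INT_CONTROL, tmp);	/* ...but written back to HPD6 */

	/* after: read-modify-write the same HPD6 register */
	tmp = RREG32(DC_HPD6_INT_CONTROL);
	tmp |= DC_HPDx_INT_ACK;
	WREG32(DC_HPD6_INT_CONTROL, tmp);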
Reviewed-by: Christian König Signed-off-by: Lyude Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/cik.c | 4 ++-- drivers/gpu/drm/radeon/evergreen.c | 4 ++-- drivers/gpu/drm/radeon/r600.c | 2 +- drivers/gpu/drm/radeon/si.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index f6ff41a0eed6..edee6a5f4da9 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7416,7 +7416,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -7446,7 +7446,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 0b6b5766216f..6068b8a01016 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4933,7 +4933,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -4964,7 +4964,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index a951881c2a50..f2eac6b6c46a 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3995,7 +3995,7 @@ static void r600_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 414776811e71..c8047548f8be 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6345,7 +6345,7 @@ static inline void si_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -6376,7 +6376,7 @@ static inline void si_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } From d5bc54d0a373dfb4000b755969e9dbe7524799e0 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 2 Jun 2017 14:46:28 -0700 Subject: [PATCH 415/465] pcmcia: remove left-over %Z format commit 
ff5a20169b98d84ad8d7f99f27c5ebbb008204d6 upstream. Commit 5b5e0928f742 ("lib/vsprintf.c: remove %Z support") removed some usages of format %Z but forgot "%.2Zx". This makes clang 4.0 reports a -Wformat-extra-args warning because it does not know about %Z. Replace %Z with %z. Link: http://lkml.kernel.org/r/20170520090946.22562-1-nicolas.iooss_linux@m4x.org Signed-off-by: Nicolas Iooss Cc: Harald Welte Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/char/pcmcia/cm4040_cs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index d4dbd8d8e524..382c864814d9 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -374,7 +374,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, rc = write_sync_reg(SCR_HOST_TO_READER_START, dev); if (rc <= 0) { - DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc); + DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) return rc; @@ -387,7 +387,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, for (i = 0; i < bytes_to_write; i++) { rc = wait_for_bulk_out_ready(dev); if (rc <= 0) { - DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2Zx\n", + DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) @@ -403,7 +403,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, rc = write_sync_reg(SCR_HOST_TO_READER_DONE, dev); if (rc <= 0) { - DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc); + DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) return rc; From 0c4afdc6d82d3790abb4ef0e1b250bbfbc66377f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 May 2017 09:11:33 +0200 Subject: [PATCH 416/465] ALSA: hda - No loopback on ALC299 codec commit fa16b69f1299004b60b625f181143500a246e5cb upstream. ALC299 has no loopback mixer, but the driver still tries to add a beep control over the mixer NID which leads to the error at accessing it. This patch fixes it by properly declaring mixer_nid=0 for this codec. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195775 Fixes: 28f1f9b26cee ("ALSA: hda/realtek - Add new codec ID ALC299") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 299835d1fbaa..1aa21d7e7245 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6276,8 +6276,11 @@ static int patch_alc269(struct hda_codec *codec) break; case 0x10ec0225: case 0x10ec0295: + spec->codec_variant = ALC269_TYPE_ALC225; + break; case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; + spec->gen.mixer_nid = 0; /* no loopback on ALC299 */ break; case 0x10ec0234: case 0x10ec0274: From ffb97b001b1575a42eded6ebc937a3b1897a60f4 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 22 May 2017 20:58:11 +0300 Subject: [PATCH 417/465] ALSA: hda - apply STAC_9200_DELL_M22 quirk for Dell Latitude D430 commit 1fc2e41f7af4572b07190f9dec28396b418e9a36 upstream. This model is actually called 92XXM2-8 in Windows driver. But since pin configs for M22 and M28 are identical, just reuse M22 quirk. 
Fixes external microphone (tested) and probably docking station ports (not tested). Signed-off-by: Alexander Tsoy Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index faa3d38bac0b..6cefdf6c0b75 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1559,6 +1559,8 @@ static const struct snd_pci_quirk stac9200_fixup_tbl[] = { "Dell Inspiron 1501", STAC_9200_DELL_M26), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f6, "unknown Dell", STAC_9200_DELL_M26), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0201, + "Dell Latitude D430", STAC_9200_DELL_M22), /* Panasonic */ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC), /* Gateway machines needs EAPD to be set on resume */ From 0be9a9a4229d66e34cc5bdbf1143794c5e48a79a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 May 2017 09:23:41 +0200 Subject: [PATCH 418/465] Revert "ALSA: usb-audio: purge needless variable length array" commit 64188cfbe5245d412de2139a3864e4e00b4136f0 upstream. This reverts commit 89b593c30e83 ("ALSA: usb-audio: purge needless variable length array"). The patch turned out to cause a severe regression, triggering an Oops at snd_usb_ctl_msg(). It was overseen that snd_usb_ctl_msg() writes back the response to the given buffer, while the patch changed it to a read-only const buffer. (One should always double-check when an extra pointer cast is present...) As a simple fix, just revert the affected commit. It was merely a cleanup. Although it brings VLA again, it's clearer as a fix. We'll address the VLA later in another patch. Fixes: 89b593c30e83 ("ALSA: usb-audio: purge needless variable length array") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195875 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_us16x08.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index dc48eedea92e..29d2c9282987 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -698,12 +698,12 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, struct snd_usb_audio *chip = elem->head.mixer->chip; struct snd_us16x08_meter_store *store = elem->private_data; u8 meter_urb[64]; - char tmp[sizeof(mix_init_msg2)] = {0}; + char tmp[max(sizeof(mix_init_msg1), sizeof(mix_init_msg2))]; switch (kcontrol->private_value) { case 0: - snd_us16x08_send_urb(chip, (char *)mix_init_msg1, - sizeof(mix_init_msg1)); + memcpy(tmp, mix_init_msg1, sizeof(mix_init_msg1)); + snd_us16x08_send_urb(chip, tmp, 4); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value++; @@ -721,7 +721,7 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, case 3: memcpy(tmp, mix_init_msg2, sizeof(mix_init_msg2)); tmp[2] = snd_get_meter_comp_index(store); - snd_us16x08_send_urb(chip, tmp, sizeof(mix_init_msg2)); + snd_us16x08_send_urb(chip, tmp, 10); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value = 0; From af03bb0cabf4d19b1873da98af124439f2b21094 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 May 2017 23:21:07 +0200 Subject: [PATCH 419/465] ALSA: usb: Fix a typo in Tascam US-16x08 mixer element commit 617163fc2580da3d489b6c1bacb6312e0e2aac02 upstream. 
A mixer element created in a quirk for Tascam US-16x08 contains a typo: it should be "EQ MidLow Q" instead of "EQ MidQLow Q". Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195875 Fixes: d2bb390a2081 ("ALSA: usb-audio: Tascam US-16x08 DSP mixer quirk") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_us16x08.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index 29d2c9282987..442d8f7998e3 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -1135,7 +1135,7 @@ static const struct snd_us16x08_control_params eq_controls[] = { .control_id = SND_US16X08_ID_EQLOWMIDWIDTH, .type = USB_MIXER_U8, .num_channels = 16, - .name = "EQ MidQLow Q", + .name = "EQ MidLow Q", }, { /* EQ mid high gain */ .kcontrol_new = &snd_us16x08_eq_gain_ctl, From de12c73fa2de588d9dc532a0238ea98e1ac83121 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 2 Jun 2017 14:46:31 -0700 Subject: [PATCH 420/465] mm/page_alloc.c: make sure OOM victim can try allocations with no watermarks once commit c288983dddf714216428774e022ad78f48dd8cb1 upstream. Roman Gushchin has reported that the OOM killer can trivially selects next OOM victim when a thread doing memory allocation from page fault path was selected as first OOM victim. allocate invoked oom-killer: gfp_mask=0x14280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=(null), order=0, oom_score_adj=0 allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: oom_kill_process+0x219/0x3e0 out_of_memory+0x11d/0x480 __alloc_pages_slowpath+0xc84/0xd40 __alloc_pages_nodemask+0x245/0x260 alloc_pages_vma+0xa2/0x270 __handle_mm_fault+0xca9/0x10c0 handle_mm_fault+0xf3/0x210 __do_page_fault+0x240/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... Out of memory: Kill process 492 (allocate) score 899 or sacrifice child Killed process 492 (allocate) total-vm:2052368kB, anon-rss:1894576kB, file-rss:4kB, shmem-rss:0kB allocate: page allocation failure: order:0, mode:0x14280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=(null) allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: __alloc_pages_slowpath+0xd32/0xd40 __alloc_pages_nodemask+0x245/0x260 alloc_pages_vma+0xa2/0x270 __handle_mm_fault+0xca9/0x10c0 handle_mm_fault+0xf3/0x210 __do_page_fault+0x240/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... oom_reaper: reaped process 492 (allocate), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB ... allocate invoked oom-killer: gfp_mask=0x0(), nodemask=(null), order=0, oom_score_adj=0 allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: oom_kill_process+0x219/0x3e0 out_of_memory+0x11d/0x480 pagefault_out_of_memory+0x68/0x80 mm_fault_error+0x8f/0x190 ? handle_mm_fault+0xf3/0x210 __do_page_fault+0x4b2/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... 
Out of memory: Kill process 233 (firewalld) score 10 or sacrifice child Killed process 233 (firewalld) total-vm:246076kB, anon-rss:20956kB, file-rss:0kB, shmem-rss:0kB There is a race window that the OOM reaper completes reclaiming the first victim's memory while nothing but mutex_trylock() prevents the first victim from calling out_of_memory() from pagefault_out_of_memory() after memory allocation for page fault path failed due to being selected as an OOM victim. This is a side effect of commit 9a67f6488eca926f ("mm: consolidate GFP_NOFAIL checks in the allocator slowpath") because that commit silently changed the behavior from /* Avoid allocations with no watermarks from looping endlessly */ to /* * Give up allocations without trying memory reserves if selected * as an OOM victim */ in __alloc_pages_slowpath() by moving the location to check TIF_MEMDIE flag. I have noticed this change but I didn't post a patch because I thought it is an acceptable change other than noise by warn_alloc() because !__GFP_NOFAIL allocations are allowed to fail. But we overlooked that failing memory allocation from page fault path makes difference due to the race window explained above. While it might be possible to add a check to pagefault_out_of_memory() that prevents the first victim from calling out_of_memory() or remove out_of_memory() from pagefault_out_of_memory(), changing pagefault_out_of_memory() does not suppress noise by warn_alloc() when allocating thread was selected as an OOM victim. There is little point with printing similar backtraces and memory information from both out_of_memory() and warn_alloc(). Instead, if we guarantee that current thread can try allocations with no watermarks once when current thread looping inside __alloc_pages_slowpath() was selected as an OOM victim, we can follow "who can use memory reserves" rules and suppress noise by warn_alloc() and prevent memory allocations from page fault path from calling pagefault_out_of_memory(). If we take the comment literally, this patch would do - if (test_thread_flag(TIF_MEMDIE)) - goto nopage; + if (alloc_flags == ALLOC_NO_WATERMARKS || (gfp_mask & __GFP_NOMEMALLOC)) + goto nopage; because gfp_pfmemalloc_allowed() returns false if __GFP_NOMEMALLOC is given. But if I recall correctly (I couldn't find the message), the condition is meant to apply to only OOM victims despite the comment. Therefore, this patch preserves TIF_MEMDIE check. 
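[Editor's illustration, not part of the upstream patch] A minimal sketch of the check as it reads in __alloc_pages_slowpath() after this change (the diff below is authoritative); the added conditions implement the "who can use memory reserves" rule described above:

	/* Give up without retrying only if the OOM victim has already been
	 * allowed to ignore the watermarks, or is explicitly barred from
	 * the reserves by __GFP_NOMEMALLOC. */
	if (test_thread_flag(TIF_MEMDIE) &&
	    (alloc_flags == ALLOC_NO_WATERMARKS ||
	     (gfp_mask & __GFP_NOMEMALLOC)))
		goto nopage;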
Fixes: 9a67f6488eca926f ("mm: consolidate GFP_NOFAIL checks in the allocator slowpath") Link: http://lkml.kernel.org/r/201705192112.IAF69238.OQOHSJLFOFFMtV@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: Roman Gushchin Tested-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c5fee5a0316d..e86db1911350 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3834,7 +3834,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto got_pg; /* Avoid allocations with no watermarks from looping endlessly */ - if (test_thread_flag(TIF_MEMDIE)) + if (test_thread_flag(TIF_MEMDIE) && + (alloc_flags == ALLOC_NO_WATERMARKS || + (gfp_mask & __GFP_NOMEMALLOC))) goto nopage; /* Retry as long as the OOM killer is making progress */ From b16e6ab5adce667dcbf356c15b30712b9c8fa871 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 2 Jun 2017 14:46:34 -0700 Subject: [PATCH 421/465] mm: avoid spurious 'bad pmd' warning messages commit d0f0931de936a0a468d7e59284d39581c16d3a73 upstream. When the pmd_devmap() checks were added by 5c7fb56e5e3f ("mm, dax: dax-pmd vs thp-pmd vs hugetlbfs-pmd") to add better support for DAX huge pages, they were all added to the end of if() statements after existing pmd_trans_huge() checks. So, things like: - if (pmd_trans_huge(*pmd)) + if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) When further checks were added after pmd_trans_unstable() checks by commit 7267ec008b5c ("mm: postpone page table allocation until we have page to map") they were also added at the end of the conditional: + if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd)) This ordering is fine for pmd_trans_huge(), but doesn't work for pmd_trans_unstable(). This is because DAX huge pages trip the bad_pmd() check inside of pmd_none_or_trans_huge_or_clear_bad() (called by pmd_trans_unstable()), which prints out a warning and returns 1. So, we do end up doing the right thing, but only after spamming dmesg with suspicious looking messages: mm/pgtable-generic.c:39: bad pmd ffff8808daa49b88(84000001006000a5) Reorder these checks in a helper so that pmd_devmap() is checked first, avoiding the error messages, and add a comment explaining why the ordering is important. Fixes: commit 7267ec008b5c ("mm: postpone page table allocation until we have page to map") Link: http://lkml.kernel.org/r/20170522215749.23516-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Pawel Lebioda Cc: "Darrick J. Wong" Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: Matthew Wilcox Cc: "Kirill A . Shutemov" Cc: Dave Jiang Cc: Xiong Zhou Cc: Eryu Guan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 235ba51b2fbf..2437dc08ab36 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3029,6 +3029,17 @@ static int __do_fault(struct vm_fault *vmf) return ret; } +/* + * The ordering of these checks is important for pmds with _PAGE_DEVMAP set. + * If we check pmd_trans_unstable() first we will trip the bad_pmd() check + * inside of pmd_none_or_trans_huge_or_clear_bad(). 
This will end up correctly + * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. + */ +static int pmd_devmap_trans_unstable(pmd_t *pmd) +{ + return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); +} + static int pte_alloc_one_map(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -3052,18 +3063,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf) map_pte: /* * If a huge pmd materialized under us just retry later. Use - * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd - * didn't become pmd_trans_huge under us and then back to pmd_none, as - * a result of MADV_DONTNEED running immediately after a huge pmd fault - * in a different thread of this mm, in turn leading to a misleading - * pmd_trans_huge() retval. All we have to ensure is that it is a - * regular pmd that we can walk with pte_offset_map() and we can do that - * through an atomic read in C, which is what pmd_trans_unstable() - * provides. + * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of + * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge + * under us and then back to pmd_none, as a result of MADV_DONTNEED + * running immediately after a huge pmd fault in a different thread of + * this mm, in turn leading to a misleading pmd_trans_huge() retval. + * All we have to ensure is that it is a regular pmd that we can walk + * with pte_offset_map() and we can do that through an atomic read in + * C, which is what pmd_trans_unstable() provides. */ - if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) + if (pmd_devmap_trans_unstable(vmf->pmd)) return VM_FAULT_NOPAGE; + /* + * At this point we know that our vmf->pmd points to a page of ptes + * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge() + * for the duration of the fault. If a racing MADV_DONTNEED runs and + * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still + * be valid and we will re-check to make sure the vmf->pte isn't + * pte_none() under vmf->ptl protection when we return to + * alloc_set_pte(). + */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); return 0; @@ -3690,7 +3710,7 @@ static int handle_pte_fault(struct vm_fault *vmf) vmf->pte = NULL; } else { /* See comment in pte_alloc_one_map() */ - if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) + if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* * A regular pmd is established and it can't morph into a huge From bdaac1fe714769eae7d6e438fc1b1091a0af4a23 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 2 Jun 2017 14:46:37 -0700 Subject: [PATCH 422/465] dax: fix race between colliding PMD & PTE entries commit e2093926a098a8ccf0f1d10f6df8dad452cb28d3 upstream. We currently have two related PMD vs PTE races in the DAX code. These can both be easily triggered by having two threads reading and writing simultaneously to the same private mapping, with the key being that private mapping reads can be handled with PMDs but private mapping writes are always handled with PTEs so that we can COW. Here is the first race: CPU 0 CPU 1 (private mapping write) __handle_mm_fault() create_huge_pmd() - FALLBACK handle_pte_fault() passes check for pmd_devmap() (private mapping read) __handle_mm_fault() create_huge_pmd() dax_iomap_pmd_fault() inserts PMD dax_iomap_pte_fault() does a PTE fault, but we already have a DAX PMD installed in our page tables at this spot. 
Here's the second race: CPU 0 CPU 1 (private mapping read) __handle_mm_fault() passes check for pmd_none() create_huge_pmd() dax_iomap_pmd_fault() inserts PMD (private mapping write) __handle_mm_fault() create_huge_pmd() - FALLBACK (private mapping read) __handle_mm_fault() passes check for pmd_none() create_huge_pmd() handle_pte_fault() dax_iomap_pte_fault() inserts PTE dax_iomap_pmd_fault() inserts PMD, but we already have a PTE at this spot. The core of the issue is that while there is isolation between faults to the same range in the DAX fault handlers via our DAX entry locking, there is no isolation between faults in the code in mm/memory.c. This means for instance that this code in __handle_mm_fault() can run: if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) { ret = create_huge_pmd(&vmf); But by the time we actually get to run the fault handler called by create_huge_pmd(), the PMD is no longer pmd_none() because a racing PTE fault has installed a normal PMD here as a parent. This is the cause of the 2nd race. The first race is similar - there is the following check in handle_pte_fault(): } else { /* See comment in pte_alloc_one_map() */ if (pmd_devmap(*vmf->pmd) || pmd_trans_unstable(vmf->pmd)) return 0; So if a pmd_devmap() PMD (a DAX PMD) has been installed at vmf->pmd, we will bail and retry the fault. This is correct, but there is nothing preventing the PMD from being installed after this check but before we actually get to the DAX PTE fault handlers. In my testing these races result in the following types of errors: BUG: Bad rss-counter state mm:ffff8800a817d280 idx:1 val:1 BUG: non-zero nr_ptes on freeing mm: 15 Fix this issue by having the DAX fault handlers verify that it is safe to continue their fault after they have taken an entry lock to block other racing faults. [ross.zwisler@linux.intel.com: improve fix for colliding PMD & PTE entries] Link: http://lkml.kernel.org/r/20170526195932.32178-1-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/20170522215749.23516-2-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: Pawel Lebioda Reviewed-by: Jan Kara Cc: "Darrick J. Wong" Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: Matthew Wilcox Cc: "Kirill A . Shutemov" Cc: Pawel Lebioda Cc: Dave Jiang Cc: Xiong Zhou Cc: Eryu Guan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dax.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index db0cc52eb22e..285f4ab6f498 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1128,6 +1128,17 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, if (IS_ERR(entry)) return dax_fault_return(PTR_ERR(entry)); + /* + * It is possible, particularly with mixed reads & writes to private + * mappings, that we have raced with a PMD fault that overlaps with + * the PTE we need to set up. If so just return and the fault will be + * retried. + */ + if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) { + vmf_ret = VM_FAULT_NOPAGE; + goto unlock_entry; + } + /* * Note that we don't bother to use iomap_apply here: DAX required * the file system block size to be equal the page size, which means @@ -1362,6 +1373,18 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, if (IS_ERR(entry)) goto fallback; + /* + * It is possible, particularly with mixed reads & writes to private + * mappings, that we have raced with a PTE fault that overlaps with + * the PMD we need to set up. 
If so just return and the fault will be + * retried. + */ + if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) && + !pmd_devmap(*vmf->pmd)) { + result = 0; + goto unlock_entry; + } + /* * Note that we don't use iomap_apply here. We aren't doing I/O, only * setting up a mapping, so really we're using iomap_begin() as a way From a0189db30dc0c074236028abe16f9b25794d9975 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 2 Jun 2017 14:46:40 -0700 Subject: [PATCH 423/465] mm/migrate: fix refcount handling when !hugepage_migration_supported() commit 30809f559a0d348c2dfd7ab05e9a451e2384962e upstream. On failing to migrate a page, soft_offline_huge_page() performs the necessary update to the hugepage ref-count. But when !hugepage_migration_supported() , unmap_and_move_hugepage() also decrements the page ref-count for the hugepage. The combined behaviour leaves the ref-count in an inconsistent state. This leads to soft lockups when running the overcommitted hugepage test from mce-tests suite. Soft offlining pfn 0x83ed600 at process virtual address 0x400000000000 soft offline: 0x83ed600: migration failed 1, type 1fffc00000008008 (uptodate|head) INFO: rcu_preempt detected stalls on CPUs/tasks: Tasks blocked on level-0 rcu_node (CPUs 0-7): P2715 (detected by 7, t=5254 jiffies, g=963, c=962, q=321) thugetlb_overco R running task 0 2715 2685 0x00000008 Call trace: dump_backtrace+0x0/0x268 show_stack+0x24/0x30 sched_show_task+0x134/0x180 rcu_print_detail_task_stall_rnp+0x54/0x7c rcu_check_callbacks+0xa74/0xb08 update_process_times+0x34/0x60 tick_sched_handle.isra.7+0x38/0x70 tick_sched_timer+0x4c/0x98 __hrtimer_run_queues+0xc0/0x300 hrtimer_interrupt+0xac/0x228 arch_timer_handler_phys+0x3c/0x50 handle_percpu_devid_irq+0x8c/0x290 generic_handle_irq+0x34/0x50 __handle_domain_irq+0x68/0xc0 gic_handle_irq+0x5c/0xb0 Address this by changing the putback_active_hugepage() in soft_offline_huge_page() to putback_movable_pages(). This only triggers on systems that enable memory failure handling (ARCH_SUPPORTS_MEMORY_FAILURE) but not hugepage migration (!ARCH_ENABLE_HUGEPAGE_MIGRATION). I imagine this wasn't triggered as there aren't many systems running this configuration. [akpm@linux-foundation.org: remove dead comment, per Naoya] Link: http://lkml.kernel.org/r/20170525135146.32011-1-punit.agrawal@arm.com Reported-by: Manoj Iyer Tested-by: Manoj Iyer Suggested-by: Naoya Horiguchi Signed-off-by: Punit Agrawal Cc: Joonsoo Kim Cc: Wanpeng Li Cc: Christoph Lameter Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 27f7210e7fab..d7780dfdf541 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1587,12 +1587,8 @@ static int soft_offline_huge_page(struct page *page, int flags) if (ret) { pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); - /* - * We know that soft_offline_huge_page() tries to migrate - * only one hugepage pointed to by hpage, so we need not - * run through the pagelist here. 
- */ - putback_active_hugepage(hpage); + if (!list_empty(&pagelist)) + putback_movable_pages(&pagelist); if (ret > 0) ret = -EIO; } else { From f814bf465578b4bf2d4ae1329e8f35d89c040d2d Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 2 Jun 2017 14:46:43 -0700 Subject: [PATCH 424/465] mlock: fix mlock count can not decrease in race condition commit 70feee0e1ef331b22cc51f383d532a0d043fbdcc upstream. Kefeng reported that when running the follow test, the mlock count in meminfo will increase permanently: [1] testcase linux:~ # cat test_mlockal grep Mlocked /proc/meminfo for j in `seq 0 10` do for i in `seq 4 15` do ./p_mlockall >> log & done sleep 0.2 done # wait some time to let mlock counter decrease and 5s may not enough sleep 5 grep Mlocked /proc/meminfo linux:~ # cat p_mlockall.c #include #include #include #define SPACE_LEN 4096 int main(int argc, char ** argv) { int ret; void *adr = malloc(SPACE_LEN); if (!adr) return -1; ret = mlockall(MCL_CURRENT | MCL_FUTURE); printf("mlcokall ret = %d\n", ret); ret = munlockall(); printf("munlcokall ret = %d\n", ret); free(adr); return 0; } In __munlock_pagevec() we should decrement NR_MLOCK for each page where we clear the PageMlocked flag. Commit 1ebb7cc6a583 ("mm: munlock: batch NR_MLOCK zone state updates") has introduced a bug where we don't decrement NR_MLOCK for pages where we clear the flag, but fail to isolate them from the lru list (e.g. when the pages are on some other cpu's percpu pagevec). Since PageMlocked stays cleared, the NR_MLOCK accounting gets permanently disrupted by this. Fix it by counting the number of page whose PageMlock flag is cleared. Fixes: 1ebb7cc6a583 (" mm: munlock: batch NR_MLOCK zone state updates") Link: http://lkml.kernel.org/r/1495678405-54569-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Reported-by: Kefeng Wang Tested-by: Kefeng Wang Cc: Vlastimil Babka Cc: Joern Engel Cc: Mel Gorman Cc: Michel Lespinasse Cc: Hugh Dickins Cc: Rik van Riel Cc: Johannes Weiner Cc: Michal Hocko Cc: Xishi Qiu Cc: zhongjiang Cc: Hanjun Guo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mlock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index 0dd9ca18e19e..721679a2c1aa 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -286,7 +286,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked; + int delta_munlocked = -nr; struct pagevec pvec_putback; int pgrescued = 0; @@ -306,6 +306,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) continue; else __munlock_isolation_failed(page); + } else { + delta_munlocked++; } /* @@ -317,7 +319,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; } - delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(zone_lru_lock(zone)); From 1cc89263447cec61b46b124d706d2a369cfe7f50 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Jun 2017 14:46:46 -0700 Subject: [PATCH 425/465] mm/hugetlb: report -EHWPOISON not -EFAULT when FOLL_HWPOISON is specified commit 9a291a7c9428155e8e623e4a3989f8be47134df5 upstream. KVM uses get_user_pages() to resolve its stage2 faults. KVM sets the FOLL_HWPOISON flag causing faultin_page() to return -EHWPOISON when it finds a VM_FAULT_HWPOISON. KVM handles these hwpoison pages as a special case. 
(check_user_page_hwpoison()) When huge pages are involved, this doesn't work so well. get_user_pages() calls follow_hugetlb_page(), which stops early if it receives VM_FAULT_HWPOISON from hugetlb_fault(), eventually returning -EFAULT to the caller. The step to map this to -EHWPOISON based on the FOLL_ flags is missing. The hwpoison special case is skipped, and -EFAULT is returned to user-space, causing Qemu or kvmtool to exit. Instead, move this VM_FAULT_ to errno mapping code into a header file and use it from faultin_page() and follow_hugetlb_page(). With this, KVM works as expected. This isn't a problem for arm64 today as we haven't enabled MEMORY_FAILURE, but I can't see any reason this doesn't happen on x86 too, so I think this should be a fix. This doesn't apply earlier than stable's v4.11.1 due to all sorts of cleanup. [james.morse@arm.com: add vm_fault_to_errno() call to faultin_page()] suggested. Link: http://lkml.kernel.org/r/20170525171035.16359-1-james.morse@arm.com [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170524160900.28786-1-james.morse@arm.com Signed-off-by: James Morse Acked-by: Punit Agrawal Acked-by: Naoya Horiguchi Cc: "Kirill A . Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 11 +++++++++++ mm/gup.c | 20 ++++++++------------ mm/hugetlb.c | 5 +++++ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 00a8fa7e366a..018b134f6427 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2315,6 +2315,17 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ +static inline int vm_fault_to_errno(int vm_fault, int foll_flags) +{ + if (vm_fault & VM_FAULT_OOM) + return -ENOMEM; + if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) + return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT; + if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) + return -EFAULT; + return 0; +} + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, diff --git a/mm/gup.c b/mm/gup.c index 04aa405350dc..fb87cbf37e52 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -407,12 +407,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, ret = handle_mm_fault(vma, address, fault_flags); if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) - return *flags & FOLL_HWPOISON ? 
-EHWPOISON : -EFAULT; - if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) - return -EFAULT; + int err = vm_fault_to_errno(ret, *flags); + + if (err) + return err; BUG(); } @@ -723,12 +721,10 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, ret = handle_mm_fault(vma, address, fault_flags); major |= ret & VM_FAULT_MAJOR; if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) - return -EHWPOISON; - if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) - return -EFAULT; + int err = vm_fault_to_errno(ret, 0); + + if (err) + return err; BUG(); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e5828875f7bb..3eedb187e549 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, } ret = hugetlb_fault(mm, vma, vaddr, fault_flags); if (ret & VM_FAULT_ERROR) { + int err = vm_fault_to_errno(ret, flags); + + if (err) + return err; + remainder = 0; break; } From dbab02324500a59bafce597f07350462ac5866f2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Jun 2017 14:46:49 -0700 Subject: [PATCH 426/465] mm: consider memblock reservations for deferred memory initialization sizing commit 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 upstream. We have seen an early OOM killer invocation on ppc64 systems with crashkernel=4096M: kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0 kthreadd cpuset=/ mems_allowed=7 CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1 Call Trace: dump_stack+0xb0/0xf0 (unreliable) dump_header+0xb0/0x258 out_of_memory+0x5f0/0x640 __alloc_pages_nodemask+0xa8c/0xc80 kmem_getpages+0x84/0x1a0 fallback_alloc+0x2a4/0x320 kmem_cache_alloc_node+0xc0/0x2e0 copy_process.isra.25+0x260/0x1b30 _do_fork+0x94/0x470 kernel_thread+0x48/0x60 kthreadd+0x264/0x330 ret_from_kernel_thread+0x5c/0xa4 Mem-Info: active_anon:0 inactive_anon:0 isolated_anon:0 active_file:0 inactive_file:0 isolated_file:0 unevictable:0 dirty:0 writeback:0 unstable:0 slab_reclaimable:5 slab_unreclaimable:73 mapped:0 shmem:0 pagetables:0 bounce:0 free:0 free_pcp:0 free_cma:0 Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes lowmem_reserve[]: 0 0 0 0 Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB 0 total pagecache pages 0 pages in swap cache Swap cache stats: add 0, delete 0, find 0/0 Free swap = 0kB Total swap = 0kB 819200 pages RAM 0 pages HighMem/MovableOnly 817481 pages reserved 0 pages cma reserved 0 pages hwpoisoned the reason is that the managed memory is too low (only 110MB) while the rest of the the 50GB is still waiting for the deferred intialization to be done. update_defer_init estimates the initial memoty to initialize to 2GB at least but it doesn't consider any memory allocated in that range. In this particular case we've had Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB) so the low 2GB is mostly depleted. 
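Rough arithmetic with the numbers from the log above: the deferred-init heuristic initialises at least 2GB of each node up front (the maximum of 2GB and 1/256th of the node's spanned pages), but the 4096MB crashkernel reservation starts at 128MB, so nearly all of that low 2GB window is already claimed by memblock and only about 110MB (the managed:110016kB shown above) is left for early boot allocations.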
Fix this by considering memblock allocations in the initial static initialization estimation. Move the max_initialise to reset_deferred_meminit and implement a simple memblock_reserved_memory helper which iterates all reserved blocks and sums the size of all that start below the given address. The cumulative size is than added on top of the initial estimation. This is still not ideal because reset_deferred_meminit doesn't consider holes and so reservation might be above the initial estimation whihch we ignore but let's make the logic simpler until we really need to handle more complicated cases. Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Link: http://lkml.kernel.org/r/20170531104010.GI27783@dhcp22.suse.cz Signed-off-by: Michal Hocko Acked-by: Mel Gorman Tested-by: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/memblock.h | 8 ++++++++ include/linux/mmzone.h | 1 + mm/memblock.c | 23 +++++++++++++++++++++++ mm/page_alloc.c | 33 ++++++++++++++++++++++----------- 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index bdfc65af4152..14dbc4fd0a92 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -423,12 +423,20 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) } #endif +extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, + phys_addr_t end_addr); #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { return 0; } +static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, + phys_addr_t end_addr) +{ + return 0; +} + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d45172b559d8..a3e9b7ba2c25 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -672,6 +672,7 @@ typedef struct pglist_data { * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; + unsigned long static_init_size; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/memblock.c b/mm/memblock.c index 696f06d17c4e..a749101ed2a2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1713,6 +1713,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type) } } +extern unsigned long __init_memblock +memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr) +{ + struct memblock_region *rgn; + unsigned long size = 0; + int idx; + + for_each_memblock_type((&memblock.reserved), rgn) { + phys_addr_t start, end; + + if (rgn->base + rgn->size < start_addr) + continue; + if (rgn->base > end_addr) + continue; + + start = rgn->base; + end = start + rgn->size; + size += end - start; + } + + return size; +} + void __init_memblock __memblock_dump_all(void) { pr_info("MEMBLOCK configuration:\n"); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e86db1911350..a2019fc9d94d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -291,6 +291,26 @@ int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT static inline void reset_deferred_meminit(pg_data_t *pgdat) { + unsigned long max_initialise; + unsigned long reserved_lowmem; + + /* + * Initialise at least 2G of a node but also take into account that + * two large system hashes that can take up 1GB for 0.25TB/node. 
+ */ + max_initialise = max(2UL << (30 - PAGE_SHIFT), + (pgdat->node_spanned_pages >> 8)); + + /* + * Compensate the all the memblock reservations (e.g. crash kernel) + * from the initial estimation to make sure we will initialize enough + * memory to boot. + */ + reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, + pgdat->node_start_pfn + max_initialise); + max_initialise += reserved_lowmem; + + pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); pgdat->first_deferred_pfn = ULONG_MAX; } @@ -313,20 +333,11 @@ static inline bool update_defer_init(pg_data_t *pgdat, unsigned long pfn, unsigned long zone_end, unsigned long *nr_initialised) { - unsigned long max_initialise; - /* Always populate low zones for address-contrained allocations */ if (zone_end < pgdat_end_pfn(pgdat)) return true; - /* - * Initialise at least 2G of a node but also take into account that - * two large system hashes that can take up 1GB for 0.25TB/node. - */ - max_initialise = max(2UL << (30 - PAGE_SHIFT), - (pgdat->node_spanned_pages >> 8)); - (*nr_initialised)++; - if ((*nr_initialised > max_initialise) && + if ((*nr_initialised > pgdat->static_init_size) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { pgdat->first_deferred_pfn = pfn; return false; @@ -6100,7 +6111,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, /* pg_data_t should be reset to zero when it's allocated */ WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx); - reset_deferred_meminit(pgdat); pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; pgdat->per_cpu_nodestats = NULL; @@ -6122,6 +6132,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, (unsigned long)pgdat->node_mem_map); #endif + reset_deferred_meminit(pgdat); free_area_init_core(pgdat); } From 68c98967e7a2f59863770a02ba402b7b5a6bd62a Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 11 May 2017 18:52:36 +0300 Subject: [PATCH 427/465] RDMA/srp: Fix NULL deref at srp_destroy_qp() commit 95c2ef50c726a51d580c35ae8dccd383abaa8701 upstream. If srp_init_qp() fails at srp_create_ch_ib() then ch->send_cq may be NULL. Calling directly to ib_destroy_qp() is sufficient because no work requests were posted on the created qp. Fixes: 9294000d6d89 ("IB/srp: Drain the send queue before destroying a QP") Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Bart van Assche -- Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index cee46266f434..f6b5e14a7eab 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -570,7 +570,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) return 0; err_qp: - srp_destroy_qp(ch, qp); + ib_destroy_qp(qp); err_send_cq: ib_free_cq(send_cq); From f7e82ab3b65e075b956a0a41d3a37d21ecdb8b07 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 May 2017 09:02:00 -0700 Subject: [PATCH 428/465] RDMA/qib,hfi1: Fix MR reference count leak on write with immediate commit 1feb40067cf04ae48d65f728d62ca255c9449178 upstream. The handling of IB_RDMA_WRITE_ONLY_WITH_IMMEDIATE will leak a memory reference when a buffer cannot be allocated for returning the immediate data. The issue is that the rkey validation has already occurred and the RNR nak fails to release the reference that was fruitlessly gotten. 
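A minimal sketch of the leaking path, using the names from the hfi1_rc_rcv() hunk below:

	ret = hfi1_rvt_get_rwqe(qp, 1);	/* rkey already validated; qp->r_sge holds the reference */
	if (!ret)
		goto rnr_nak;		/* old code: RNR NAK exit without rvt_put_ss(&qp->r_sge) */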
The the peer will send the identical single packet request when its RNR timer pops. The fix is to release the held reference prior to the rnr nak exit. This is the only sequence the requires both rkey validation and the buffer allocation on the same packet. Tested-by: Tadeusz Struk Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/rc.c | 5 ++++- drivers/infiniband/hw/qib/qib_rc.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 7382be11afca..aa71b7384640 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2149,8 +2149,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; - if (!ret) + if (!ret) { + /* peer will send again */ + rvt_put_ss(&qp->r_sge); goto rnr_nak; + } wc.ex.imm_data = ohdr->u.rc.imm_data; wc.wc_flags = IB_WC_WITH_IMM; goto send_last; diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 12658e3fe154..f7dfccf9a1e1 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1947,8 +1947,10 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, ret = qib_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; - if (!ret) + if (!ret) { + rvt_put_ss(&qp->r_sge); goto rnr_nak; + } wc.ex.imm_data = ohdr->u.rc.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; From d6eaf7a4d6230c1d88e1b8afe3179947fbc007b1 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Sat, 20 May 2017 15:03:29 -0500 Subject: [PATCH 429/465] x86/boot: Use CROSS_COMPILE prefix for readelf commit 3780578761921f094179c6289072a74b2228c602 upstream. The boot code Makefile contains a straight 'readelf' invocation. This causes build warnings in cross compile environments, when there is no unprefixed readelf accessible via $PATH. Add the missing $(CROSS_COMPILE) prefix. [ tglx: Rewrote changelog ] Fixes: 98f78525371b ("x86/boot: Refuse to build with data relocations") Signed-off-by: Rob Landley Acked-by: Kees Cook Cc: Jiri Kosina Cc: Paul Bolle Cc: "H.J. Lu" Link: http://lkml.kernel.org/r/ced18878-693a-9576-a024-113ef39a22c0@landley.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 44163e8c3868..2c860ad4fe06 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -94,7 +94,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - readelf -S $$obj | grep -qF .rel.local && { \ + ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" >&2; \ exit 1; \ } || true; \ From 6caa2db34d34ab0584c4c4f1b57c24c9859bbbb3 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 2 Jun 2017 14:46:11 -0700 Subject: [PATCH 430/465] ksm: prevent crash after write_protect_page fails commit a7306c3436e9c8e584a4b9fad5f3dc91be2a6076 upstream. "err" needs to be left set to -EFAULT if split_huge_page succeeds. 
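A condensed sketch of the pre-fix flow in try_to_merge_one_page() (see the hunk below):

	err = -EFAULT;
	...
	if (PageTransCompound(page)) {
		err = split_huge_page(page);	/* success clobbers err with 0 */
		if (err)
			goto out_unlock;
	}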
Otherwise if "err" gets clobbered with zero and write_protect_page fails, try_to_merge_one_page() will succeed instead of returning -EFAULT and then try_to_merge_with_ksm_page() will continue thinking kpage is a PageKsm when in fact it's still an anonymous page. Eventually it'll crash in page_add_anon_rmap. This has been reproduced on Fedora25 kernel but I can reproduce with upstream too. The bug was introduced in commit f765f540598a ("ksm: prepare to new THP semantics") introduced in v4.5. page:fffff67546ce1cc0 count:4 mapcount:2 mapping:ffffa094551e36e1 index:0x7f0f46673 flags: 0x2ffffc0004007c(referenced|uptodate|dirty|lru|active|swapbacked) page dumped because: VM_BUG_ON_PAGE(!PageLocked(page)) page->mem_cgroup:ffffa09674bf0000 ------------[ cut here ]------------ kernel BUG at mm/rmap.c:1222! CPU: 1 PID: 76 Comm: ksmd Not tainted 4.9.3-200.fc25.x86_64 #1 RIP: do_page_add_anon_rmap+0x1c4/0x240 Call Trace: page_add_anon_rmap+0x18/0x20 try_to_merge_with_ksm_page+0x50b/0x780 ksm_scan_thread+0x1211/0x1410 ? prepare_to_wait_event+0x100/0x100 ? try_to_merge_with_ksm_page+0x780/0x780 kthread+0xd9/0xf0 ? kthread_park+0x60/0x60 ret_from_fork+0x25/0x30 Fixes: f765f54059 ("ksm: prepare to new THP semantics") Link: http://lkml.kernel.org/r/20170513131040.21732-1-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Federico Simoncelli Acked-by: Kirill A. Shutemov Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/ksm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index 19b4f2dea7a5..11891ca81e40 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1028,8 +1028,7 @@ static int try_to_merge_one_page(struct vm_area_struct *vma, goto out; if (PageTransCompound(page)) { - err = split_huge_page(page); - if (err) + if (split_huge_page(page)) goto out_unlock; } From 74b4db844f3a435cf16eea7e499ca353c415b722 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 Jun 2017 14:46:25 -0700 Subject: [PATCH 431/465] slub/memcg: cure the brainless abuse of sysfs attributes commit 478fe3037b2278d276d4cd9cd0ab06c4cb2e9b32 upstream. memcg_propagate_slab_attrs() abuses the sysfs attribute file functions to propagate settings from the root kmem_cache to a newly created kmem_cache. It does that with: attr->show(root, buf); attr->store(new, buf, strlen(bug); Aside of being a lazy and absurd hackery this is broken because it does not check the return value of the show() function. Some of the show() functions return 0 w/o touching the buffer. That means in such a case the store function is called with the stale content of the previous show(). That causes nonsense like invoking kmem_cache_shrink() on a newly created kmem_cache. In the worst case it would cause handing in an uninitialized buffer. This should be rewritten proper by adding a propagate() callback to those slub_attributes which must be propagated and avoid that insane conversion to and from ASCII, but that's too large for a hot fix. Check at least the return value of the show() function, so calling store() with stale content is prevented. Steven said: "It can cause a deadlock with get_online_cpus() that has been uncovered by recent cpu hotplug and lockdep changes that Thomas and Peter have been doing. 
Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(cpu_hotplug.lock); lock(slab_mutex); lock(cpu_hotplug.lock); lock(slab_mutex); *** DEADLOCK ***" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1705201244540.2255@nanos Signed-off-by: Thomas Gleixner Reported-by: Steven Rostedt Acked-by: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 7f4bc7027ed5..e3863a22e10d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5512,6 +5512,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) char mbuf[64]; char *buf; struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); + ssize_t len; if (!attr || !attr->store || !attr->show) continue; @@ -5536,8 +5537,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) buf = buffer; } - attr->show(root_cache, buf); - attr->store(s, buf, strlen(buf)); + len = attr->show(root_cache, buf); + if (len > 0) + attr->store(s, buf, len); } if (buffer) From 4ec50822a58a275f09fbb433089eb906ab6d05c7 Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Tue, 18 Apr 2017 13:43:32 +0200 Subject: [PATCH 432/465] drm/gma500/psb: Actually use VBT mode when it is found commit 82bc9a42cf854fdf63155759c0aa790bd1f361b0 upstream. With LVDS we were incorrectly picking the pre-programmed mode instead of the prefered mode provided by VBT. Make sure we pick the VBT mode if one is provided. It is likely that the mode read-out code is still wrong but this patch fixes the immediate problem on most machines. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78562 Signed-off-by: Patrik Jakobsson Link: http://patchwork.freedesktop.org/patch/msgid/20170418114332.12183-1-patrik.r.jakobsson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/psb_intel_lvds.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index 483fdce74e39..97c444856d09 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -760,20 +760,23 @@ void psb_intel_lvds_init(struct drm_device *dev, if (scan->type & DRM_MODE_TYPE_PREFERRED) { mode_dev->panel_fixed_mode = drm_mode_duplicate(dev, scan); + DRM_DEBUG_KMS("Using mode from DDC\n"); goto out; /* FIXME: check for quirks */ } } /* Failed to get EDID, what about VBT? do we need this? 
*/ - if (mode_dev->vbt_mode) + if (dev_priv->lfp_lvds_vbt_mode) { mode_dev->panel_fixed_mode = - drm_mode_duplicate(dev, mode_dev->vbt_mode); + drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode); - if (!mode_dev->panel_fixed_mode) - if (dev_priv->lfp_lvds_vbt_mode) - mode_dev->panel_fixed_mode = - drm_mode_duplicate(dev, - dev_priv->lfp_lvds_vbt_mode); + if (mode_dev->panel_fixed_mode) { + mode_dev->panel_fixed_mode->type |= + DRM_MODE_TYPE_PREFERRED; + DRM_DEBUG_KMS("Using mode from VBT\n"); + goto out; + } + } /* * If we didn't get EDID, try checking if the panel is already turned @@ -790,6 +793,7 @@ void psb_intel_lvds_init(struct drm_device *dev, if (mode_dev->panel_fixed_mode) { mode_dev->panel_fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; + DRM_DEBUG_KMS("Using pre-programmed mode\n"); goto out; /* FIXME: check for quirks */ } } From 34836549fbc994bf122e6ae180413822e37b8977 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 May 2017 16:36:22 -0700 Subject: [PATCH 433/465] xfs: Fix missed holes in SEEK_HOLE implementation commit 5375023ae1266553a7baa0845e82917d8803f48c upstream. XFS SEEK_HOLE implementation could miss a hole in an unwritten extent as can be seen by the following command: xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "pwrite 128k 8k" -c "seek -h 0" file wrote 57344/57344 bytes at offset 0 56 KiB, 14 ops; 0.0000 sec (49.312 MiB/sec and 12623.9856 ops/sec) wrote 8192/8192 bytes at offset 131072 8 KiB, 2 ops; 0.0000 sec (70.383 MiB/sec and 18018.0180 ops/sec) Whence Result HOLE 139264 Where we can see that hole at offset 56k was just ignored by SEEK_HOLE implementation. The bug is in xfs_find_get_desired_pgoff() which does not properly detect the case when pages are not contiguous. Fix the problem by properly detecting when found page has larger offset than expected. Fixes: d126d43f631f996daeee5006714fed914be32368 Signed-off-by: Jan Kara Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 35703a801372..f371812e20c6 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1076,17 +1076,6 @@ xfs_find_get_desired_pgoff( break; } - /* - * At lease we found one page. If this is the first time we - * step into the loop, and if the first page index offset is - * greater than the given search offset, a hole was found. - */ - if (type == HOLE_OFF && lastoff == startoff && - lastoff < page_offset(pvec.pages[0])) { - found = true; - break; - } - for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; loff_t b_offset; @@ -1098,18 +1087,18 @@ xfs_find_get_desired_pgoff( * file mapping. However, page->index will not change * because we have a reference on the page. * - * Searching done if the page index is out of range. - * If the current offset is not reaches the end of - * the specified search range, there should be a hole - * between them. + * If current page offset is beyond where we've ended, + * we've found a hole. */ - if (page->index > end) { - if (type == HOLE_OFF && lastoff < endoff) { - *offset = lastoff; - found = true; - } + if (type == HOLE_OFF && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { + found = true; + *offset = lastoff; goto out; } + /* Searching done if the page index is out of range. 
*/ + if (page->index > end) + goto out; lock_page(page); /* From 0364c225a57c7db294c71ca59a5515e5ffcf25da Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 31 May 2017 08:22:52 -0700 Subject: [PATCH 434/465] xfs: use ->b_state to fix buffer I/O accounting release race commit 63db7c815bc0997c29e484d2409684fdd9fcd93b upstream. We've had user reports of unmount hangs in xfs_wait_buftarg() that analysis shows is due to btp->bt_io_count == -1. bt_io_count represents the count of in-flight asynchronous buffers and thus should always be >= 0. xfs_wait_buftarg() waits for this value to stabilize to zero in order to ensure that all untracked (with respect to the lru) buffers have completed I/O processing before unmount proceeds to tear down in-core data structures. The value of -1 implies an I/O accounting decrement race. Indeed, the fact that xfs_buf_ioacct_dec() is called from xfs_buf_rele() (where the buffer lock is no longer held) means that bp->b_flags can be updated from an unsafe context. While a user-level reproducer is currently not available, some intrusive hacks to run racing buffer lookups/ioacct/releases from multiple threads was used to successfully manufacture this problem. Existing callers do not expect to acquire the buffer lock from xfs_buf_rele(). Therefore, we can not safely update ->b_flags from this context. It turns out that we already have separate buffer state bits and associated serialization for dealing with buffer LRU state in the form of ->b_state and ->b_lock. Therefore, replace the _XBF_IN_FLIGHT flag with a ->b_state variant, update the I/O accounting wrappers appropriately and make sure they are used with the correct locking. This ensures that buffer in-flight state can be modified at buffer release time without racing with modifications from a buffer lock holder. Fixes: 9c7504aa72b6 ("xfs: track and serialize in-flight async buffers against unmount") Signed-off-by: Brian Foster Reviewed-by: Nikolay Borisov Tested-by: Libor Pechacek Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 38 ++++++++++++++++++++++++++------------ fs/xfs/xfs_buf.h | 5 ++--- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b6208728ba39..25e0fb059a27 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -97,12 +97,16 @@ static inline void xfs_buf_ioacct_inc( struct xfs_buf *bp) { - if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT)) + if (bp->b_flags & XBF_NO_IOACCT) return; ASSERT(bp->b_flags & XBF_ASYNC); - bp->b_flags |= _XBF_IN_FLIGHT; - percpu_counter_inc(&bp->b_target->bt_io_count); + spin_lock(&bp->b_lock); + if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { + bp->b_state |= XFS_BSTATE_IN_FLIGHT; + percpu_counter_inc(&bp->b_target->bt_io_count); + } + spin_unlock(&bp->b_lock); } /* @@ -110,14 +114,24 @@ xfs_buf_ioacct_inc( * freed and unaccount from the buftarg. 
*/ static inline void -xfs_buf_ioacct_dec( +__xfs_buf_ioacct_dec( struct xfs_buf *bp) { - if (!(bp->b_flags & _XBF_IN_FLIGHT)) - return; + ASSERT(spin_is_locked(&bp->b_lock)); - bp->b_flags &= ~_XBF_IN_FLIGHT; - percpu_counter_dec(&bp->b_target->bt_io_count); + if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { + bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; + percpu_counter_dec(&bp->b_target->bt_io_count); + } +} + +static inline void +xfs_buf_ioacct_dec( + struct xfs_buf *bp) +{ + spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + spin_unlock(&bp->b_lock); } /* @@ -149,9 +163,9 @@ xfs_buf_stale( * unaccounted (released to LRU) before that occurs. Drop in-flight * status now to preserve accounting consistency. */ - xfs_buf_ioacct_dec(bp); - spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) @@ -979,12 +993,12 @@ xfs_buf_rele( * ensures the decrement occurs only once per-buf. */ if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); goto out_unlock; } /* the last reference has been dropped ... */ - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU take a new reference to the diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 3c867e5a63e1..16e92625dff7 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -63,7 +63,6 @@ typedef enum { #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */ -#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */ typedef unsigned int xfs_buf_flags_t; @@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" }, \ - { _XBF_IN_FLIGHT, "IN_FLIGHT" } + { _XBF_COMPOUND, "COMPOUND" } /* * Internal state flags. */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ +#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ /* * The xfs_buftarg contains 2 notions of "sector size" - From a19348b729e85c1e0e08ec9e82115527d7539a14 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 23 May 2017 08:30:46 -0700 Subject: [PATCH 435/465] xfs: fix off-by-one on max nr_pages in xfs_find_get_desired_pgoff() commit 8affebe16d79ebefb1d9d6d56a46dc89716f9453 upstream. xfs_find_get_desired_pgoff() is used to search for offset of hole or data in page range [index, end] (both inclusive), and the max number of pages to search should be at least one, if end == index. Otherwise the only page is missed and no hole or data is found, which is not correct. When block size is smaller than page size, this can be demonstrated by preallocating a file with size smaller than page size and writing data to the last block. E.g. run this xfs_io command on a 1k block size XFS on x86_64 host. # xfs_io -fc "falloc 0 3k" -c "pwrite 2k 1k" \ -c "seek -d 0" /mnt/xfs/testfile wrote 1024/1024 bytes at offset 2048 1 KiB, 1 ops; 0.0000 sec (33.675 MiB/sec and 34482.7586 ops/sec) Whence Result DATA EOF Data at offset 2k was missed, and lseek(2) returned ENXIO. This is uncovered by generic/285 subtest 07 and 08 on ppc64 host, where pagesize is 64k. Because a recent change to generic/285 reduced the preallocated file size to smaller than 64k. 
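Worked through with the expression in the patch below: when end == index, the old want = min_t(pgoff_t, end - index, PAGEVEC_SIZE) evaluates to 0, so pagevec_lookup() is asked for zero pages and the lone page covering the range is never inspected; the fixed want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1 evaluates to 1, so that page is looked up and the data at offset 2k is found.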
Signed-off-by: Eryu Guan Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f371812e20c6..f1517e9928c7 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1049,7 +1049,7 @@ xfs_find_get_desired_pgoff( unsigned nr_pages; unsigned int i; - want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); /* From 2cd6bd867a3016098c5942b65f5410d98f805d80 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 28 Mar 2017 14:51:44 -0700 Subject: [PATCH 436/465] xfs: use dedicated log worker wq to avoid deadlock with cil wq commit 696a562072e3c14bcd13ae5acc19cdf27679e865 upstream. The log covering background task used to be part of the xfssyncd workqueue. That workqueue was removed as of commit 5889608df ("xfs: syncd workqueue is no more") and the associated work item scheduled to the xfs-log wq. The latter is used for log buffer I/O completion. Since xfs_log_worker() can invoke a log flush, a deadlock is possible between the xfs-log and xfs-cil workqueues. Consider the following codepath from xfs_log_worker(): xfs_log_worker() xfs_log_force() _xfs_log_force() xlog_cil_force() xlog_cil_force_lsn() xlog_cil_push_now() flush_work() The above is in xfs-log wq context and blocked waiting on the completion of an xfs-cil work item. Concurrently, the cil push in progress can end up blocked here: xlog_cil_push_work() xlog_cil_push() xlog_write() xlog_state_get_iclog_space() xlog_wait(&log->l_flush_wait, ...) The above is in xfs-cil context waiting on log buffer I/O completion, which executes in xfs-log wq context. In this scenario both workqueues are deadlocked waiting on eachother. Add a new workqueue specifically for the high level log covering and ail pushing worker, as was the case prior to commit 5889608df. Diagnosed-by: David Jeffery Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 2 +- fs/xfs/xfs_mount.h | 1 + fs/xfs/xfs_super.c | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b1469f0a91a6..bb58cd1873c9 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1293,7 +1293,7 @@ void xfs_log_work_queue( struct xfs_mount *mp) { - queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work, + queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work, msecs_to_jiffies(xfs_syncd_centisecs * 10)); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 6db6fd6b82b0..22b2185e93a0 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -183,6 +183,7 @@ typedef struct xfs_mount { struct workqueue_struct *m_reclaim_workqueue; struct workqueue_struct *m_log_workqueue; struct workqueue_struct *m_eofblocks_workqueue; + struct workqueue_struct *m_sync_workqueue; /* * Generation of the filesysyem layout. 
This is incremented by each diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 685c042a120f..47d239dcf3f4 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -877,8 +877,15 @@ xfs_init_mount_workqueues( if (!mp->m_eofblocks_workqueue) goto out_destroy_log; + mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, + mp->m_fsname); + if (!mp->m_sync_workqueue) + goto out_destroy_eofb; + return 0; +out_destroy_eofb: + destroy_workqueue(mp->m_eofblocks_workqueue); out_destroy_log: destroy_workqueue(mp->m_log_workqueue); out_destroy_reclaim: @@ -899,6 +906,7 @@ STATIC void xfs_destroy_mount_workqueues( struct xfs_mount *mp) { + destroy_workqueue(mp->m_sync_workqueue); destroy_workqueue(mp->m_eofblocks_workqueue); destroy_workqueue(mp->m_log_workqueue); destroy_workqueue(mp->m_reclaim_workqueue); From 1c862f5a3e14e3bfbd7af95636cc19bfa6c44b0d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Apr 2017 15:17:57 -0700 Subject: [PATCH 437/465] xfs: fix over-copying of getbmap parameters from userspace commit be6324c00c4d1e0e665f03ed1fc18863a88da119 upstream. In xfs_ioc_getbmap, we should only copy the fields of struct getbmap from userspace, or else we end up copying random stack contents into the kernel. struct getbmap is a strict subset of getbmapx, so a partial structure copy should work fine. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 2fd7fdf5438f..6d30b06e79bc 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1543,10 +1543,11 @@ xfs_ioc_getbmap( unsigned int cmd, void __user *arg) { - struct getbmapx bmx; + struct getbmapx bmx = { 0 }; int error; - if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) + /* struct getbmap is a strict subset of struct getbmapx. */ + if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags))) return -EFAULT; if (bmx.bmv_count < 2) From 8a1f785887d32c1d5206051a78a22529d61248bf Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 6 Apr 2017 16:00:39 -0700 Subject: [PATCH 438/465] xfs: actually report xattr extents via iomap commit 84358536dc355a9c8978ee425f87e116186bed16 upstream. Apparently FIEMAP for xattrs has been broken since we switched to the iomap backend because of an incorrect check for xattr presence. Also fix the broken locking. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iomap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 288ee5b840d7..79e6dfac3dd6 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1170,10 +1170,10 @@ xfs_xattr_iomap_begin( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - lockmode = xfs_ilock_data_map_shared(ip); + lockmode = xfs_ilock_attr_map_shared(ip); /* if there are no attribute fork or extents, return ENOENT */ - if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { + if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { error = -ENOENT; goto out_unlock; } From eff615a670145310a4b27b9485d7ac98e95c71e1 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 11 Apr 2017 10:50:05 -0700 Subject: [PATCH 439/465] xfs: drop iolock from reclaim context to appease lockdep commit 3b4683c294095b5f777c03307ef8c60f47320e12 upstream. 
Lockdep complains about use of the iolock in inode reclaim context because it doesn't understand that reclaim has the last reference to the inode, and thus an iolock->reclaim->iolock deadlock is not possible. The iolock is technically not necessary in xfs_inactive() and was only added to appease an assert in xfs_free_eofblocks(), which can be called from other non-reclaim contexts. Therefore, just kill the assert and drop the use of the iolock from reclaim context to quiet lockdep. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_util.c | 8 +++----- fs/xfs/xfs_inode.c | 9 +++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 828532ce0adc..cad4e3e2df00 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -904,9 +904,9 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) } /* - * This is called by xfs_inactive to free any blocks beyond eof - * when the link count isn't zero and by xfs_dm_punch_hole() when - * punching a hole to EOF. + * This is called to free any blocks beyond eof. The caller must hold + * IOLOCK_EXCL unless we are in the inode reclaim path and have the only + * reference to the inode. */ int xfs_free_eofblocks( @@ -921,8 +921,6 @@ xfs_free_eofblocks( struct xfs_bmbt_irec imap; struct xfs_mount *mp = ip->i_mount; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - /* * Figure out if there are any blocks beyond the end * of the file. If not, then there is nothing to do. diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7605d8396596..ec9826c56500 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1906,12 +1906,13 @@ xfs_inactive( * force is true because we are evicting an inode from the * cache. Post-eof blocks must be freed, lest we end up with * broken free space accounting. + * + * Note: don't bother with iolock here since lockdep complains + * about acquiring it in reclaim context. We have the only + * reference to the inode at this point anyways. */ - if (xfs_can_free_eofblocks(ip, true)) { - xfs_ilock(ip, XFS_IOLOCK_EXCL); + if (xfs_can_free_eofblocks(ip, true)) xfs_free_eofblocks(ip); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } return; } From 23da04dcc3af73fd69ed4229bf8e095ed524d7c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2017 16:45:52 -0700 Subject: [PATCH 440/465] xfs: fix integer truncation in xfs_bmap_remap_alloc commit 52813fb13ff90bd9c39a93446cbf1103c290b6e9 upstream. bno should be a xfs_fsblock_t, which is 64-bit wides instead of a xfs_aglock_t, which truncates the value to 32 bits. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 9bd104f32908..2a426d127e05 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3863,7 +3863,7 @@ xfs_bmap_remap_alloc( { struct xfs_trans *tp = ap->tp; struct xfs_mount *mp = tp->t_mountp; - xfs_agblock_t bno; + xfs_fsblock_t bno; struct xfs_alloc_arg args; int error; From 91bb4f7da5814686e5911941569f93da2e5d8f67 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 13 Apr 2017 15:15:47 -0700 Subject: [PATCH 441/465] xfs: handle array index overrun in xfs_dir2_leaf_readbuf() commit 023cc840b40fad95c6fe26fff1d380a8c9d45939 upstream. 
Carlos had a case where "find" seemed to start spinning forever and never return. This was on a filesystem with non-default multi-fsb (8k) directory blocks, and a fragmented directory with extents like this: 0:[0,133646,2,0] 1:[2,195888,1,0] 2:[3,195890,1,0] 3:[4,195892,1,0] 4:[5,195894,1,0] 5:[6,195896,1,0] 6:[7,195898,1,0] 7:[8,195900,1,0] 8:[9,195902,1,0] 9:[10,195908,1,0] 10:[11,195910,1,0] 11:[12,195912,1,0] 12:[13,195914,1,0] ... i.e. the first extent is a contiguous 2-fsb dir block, but after that it is fragmented into 1 block extents. At the top of the readdir path, we allocate a mapping array which (for this filesystem geometry) can hold 10 extents; see the assignment to map_info->map_size. During readdir, we are therefore able to map extents 0 through 9 above into the array for readahead purposes. If we count by 2, we see that the last mapped index (9) is the first block of a 2-fsb directory block. At the end of xfs_dir2_leaf_readbuf() we have 2 loops to fill more readahead; the outer loop assumes one full dir block is processed each loop iteration, and an inner loop that ensures that this is so by advancing to the next extent until a full directory block is mapped. The problem is that this inner loop may step past the last extent in the mapping array as it tries to reach the end of the directory block. This will read garbage for the extent length, and as a result the loop control variable 'j' may become corrupted and never fail the loop conditional. The number of valid mappings we have in our array is stored in map->map_valid, so stop this inner loop based on that limit. There is an ASSERT at the top of the outer loop for this same condition, but we never made it out of the inner loop, so the ASSERT never fired. Huge appreciation for Carlos for debugging and isolating the problem. Debugged-and-analyzed-by: Carlos Maiolino Signed-off-by: Eric Sandeen Tested-by: Carlos Maiolino Reviewed-by: Carlos Maiolino Reviewed-by: Bill O'Donnell Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dir2_readdir.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index ad9396e516f6..c45de729c74e 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -394,6 +394,7 @@ xfs_dir2_leaf_readbuf( /* * Do we need more readahead? + * Each loop tries to process 1 full dir blk; last may be partial. */ blk_start_plug(&plug); for (mip->ra_index = mip->ra_offset = i = 0; @@ -425,9 +426,14 @@ xfs_dir2_leaf_readbuf( } /* - * Advance offset through the mapping table. + * Advance offset through the mapping table, processing a full + * dir block even if it is fragmented into several extents. + * But stop if we have consumed all valid mappings, even if + * it's not yet a full directory block. */ - for (j = 0; j < geo->fsbcount; j += length ) { + for (j = 0; + j < geo->fsbcount && mip->ra_index < mip->map_valid; + j += length ) { /* * The rest of this extent but not more than a dir * block. From 89aab40e1208738ec114d516541c8928a949574f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 20 Apr 2017 08:06:47 -0700 Subject: [PATCH 442/465] xfs: prevent multi-fsb dir readahead from reading random blocks commit cb52ee334a45ae6c78a3999e4b473c43ddc528f4 upstream. Directory block readahead uses a complex iteration mechanism to map between high-level directory blocks and underlying physical extents. 
This mechanism attempts to traverse the higher-level dir blocks in a manner that handles multi-fsb directory blocks and simultaneously maintains a reference to the corresponding physical blocks. This logic doesn't handle certain (discontiguous) physical extent layouts correctly with multi-fsb directory blocks. For example, consider the case of a 4k FSB filesystem with a 2 FSB (8k) directory block size and a directory with the following extent layout: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL 0: [0..7]: 88..95 0 (88..95) 8 1: [8..15]: 80..87 0 (80..87) 8 2: [16..39]: 168..191 0 (168..191) 24 3: [40..63]: 5242952..5242975 1 (72..95) 24 Directory block 0 spans physical extents 0 and 1, dirblk 1 lies entirely within extent 2 and dirblk 2 spans extents 2 and 3. Because extent 2 is larger than the directory block size, the readahead code erroneously assumes the block is contiguous and issues a readahead based on the physical mapping of the first fsb of the dirblk. This results in read verifier failure and a spurious corruption or crc failure, depending on the filesystem format. Further, the subsequent readahead code responsible for walking through the physical table doesn't correctly advance the physical block reference for dirblk 2. Instead of advancing two physical filesystem blocks, the first iteration of the loop advances 1 block (correctly), but the subsequent iteration advances 2 more physical blocks because the next physical extent (extent 3, above) happens to cover more than dirblk 2. At this point, the higher-level directory block walking is completely off the rails of the actual physical layout of the directory for the respective mapping table. Update the contiguous dirblock logic to consider the current offset in the physical extent to avoid issuing directory readahead to unrelated blocks. Also, update the mapping table advancing code to consider the current offset within the current dirblock to avoid advancing the mapping reference too far beyond the dirblock. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dir2_readdir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index c45de729c74e..20b7a5c6eb2f 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -405,7 +405,8 @@ xfs_dir2_leaf_readbuf( * Read-ahead a contiguous directory block. */ if (i > mip->ra_current && - map[mip->ra_index].br_blockcount >= geo->fsbcount) { + (map[mip->ra_index].br_blockcount - mip->ra_offset) >= + geo->fsbcount) { xfs_dir3_data_readahead(dp, map[mip->ra_index].br_startoff + mip->ra_offset, XFS_FSB_TO_DADDR(dp->i_mount, @@ -438,7 +439,7 @@ xfs_dir2_leaf_readbuf( * The rest of this extent but not more than a dir * block. */ - length = min_t(int, geo->fsbcount, + length = min_t(int, geo->fsbcount - j, map[mip->ra_index].br_blockcount - mip->ra_offset); mip->ra_offset += length; From bf16242614e25923597bd8de0fc2d554b85c8330 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 21 Apr 2017 12:40:44 -0700 Subject: [PATCH 443/465] xfs: fix up quotacheck buffer list error handling commit 20e8a063786050083fe05b4f45be338c60b49126 upstream. The quotacheck error handling of the delwri buffer list assumes the resident buffers are locked and doesn't clear the _XBF_DELWRI_Q flag on the buffers that are dequeued. This can lead to assert failures on buffer release and possibly other locking problems. 
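A minimal sketch of the old quotacheck error path being replaced (see the xfs_qm.c hunk below):

	while (!list_empty(&buffer_list)) {
		struct xfs_buf *bp = list_first_entry(&buffer_list,
						      struct xfs_buf, b_list);

		list_del_init(&bp->b_list);	/* _XBF_DELWRI_Q left set, buffer not locked */
		xfs_buf_relse(bp);		/* releases a buffer this context never locked */
	}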
Move this code to a delwri queue cancel helper function to encapsulate the logic required to properly release buffers from a delwri queue. Update the helper to clear the delwri queue flag and call it from quotacheck. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 24 ++++++++++++++++++++++++ fs/xfs/xfs_buf.h | 1 + fs/xfs/xfs_qm.c | 7 +------ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 25e0fb059a27..4e19fdabac9d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1093,6 +1093,8 @@ void xfs_buf_unlock( struct xfs_buf *bp) { + ASSERT(xfs_buf_islocked(bp)); + XB_CLEAR_OWNER(bp); up(&bp->b_sema); @@ -1828,6 +1830,28 @@ xfs_alloc_buftarg( return NULL; } +/* + * Cancel a delayed write list. + * + * Remove each buffer from the list, clear the delwri queue flag and drop the + * associated buffer reference. + */ +void +xfs_buf_delwri_cancel( + struct list_head *list) +{ + struct xfs_buf *bp; + + while (!list_empty(list)) { + bp = list_first_entry(list, struct xfs_buf, b_list); + + xfs_buf_lock(bp); + bp->b_flags &= ~_XBF_DELWRI_Q; + list_del_init(&bp->b_list); + xfs_buf_relse(bp); + } +} + /* * Add a buffer to the delayed write list. * diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 16e92625dff7..39d9334e59ee 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -329,6 +329,7 @@ extern void *xfs_buf_offset(struct xfs_buf *, size_t); extern void xfs_buf_stale(struct xfs_buf *bp); /* Delayed Write Buffer Routines */ +extern void xfs_buf_delwri_cancel(struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b669b123287b..8b9a9f15f022 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1384,12 +1384,7 @@ xfs_qm_quotacheck( mp->m_qflags |= flags; error_return: - while (!list_empty(&buffer_list)) { - struct xfs_buf *bp = - list_first_entry(&buffer_list, struct xfs_buf, b_list); - list_del_init(&bp->b_list); - xfs_buf_relse(bp); - } + xfs_buf_delwri_cancel(&buffer_list); if (error) { xfs_warn(mp, From 56aab3095b6cb0d40a9b04c316bb9306dc17aff3 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 26 Apr 2017 08:30:39 -0700 Subject: [PATCH 444/465] xfs: support ability to wait on new inodes commit 756baca27fff3ecaeab9dbc7a5ee35a1d7bc0c7f upstream. Inodes that are inserted into the perag tree but still under construction are flagged with the XFS_INEW bit. Most contexts either skip such inodes when they are encountered or have the ability to handle them. The runtime quotaoff sequence introduces a context that must wait for construction of such inodes to correctly ensure that all dquots in the fs are released. In anticipation of this, support the ability to wait on new inodes. Wake the appropriate bit when XFS_INEW is cleared. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 5 ++++- fs/xfs/xfs_inode.h | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 3531f8f72fa5..25f4f4595821 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -366,14 +366,17 @@ xfs_iget_cache_hit( error = xfs_reinit_inode(mp, inode); if (error) { + bool wake; /* * Re-initializing the inode failed, and we are in deep * trouble. Try to re-add it to the reclaim list. */ rcu_read_lock(); spin_lock(&ip->i_flags_lock); - + wake = !!__xfs_iflags_test(ip, XFS_INEW); ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); + if (wake) + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); ASSERT(ip->i_flags & XFS_IRECLAIMABLE); trace_xfs_iget_reclaim_fail(ip); goto out_error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 10dcf27b4c85..10e89fcb49d7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -216,7 +216,8 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ #define XFS_ISTALE (1 << 1) /* inode has been staled */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ -#define XFS_INEW (1 << 3) /* inode has just been allocated */ +#define __XFS_INEW_BIT 3 /* inode has just been allocated */ +#define XFS_INEW (1 << __XFS_INEW_BIT) #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -464,6 +465,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip) xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(VFS_I(ip)); + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); } static inline void xfs_setup_existing_inode(struct xfs_inode *ip) From f8c68633bb9560d04bf62912136b47630686b8d1 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 26 Apr 2017 08:30:39 -0700 Subject: [PATCH 445/465] xfs: update ag iterator to support wait on new inodes commit ae2c4ac2dd39b23a87ddb14ceddc3f2872c6aef5 upstream. The AG inode iterator currently skips new inodes as such inodes are inserted into the inode radix tree before they are fully constructed. Certain contexts require the ability to wait on the construction of new inodes, however. The fs-wide dquot release from the quotaoff sequence is an example of this. Update the AG inode iterator to support the ability to wait on inodes flagged with XFS_INEW upon request. Create a new xfs_inode_ag_iterator_flags() interface and support a set of iteration flags to modify the iteration behavior. When the XFS_AGITER_INEW_WAIT flag is set, include XFS_INEW flags in the radix tree inode lookup and wait on them before the callback is executed. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 53 ++++++++++++++++++++++++++++++++++++++------- fs/xfs/xfs_icache.h | 8 +++++++ 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 25f4f4595821..f61c84f8e31a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -262,6 +262,22 @@ xfs_inode_clear_reclaim_tag( xfs_perag_clear_reclaim_tag(pag); } +static void +xfs_inew_wait( + struct xfs_inode *ip) +{ + wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT); + DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT); + + do { + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + if (!xfs_iflags_test(ip, XFS_INEW)) + break; + schedule(); + } while (true); + finish_wait(wq, &wait.wait); +} + /* * When we recycle a reclaimable inode, we need to re-initialise the VFS inode * part of the structure. This is made more complex by the fact we store @@ -626,9 +642,11 @@ xfs_iget( STATIC int xfs_inode_ag_walk_grab( - struct xfs_inode *ip) + struct xfs_inode *ip, + int flags) { struct inode *inode = VFS_I(ip); + bool newinos = !!(flags & XFS_AGITER_INEW_WAIT); ASSERT(rcu_read_lock_held()); @@ -646,7 +664,8 @@ xfs_inode_ag_walk_grab( goto out_unlock_noent; /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) || + __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM)) goto out_unlock_noent; spin_unlock(&ip->i_flags_lock); @@ -674,7 +693,8 @@ xfs_inode_ag_walk( void *args), int flags, void *args, - int tag) + int tag, + int iter_flags) { uint32_t first_index; int last_error = 0; @@ -716,7 +736,7 @@ xfs_inode_ag_walk( for (i = 0; i < nr_found; i++) { struct xfs_inode *ip = batch[i]; - if (done || xfs_inode_ag_walk_grab(ip)) + if (done || xfs_inode_ag_walk_grab(ip, iter_flags)) batch[i] = NULL; /* @@ -744,6 +764,9 @@ xfs_inode_ag_walk( for (i = 0; i < nr_found; i++) { if (!batch[i]) continue; + if ((iter_flags & XFS_AGITER_INEW_WAIT) && + xfs_iflags_test(batch[i], XFS_INEW)) + xfs_inew_wait(batch[i]); error = execute(batch[i], flags, args); IRELE(batch[i]); if (error == -EAGAIN) { @@ -823,12 +846,13 @@ xfs_cowblocks_worker( } int -xfs_inode_ag_iterator( +xfs_inode_ag_iterator_flags( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, - void *args) + void *args, + int iter_flags) { struct xfs_perag *pag; int error = 0; @@ -838,7 +862,8 @@ xfs_inode_ag_iterator( ag = 0; while ((pag = xfs_perag_get(mp, ag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1, + iter_flags); xfs_perag_put(pag); if (error) { last_error = error; @@ -849,6 +874,17 @@ xfs_inode_ag_iterator( return last_error; } +int +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, + void *args), + int flags, + void *args) +{ + return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0); +} + int xfs_inode_ag_iterator_tag( struct xfs_mount *mp, @@ -866,7 +902,8 @@ xfs_inode_ag_iterator_tag( ag = 0; while ((pag = xfs_perag_get_tag(mp, ag, tag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag, + 0); xfs_perag_put(pag); if (error) { last_error = error; diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 
8a7c849b4dea..9183f77958ef 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -48,6 +48,11 @@ struct xfs_eofblocks { #define XFS_IGET_UNTRUSTED 0x2 #define XFS_IGET_DONTCACHE 0x4 +/* + * flags for AG inode iterator + */ +#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */ + int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, uint flags, uint lock_flags, xfs_inode_t **ipp); @@ -79,6 +84,9 @@ void xfs_cowblocks_worker(struct work_struct *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); +int xfs_inode_ag_iterator_flags(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, void *args), + int flags, void *args, int iter_flags); int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, int tag); From 67e439ccfe51bc393a1fabaa78340d4cd60386d0 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 26 Apr 2017 08:30:40 -0700 Subject: [PATCH 446/465] xfs: wait on new inodes during quotaoff dquot release commit e20c8a517f259cb4d258e10b0cd5d4b30d4167a0 upstream. The quotaoff operation has a race with inode allocation that results in a livelock. An inode allocation that occurs before the quota status flags are updated acquires the appropriate dquots for the inode via xfs_qm_vop_dqalloc(). It then inserts the XFS_INEW inode into the perag radix tree, sometime later attaches the dquots to the inode and finally clears the XFS_INEW flag. Quotaoff expects to release the dquots from all inodes in the filesystem via xfs_qm_dqrele_all_inodes(). This invokes the AG inode iterator, which skips inodes in the XFS_INEW state because they are not fully constructed. If the scan occurs after dquots have been attached to an inode, but before XFS_INEW is cleared, the newly allocated inode will continue to hold a reference to the applicable dquots. When quotaoff invokes xfs_qm_dqpurge_all(), the reference count of those dquot(s) remain elevated and the dqpurge scan spins indefinitely. To address this problem, update the xfs_qm_dqrele_all_inodes() scan to wait on inodes marked on the XFS_INEW state. We wait on the inodes explicitly rather than skip and retry to avoid continuous retry loops due to a parallel inode allocation workload. Since quotaoff updates the quota state flags and uses a synchronous transaction before the dqrele scan, and dquots are attached to inodes after radix tree insertion iff quota is enabled, one INEW waiting pass through the AG guarantees that the scan has processed all inodes that could possibly hold dquot references. Reported-by: Eryu Guan Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_qm_syscalls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 475a3882a81f..9cb5c381b01c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -759,5 +759,6 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL); + xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL, + XFS_AGITER_INEW_WAIT); } From 365625a6701bac04e76d6b33eff7203f4d953f1d Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Wed, 12 Apr 2017 12:26:07 -0700 Subject: [PATCH 447/465] xfs: reserve enough blocks to handle btree splits when remapping commit fe0be23e68200573de027de9b8cc2b27e7fce35e upstream. In xfs_reflink_end_cow, we erroneously reserve only enough blocks to handle adding 1 extent. This is problematic if we fragment free space, have to do CoW, and then have to perform multiple bmap btree expansions. Furthermore, the BUI recovery routine doesn't reserve /any/ blocks to handle btree splits, so log recovery fails after our first error causes the filesystem to go down. Therefore, refactor the transaction block reservation macros until we have a macro that works for our deferred (re)mapping activities, and fix both problems by using that macro. With 1k blocks we can hit this fairly often in g/187 if the scratch fs is big enough. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_trans_space.h | 23 +++++++++++++++++------ fs/xfs/xfs_bmap_item.c | 5 ++++- fs/xfs/xfs_reflink.c | 18 ++++++++++++++++-- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 7917f6e44286..d787c677d2a3 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -21,8 +21,20 @@ /* * Components of space reservations. */ + +/* Worst case number of rmaps that can be held in a block. */ #define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \ (((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0])) + +/* Adding one rmap could split every level up to the top of the tree. */ +#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels) + +/* Blocks we might need to add "b" rmaps to a tree. */ +#define XFS_NRMAPADD_SPACE_RES(mp, b)\ + (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \ + XFS_RMAPADD_SPACE_RES(mp)) + #define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \ (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0])) #define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1) @@ -30,13 +42,12 @@ (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ XFS_EXTENTADD_SPACE_RES(mp,w)) + +/* Blocks we might need to add "b" mappings & rmappings to a file. */ #define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\ - (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ - XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ - XFS_EXTENTADD_SPACE_RES(mp,w) + \ - ((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ - XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \ - (mp)->m_rmap_maxlevels) + (XFS_NEXTENTADD_SPACE_RES((mp), (b), (w)) + \ + XFS_NRMAPADD_SPACE_RES((mp), (b))) + #define XFS_DAENTER_1B(mp,w) \ ((w) == XFS_DATA_FORK ? 
(mp)->m_dir_geo->fsbcount : 1) #define XFS_DAENTER_DBS(mp,w) \ diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 9bf57c76623b..c4b90e794e41 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -34,6 +34,8 @@ #include "xfs_bmap.h" #include "xfs_icache.h" #include "xfs_trace.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" kmem_zone_t *xfs_bui_zone; @@ -446,7 +448,8 @@ xfs_bui_recover( return -EIO; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); if (error) return error; budp = xfs_trans_get_bud(tp, buip); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 4a84c5ea266d..20c46d2ff4f5 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -709,8 +709,22 @@ xfs_reflink_end_cow( offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); - /* Start a rolling transaction to switch the mappings */ - resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); + /* + * Start a rolling transaction to switch the mappings. We're + * unlikely ever to have to remap 16T worth of single-block + * extents, so just cap the worst case extent count to 2^32-1. + * Stick a warning in just in case, and avoid 64-bit division. + */ + BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX); + if (end_fsb - offset_fsb > UINT_MAX) { + error = -EFSCORRUPTED; + xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE); + ASSERT(0); + goto out; + } + resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount, + (unsigned int)(end_fsb - offset_fsb), + XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, resblks, 0, 0, &tp); if (error) From d1a8ae21c3560d8c4b83e71008ec21fbf317bdb9 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 2 May 2017 13:54:47 -0700 Subject: [PATCH 448/465] xfs: fix use-after-free in xfs_finish_page_writeback commit 161f55efba5ddccc690139fae9373cafc3447a97 upstream. Commit 28b783e47ad7 ("xfs: bufferhead chains are invalid after end_page_writeback") fixed one use-after-free issue by pre-calculating the loop conditionals before calling bh->b_end_io() in the end_io processing loop, but it assigned 'next' pointer before checking end offset boundary & breaking the loop, at which point the bh might be freed already, and caused use-after-free. This is caught by KASAN when running fstests generic/127 on sub-page block size XFS. [ 2517.244502] run fstests generic/127 at 2017-04-27 07:30:50 [ 2747.868840] ================================================================== [ 2747.876949] BUG: KASAN: use-after-free in xfs_destroy_ioend+0x3d3/0x4e0 [xfs] at addr ffff8801395ae698 ... [ 2747.918245] Call Trace: [ 2747.920975] dump_stack+0x63/0x84 [ 2747.924673] kasan_object_err+0x21/0x70 [ 2747.928950] kasan_report+0x271/0x530 [ 2747.933064] ? xfs_destroy_ioend+0x3d3/0x4e0 [xfs] [ 2747.938409] ? end_page_writeback+0xce/0x110 [ 2747.943171] __asan_report_load8_noabort+0x19/0x20 [ 2747.948545] xfs_destroy_ioend+0x3d3/0x4e0 [xfs] [ 2747.953724] xfs_end_io+0x1af/0x2b0 [xfs] [ 2747.958197] process_one_work+0x5ff/0x1000 [ 2747.962766] worker_thread+0xe4/0x10e0 [ 2747.966946] kthread+0x2d3/0x3d0 [ 2747.970546] ? process_one_work+0x1000/0x1000 [ 2747.975405] ? kthread_create_on_node+0xc0/0xc0 [ 2747.980457] ? syscall_return_slowpath+0xe6/0x140 [ 2747.985706] ? 
do_page_fault+0x30/0x80 [ 2747.989887] ret_from_fork+0x2c/0x40 [ 2747.993874] Object at ffff8801395ae690, in cache buffer_head size: 104 [ 2748.001155] Allocated: [ 2748.003782] PID = 8327 [ 2748.006411] save_stack_trace+0x1b/0x20 [ 2748.010688] save_stack+0x46/0xd0 [ 2748.014383] kasan_kmalloc+0xad/0xe0 [ 2748.018370] kasan_slab_alloc+0x12/0x20 [ 2748.022648] kmem_cache_alloc+0xb8/0x1b0 [ 2748.027024] alloc_buffer_head+0x22/0xc0 [ 2748.031399] alloc_page_buffers+0xd1/0x250 [ 2748.035968] create_empty_buffers+0x30/0x410 [ 2748.040730] create_page_buffers+0x120/0x1b0 [ 2748.045493] __block_write_begin_int+0x17a/0x1800 [ 2748.050740] iomap_write_begin+0x100/0x2f0 [ 2748.055308] iomap_zero_range_actor+0x253/0x5c0 [ 2748.060362] iomap_apply+0x157/0x270 [ 2748.064347] iomap_zero_range+0x5a/0x80 [ 2748.068624] iomap_truncate_page+0x6b/0xa0 [ 2748.073227] xfs_setattr_size+0x1f7/0xa10 [xfs] [ 2748.078312] xfs_vn_setattr_size+0x68/0x140 [xfs] [ 2748.083589] xfs_file_fallocate+0x4ac/0x820 [xfs] [ 2748.088838] vfs_fallocate+0x2cf/0x780 [ 2748.093021] SyS_fallocate+0x48/0x80 [ 2748.097006] do_syscall_64+0x18a/0x430 [ 2748.101186] return_from_SYSCALL_64+0x0/0x6a [ 2748.105948] Freed: [ 2748.108189] PID = 8327 [ 2748.110816] save_stack_trace+0x1b/0x20 [ 2748.115093] save_stack+0x46/0xd0 [ 2748.118788] kasan_slab_free+0x73/0xc0 [ 2748.122969] kmem_cache_free+0x7a/0x200 [ 2748.127247] free_buffer_head+0x41/0x80 [ 2748.131524] try_to_free_buffers+0x178/0x250 [ 2748.136316] xfs_vm_releasepage+0x2e9/0x3d0 [xfs] [ 2748.141563] try_to_release_page+0x100/0x180 [ 2748.146325] invalidate_inode_pages2_range+0x7da/0xcf0 [ 2748.152087] xfs_shift_file_space+0x37d/0x6e0 [xfs] [ 2748.157557] xfs_collapse_file_space+0x49/0x120 [xfs] [ 2748.163223] xfs_file_fallocate+0x2a7/0x820 [xfs] [ 2748.168462] vfs_fallocate+0x2cf/0x780 [ 2748.172642] SyS_fallocate+0x48/0x80 [ 2748.176629] do_syscall_64+0x18a/0x430 [ 2748.180810] return_from_SYSCALL_64+0x0/0x6a Fixed it by checking on offset against end & breaking out first, dereference bh only if there're still bufferheads to process. Signed-off-by: Eryu Guan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 61494295d92f..a7645be51d87 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -111,11 +111,11 @@ xfs_finish_page_writeback( bsize = bh->b_size; do { + if (off > end) + break; next = bh->b_this_page; if (off < bvec->bv_offset) goto next_bh; - if (off > end) - break; bh->b_end_io(bh, !error); next_bh: off += bsize; From 1ae26380c860e76237e2d41bfba3fd01119f21b4 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 12 May 2017 10:44:08 -0700 Subject: [PATCH 449/465] xfs: fix indlen accounting error on partial delalloc conversion commit 0daaecacb83bc6b656a56393ab77a31c28139bc7 upstream. The delalloc -> real block conversion path uses an incorrect calculation in the case where the middle part of a delalloc extent is being converted. This is documented as a rare situation because XFS generally attempts to maximize contiguity by converting as much of a delalloc extent as possible. If this situation does occur, the indlen reservation for the two new delalloc extents left behind by the conversion of the middle range is calculated and compared with the original reservation. If more blocks are required, the delta is allocated from the global block pool. 
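The arithmetic is easier to see in isolation. A standalone sketch with made-up block counts (temp and temp2 stand for the new worst-case indlen of the two leftover delalloc extents, orig_indlen for startblockval(PREV.br_startblock), and allocated for blocks the btree cursor has already consumed): the old expression subtracted allocated from the delta itself, while the fix subtracts it from the unused part of the original reservation.

	#include <stdio.h>

	int main(void)
	{
		/* illustrative numbers only */
		long temp = 6, temp2 = 5;	/* new worst-case indlen for the two remaining extents */
		long orig_indlen = 10;		/* indlen reserved for the original delalloc extent */
		long allocated = 3;		/* blocks already allocated during the conversion */

		/* old (buggy) grouping: allocated is subtracted from the delta */
		long diff_old = temp + temp2 - orig_indlen - allocated;

		/* fixed grouping: allocated reduces the reservation still available */
		long diff_new = temp + temp2 - (orig_indlen - allocated);

		printf("old delta = %ld, fixed delta = %ld\n", diff_old, diff_new);
		return 0;
	}

With these numbers the old grouping computes -2 and allocates nothing from the pool, while the fixed grouping correctly asks for the 4 blocks by which the new requirement exceeds the unused original reservation.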
This delta value can be characterized as the difference between the new total requirement (temp + temp2) and the currently available reservation minus those blocks that have already been allocated (startblockval(PREV.br_startblock) - allocated). The problem is that the current code does not account for previously allocated blocks correctly. It subtracts the current allocation count from the (new - old) delta rather than the old indlen reservation. This means that more indlen blocks than have been allocated end up stashed in the remaining extents and free space accounting is broken as a result. Fix up the calculation to subtract the allocated block count from the original extent indlen and thus correctly allocate the reservation delta based on the difference between the new total requirement and the unused blocks from the original reservation. Also remove a bogus assert that contradicts the fact that the new indlen reservation can be larger than the original indlen reservation. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2a426d127e05..6c68f3b888b0 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -2106,8 +2106,10 @@ xfs_bmap_add_extent_delay_real( } temp = xfs_bmap_worst_indlen(bma->ip, temp); temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + diff = (int)(temp + temp2 - + (startblockval(PREV.br_startblock) - + (bma->cur ? + bma->cur->bc_private.b.allocated : 0))); if (diff > 0) { error = xfs_mod_fdblocks(bma->ip->i_mount, -((int64_t)diff), false); @@ -2164,7 +2166,6 @@ xfs_bmap_add_extent_delay_real( temp = da_new; if (bma->cur) temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); if (temp < da_old) xfs_mod_fdblocks(bma->ip->i_mount, (int64_t)(da_old - temp), false); From 2dc6e27120c2fcb3738490fa688e85a19457a2c2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 12 May 2017 10:44:08 -0700 Subject: [PATCH 450/465] xfs: BMAPX shouldn't barf on inline-format directories commit 6eadbf4c8ba816c10d1c97bed9aa861d9fd17809 upstream. When we're fulfilling a BMAPX request, jump out early if the data fork is in local format. This prevents us from hitting a debugging check in bmapi_read and barfing errors back to userspace. The on-disk extent count check later isn't sufficient for IF_DELALLOC mode because da extents are in memory and not on disk. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_util.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index cad4e3e2df00..9581e90fd559 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -583,9 +583,13 @@ xfs_getbmap( } break; default: + /* Local format data forks report no extents. 
*/ + if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + bmv->bmv_entries = 0; + return 0; + } if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE && - ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + ip->i_d.di_format != XFS_DINODE_FMT_BTREE) return -EINVAL; if (xfs_get_extsz_hint(ip) || From 2e08bd63dc7d7dc5026d546ff3c7fe52394dfc5b Mon Sep 17 00:00:00 2001 From: Zorro Lang Date: Mon, 15 May 2017 08:40:02 -0700 Subject: [PATCH 451/465] xfs: bad assertion for delalloc an extent that start at i_size commit 892d2a5f705723b2cb488bfb38bcbdcf83273184 upstream. By run fsstress long enough time enough in RHEL-7, I find an assertion failure (harder to reproduce on linux-4.11, but problem is still there): XFS: Assertion failed: (iflags & BMV_IF_DELALLOC) != 0, file: fs/xfs/xfs_bmap_util.c The assertion is in xfs_getbmap() funciton: if (map[i].br_startblock == DELAYSTARTBLOCK && --> map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); When map[i].br_startoff == XFS_B_TO_FSB(mp, XFS_ISIZE(ip)), the startoff is just at EOF. But we only need to make sure delalloc extents that are within EOF, not include EOF. Signed-off-by: Zorro Lang Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 9581e90fd559..70fbecbb2845 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -717,7 +717,7 @@ xfs_getbmap( * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && - map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) + map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && From ecb42615267f2ac94aca35642ee174a0d79eedbf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 15:18:31 +0200 Subject: [PATCH 452/465] xfs: xfs_trans_alloc_empty This is a partial cherry-pick of commit e89c041338 ("xfs: implement the GETFSMAP ioctl"), which also adds this helper, and a great example of why feature patches should be properly split into their parts. Signed-off-by: Darrick J. Wong [hch: split from the larger patch for -stable] Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_trans.c | 22 ++++++++++++++++++++++ fs/xfs/xfs_trans.h | 2 ++ 2 files changed, 24 insertions(+) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 70f42ea86dfb..a280e126491f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -262,6 +262,28 @@ xfs_trans_alloc( return 0; } +/* + * Create an empty transaction with no reservation. This is a defensive + * mechanism for routines that query metadata without actually modifying + * them -- if the metadata being queried is somehow cross-linked (think a + * btree block pointer that points higher in the tree), we risk deadlock. + * However, blocks grabbed as part of a transaction can be re-grabbed. + * The verifiers will notice the corrupt block and the operation will fail + * back to userspace without deadlocking. + * + * Note the zero-length reservation; this transaction MUST be cancelled + * without any dirty data. 
+ */ +int +xfs_trans_alloc_empty( + struct xfs_mount *mp, + struct xfs_trans **tpp) +{ + struct xfs_trans_res resv = {0}; + + return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp); +} + /* * Record the indicated change to the given field for application * to the file system's superblock when the transaction commits. diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 1646f659b60f..2a9292df6640 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -158,6 +158,8 @@ typedef struct xfs_trans { int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp, uint blocks, uint rtextents, uint flags, struct xfs_trans **tpp); +int xfs_trans_alloc_empty(struct xfs_mount *mp, + struct xfs_trans **tpp); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, From fff5729ce2b2e6fb03f66049f06340d17dbb9529 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 May 2017 19:16:15 -0700 Subject: [PATCH 453/465] xfs: avoid mount-time deadlock in CoW extent recovery commit 3ecb3ac7b950ff8f6c6a61e8b7b0d6e3546429a0 upstream. If a malicious user corrupts the refcount btree to cause a cycle between different levels of the tree, the next mount attempt will deadlock in the CoW recovery routine while grabbing buffer locks. We can use the ability to re-grab a buffer that was previous locked to a transaction to avoid deadlocks, so do that here. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_refcount.c | 43 ++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index b177ef33cd4c..82a38d86ebad 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1629,13 +1629,28 @@ xfs_refcount_recover_cow_leftovers( if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) return -EOPNOTSUPP; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + INIT_LIST_HEAD(&debris); + + /* + * In this first part, we use an empty transaction to gather up + * all the leftover CoW extents so that we can subsequently + * delete them. The empty transaction is used to avoid + * a buffer lock deadlock if there happens to be a loop in the + * refcountbt because we're allowed to re-grab a buffer that is + * already attached to our transaction. When we're done + * recording the CoW debris we cancel the (empty) transaction + * and everything goes away cleanly. + */ + error = xfs_trans_alloc_empty(mp, &tp); if (error) return error; - cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + if (error) + goto out_trans; + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ - INIT_LIST_HEAD(&debris); memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); low.rc.rc_startblock = XFS_REFC_COW_START; @@ -1645,10 +1660,11 @@ xfs_refcount_recover_cow_leftovers( if (error) goto out_cursor; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); + xfs_trans_cancel(tp); /* Now iterate the list to free the leftovers */ - list_for_each_entry(rr, &debris, rr_list) { + list_for_each_entry_safe(rr, n, &debris, rr_list) { /* Set up transaction. 
*/ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); if (error) @@ -1676,8 +1692,16 @@ xfs_refcount_recover_cow_leftovers( error = xfs_trans_commit(tp); if (error) goto out_free; + + list_del(&rr->rr_list); + kmem_free(rr); } + return error; +out_defer: + xfs_defer_cancel(&dfops); +out_trans: + xfs_trans_cancel(tp); out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { @@ -1688,11 +1712,6 @@ xfs_refcount_recover_cow_leftovers( out_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_buf_relse(agbp); - goto out_free; - -out_defer: - xfs_defer_cancel(&dfops); - xfs_trans_cancel(tp); - goto out_free; + xfs_trans_brelse(tp, agbp); + goto out_trans; } From d514c634a4f267b98463b43bf091fb764afbadf7 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 22 May 2017 19:54:10 -0700 Subject: [PATCH 454/465] xfs: fix unaligned access in xfs_btree_visit_blocks commit a4d768e702de224cc85e0c8eac9311763403b368 upstream. This structure copy was throwing unaligned access warnings on sparc64: Kernel unaligned access at TPC[1043c088] xfs_btree_visit_blocks+0x88/0xe0 [xfs] xfs_btree_copy_ptrs does a memcpy, which avoids it. Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index c3decedc9455..3d0f96d159ff 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4395,7 +4395,7 @@ xfs_btree_visit_blocks( xfs_btree_readahead_ptr(cur, ptr, 1); /* save for the next iteration of the loop */ - lptr = *ptr; + xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); } /* for each buffer in the level */ From b5ff97c77487869355fb6eafe79882e6e55ed498 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 May 2017 16:36:23 -0700 Subject: [PATCH 455/465] xfs: Fix off-by-in in loop termination in xfs_find_get_desired_pgoff() commit d7fd24257aa60316bf81093f7f909dc9475ae974 upstream. There is an off-by-one error in loop termination conditions in xfs_find_get_desired_pgoff() since 'end' may index a page beyond end of desired range if 'endoff' is page aligned. It doesn't have any visible effects but still it is good to fix it. Signed-off-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
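To make the boundary condition concrete, here is a tiny standalone sketch (4 KiB pages assumed for illustration; the real code uses the kernel's PAGE_SHIFT): when the end offset is exactly page aligned, shifting it directly names the page after the desired range, while shifting endoff - 1 names the last page that actually overlaps it.

	#include <stdio.h>

	#define PAGE_SHIFT 12	/* 4 KiB pages, assumed for this example */

	int main(void)
	{
		unsigned long endoff = 2UL << PAGE_SHIFT;	/* range ends exactly on a page boundary */

		unsigned long end_old = endoff >> PAGE_SHIFT;		/* 2: one page past the range */
		unsigned long end_new = (endoff - 1) >> PAGE_SHIFT;	/* 1: last page inside the range */

		printf("old end index = %lu, fixed end index = %lu\n", end_old, end_new);
		return 0;
	}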
Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f1517e9928c7..dc0e4cb7029b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1043,7 +1043,7 @@ xfs_find_get_desired_pgoff( index = startoff >> PAGE_SHIFT; endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); - end = endoff >> PAGE_SHIFT; + end = (endoff - 1) >> PAGE_SHIFT; do { int want; unsigned nr_pages; From 553c942bef3fb0b70d4e2d20e883343c06455368 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 7 Jun 2017 12:10:31 +0200 Subject: [PATCH 456/465] Linux 4.11.4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7bab1279d0b5..741814dca844 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 11 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Fearless Coyote From 19f6a856132686eceba08a5cd80b48a4114024bd Mon Sep 17 00:00:00 2001 From: vivek Date: Tue, 3 Nov 2015 20:43:02 +0000 Subject: [PATCH 457/465] remove stray linux init header from vdso --- arch/x86/um/vdso/vdso.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S index 1cb468adacbb..4b4bd4cc06ab 100644 --- a/arch/x86/um/vdso/vdso.S +++ b/arch/x86/um/vdso/vdso.S @@ -1,4 +1,3 @@ -#include __INITDATA From 5431b5b1999c3d3b5efee817fb3373fbbd473063 Mon Sep 17 00:00:00 2001 From: vivek Date: Tue, 3 Nov 2015 20:43:02 +0000 Subject: [PATCH 458/465] i915 only disable default vga device --- drivers/gpu/drm/i915/intel_display.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ed1f4f272b4f..2705b2a7dab6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16505,12 +16505,14 @@ static void i915_disable_vga(struct drm_i915_private *dev_priv) i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv); /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ - vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); - outb(SR01, VGA_SR_INDEX); - sr1 = inb(VGA_SR_DATA); - outb(sr1 | 1<<5, VGA_SR_DATA); - vga_put(pdev, VGA_RSRC_LEGACY_IO); - udelay(300); + if (pdev == vga_default_device()) { + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); + outb(SR01, VGA_SR_INDEX); + sr1 = inb(VGA_SR_DATA); + outb(sr1 | 1<<5, VGA_SR_DATA); + vga_put(pdev, VGA_RSRC_LEGACY_IO); + udelay(300); + } I915_WRITE(vga_reg, VGA_DISP_DISABLE); POSTING_READ(vga_reg); From 198990f13e1d9429864c177d9441a6559771c5e2 Mon Sep 17 00:00:00 2001 From: John Vert Date: Fri, 20 Jan 2017 14:47:55 -0800 Subject: [PATCH 459/465] broadwell enable fast sample c ops --- drivers/gpu/drm/i915/intel_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6a29784d2b41..9e25393852cd 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7267,6 +7267,10 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); + /* Faster SAMPLE_C operations. */ + I915_WRITE(HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); + lpt_init_clock_gating(dev_priv); } From 549c3dc10fa3749b3999549a2672b14bf0e9786d Mon Sep 17 00:00:00 2001 From: "Pierre-Loup A. 
Griffais" Date: Wed, 16 Nov 2016 17:17:09 -0800 Subject: [PATCH 460/465] Add support for the Xbox One S controller over Bluetooth --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 2 ++ drivers/hid/hid-microsoft.c | 38 +++++++++++++++++++++++++++++++++++++ net/bluetooth/l2cap_core.c | 9 +++++++++ 4 files changed, 50 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index d162f0dc76e3..0e5a531ce90f 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1984,6 +1984,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_USB) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_ONE_S) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b26c030926c1..c34a181a4070 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -733,6 +733,8 @@ #define USB_DEVICE_ID_MS_TOUCH_COVER_2 0x07a7 #define USB_DEVICE_ID_MS_TYPE_COVER_2 0x07a9 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da +#define USB_DEVICE_ID_MS_XBOX_ONE_S 0x02e0 + #define USB_VENDOR_ID_MOJO 0x8282 #define USB_DEVICE_ID_RETRO_ADAPTER 0x3201 diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c index 96e7d3231d2f..6495742cd245 100644 --- a/drivers/hid/hid-microsoft.c +++ b/drivers/hid/hid-microsoft.c @@ -28,6 +28,8 @@ #define MS_RDESC 0x08 #define MS_NOGET 0x10 #define MS_DUPLICATE_USAGES 0x20 +#define MS_RDESC_3K 0x40 +#define MS_XBOX_ONE_S 0x80 static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) @@ -130,11 +132,43 @@ static int ms_presenter_8k_quirk(struct hid_input *hi, struct hid_usage *usage, return 1; } +static int ms_xbox_one_s_quirk(struct hid_input *hi, struct hid_usage *usage, + unsigned long **bit, int *max) +{ + switch (usage->hid & HID_USAGE_PAGE) { + + case HID_UP_GENDESK: + if ((usage->hid & 0xf0) == 0x80) { /* SystemControl */ + switch (usage->hid & 0xf) { + case 0x5: + /* override the default mapping of KEY_MENU */ + ms_map_key_clear(BTN_MODE); + return 1; + default: + break; + } + break; + } + break; + + default: + break; + } + + /* Let the normal mapping happen for everything else */ + return 0; +} + static int ms_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); + + if (quirks & MS_XBOX_ONE_S) { + int ret = ms_xbox_one_s_quirk(hi, usage, bit, max); + return ret; + } if (quirks & MS_ERGONOMY) { int ret = ms_ergonomy_kb_quirk(hi, usage, bit, max); @@ -281,6 +315,10 @@ static const struct hid_device_id ms_devices[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT), .driver_data = MS_PRESENTER }, + + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_ONE_S), + .driver_data = MS_XBOX_ONE_S }, + { } }; MODULE_DEVICE_TABLE(hid, ms_devices); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fc7f321a3823..2d671ed28c6c 100644 --- a/net/bluetooth/l2cap_core.c +++ 
b/net/bluetooth/l2cap_core.c @@ -3214,6 +3214,14 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) switch (chan->mode) { case L2CAP_MODE_BASIC: + /* HACK FOR XBOX ONE S CONTROLLERS + * If we add options to the configuration request, the + * Xbox One S controller responds with: + * Result: Failure - unknown options (0x0003) + * For now, just don't send configuration options. + * Remove this hack once the controller supports it + */ +#if 0 if (disable_ertm) break; @@ -3230,6 +3238,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); +#endif break; case L2CAP_MODE_ERTM: From 6e215b67a13c85d0cf5a9e0970acd9df7d2b77bd Mon Sep 17 00:00:00 2001 From: vivek Date: Tue, 3 Nov 2015 20:43:03 +0000 Subject: [PATCH 461/465] sixaxis dualshock report via ctrl quirk --- net/bluetooth/hidp/core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 0bec4588c3c8..f2d3dcda3764 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -378,9 +378,16 @@ static int hidp_output_report(struct hid_device *hid, __u8 *data, size_t count) { struct hidp_session *session = hid->driver_data; - return hidp_send_intr_message(session, - HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT, - data, count); + /* The Sixaxis and Dualshock 4 wants report sent via the ctrl channel */ + if(hid->vendor == 0x54c && (hid->product == 0x5c4 || hid->product == 0x268)) { + return hidp_send_ctrl_message(session, + HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT, + data, count); + } else { + return hidp_send_intr_message(session, + HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT, + data, count); + } } static int hidp_raw_request(struct hid_device *hid, unsigned char reportnum, From 5dfe659f9ddcc0256a2331d63e016caa91b48111 Mon Sep 17 00:00:00 2001 From: "Pierre-Loup A. Griffais" Date: Mon, 28 Dec 2015 13:52:02 -0800 Subject: [PATCH 462/465] xpad: one more third-party controller, courtesy of Ignacio Quezada --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 153b1ee13e03..7ddf10704327 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -180,6 +180,7 @@ static const struct xpad_device { { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0139, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, From 1a8267c7a49820529da1b038adab4f40a5215c01 Mon Sep 17 00:00:00 2001 From: John Vert Date: Tue, 31 Jan 2017 17:38:49 -0800 Subject: [PATCH 463/465] add REPORTING-BUGS file to fix build --- REPORTING-BUGS | 174 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 REPORTING-BUGS diff --git a/REPORTING-BUGS b/REPORTING-BUGS new file mode 100644 index 000000000000..0cb8cdfa63bc --- /dev/null +++ b/REPORTING-BUGS @@ -0,0 +1,174 @@ +Background +========== + +The upstream Linux kernel maintainers only fix bugs for specific kernel +versions. 
Those versions include the current "release candidate" (or -rc) +kernel, any "stable" kernel versions, and any "long term" kernels. + +Please see https://www.kernel.org/ for a list of supported kernels. Any +kernel marked with [EOL] is "end of life" and will not have any fixes +backported to it. + +If you've found a bug on a kernel version isn't listed on kernel.org, +contact your Linux distribution or embedded vendor for support. +Alternatively, you can attempt to run one of the supported stable or -rc +kernels, and see if you can reproduce the bug on that. It's preferable +to reproduce the bug on the latest -rc kernel. + + +How to report Linux kernel bugs +=============================== + + +Identify the problematic subsystem +---------------------------------- + +Identifying which part of the Linux kernel might be causing your issue +increases your chances of getting your bug fixed. Simply posting to the +generic linux-kernel mailing list (LKML) may cause your bug report to be +lost in the noise of a mailing list that gets 1000+ emails a day. + +Instead, try to figure out which kernel subsystem is causing the issue, +and email that subsystem's maintainer and mailing list. If the subsystem +maintainer doesn't answer, then expand your scope to mailing lists like +LKML. + + +Identify who to notify +---------------------- + +Once you know the subsystem that is causing the issue, you should send a +bug report. Some maintainers prefer bugs to be reported via bugzilla +(https://bugzilla.kernel.org), while others prefer that bugs be reported +via the subsystem mailing list. + +To find out where to send an emailed bug report, find your subsystem or +device driver in the MAINTAINERS file. Search in the file for relevant +entries, and send your bug report to the person(s) listed in the "M:" +lines, making sure to Cc the mailing list(s) in the "L:" lines. When the +maintainer replies to you, make sure to 'Reply-all' in order to keep the +public mailing list(s) in the email thread. + +If you know which driver is causing issues, you can pass one of the driver +files to the get_maintainer.pl script: + perl scripts/get_maintainer.pl -f + +If it is a security bug, please copy the Security Contact listed in the +MAINTAINERS file. They can help coordinate bugfix and disclosure. See +Documentation/SecurityBugs for more information. + +If you can't figure out which subsystem caused the issue, you should file +a bug in kernel.org bugzilla and send email to +linux-kernel@vger.kernel.org, referencing the bugzilla URL. (For more +information on the linux-kernel mailing list see +http://www.tux.org/lkml/). + + +Tips for reporting bugs +----------------------- + +If you haven't reported a bug before, please read: + +http://www.chiark.greenend.org.uk/~sgtatham/bugs.html +http://www.catb.org/esr/faqs/smart-questions.html + +It's REALLY important to report bugs that seem unrelated as separate email +threads or separate bugzilla entries. If you report several unrelated +bugs at once, it's difficult for maintainers to tease apart the relevant +data. + + +Gather information +------------------ + +The most important information in a bug report is how to reproduce the +bug. This includes system information, and (most importantly) +step-by-step instructions for how a user can trigger the bug. + +If the failure includes an "OOPS:", take a picture of the screen, capture +a netconsole trace, or type the message from your screen into the bug +report. 
Please read "Documentation/oops-tracing.txt" before posting your +bug report. This explains what you should do with the "Oops" information +to make it useful to the recipient. + +This is a suggested format for a bug report sent via email or bugzilla. +Having a standardized bug report form makes it easier for you not to +overlook things, and easier for the developers to find the pieces of +information they're really interested in. If some information is not +relevant to your bug, feel free to exclude it. + +First run the ver_linux script included as scripts/ver_linux, which +reports the version of some important subsystems. Run this script with +the command "sh scripts/ver_linux". + +Use that information to fill in all fields of the bug report form, and +post it to the mailing list with a subject of "PROBLEM: " for easy identification by the developers. + +[1.] One line summary of the problem: +[2.] Full description of the problem/report: +[3.] Keywords (i.e., modules, networking, kernel): +[4.] Kernel information +[4.1.] Kernel version (from /proc/version): +[4.2.] Kernel .config file: +[5.] Most recent kernel version which did not have the bug: +[6.] Output of Oops.. message (if applicable) with symbolic information + resolved (see Documentation/oops-tracing.txt) +[7.] A small shell script or example program which triggers the + problem (if possible) +[8.] Environment +[8.1.] Software (add the output of the ver_linux script here) +[8.2.] Processor information (from /proc/cpuinfo): +[8.3.] Module information (from /proc/modules): +[8.4.] Loaded driver and hardware information (/proc/ioports, /proc/iomem) +[8.5.] PCI information ('lspci -vvv' as root) +[8.6.] SCSI information (from /proc/scsi/scsi) +[8.7.] Other information that might be relevant to the problem + (please look in /proc and include all information that you + think to be relevant): +[X.] Other notes, patches, fixes, workarounds: + + +Follow up +========= + +Expectations for bug reporters +------------------------------ + +Linux kernel maintainers expect bug reporters to be able to follow up on +bug reports. That may include running new tests, applying patches, +recompiling your kernel, and/or re-triggering your bug. The most +frustrating thing for maintainers is for someone to report a bug, and then +never follow up on a request to try out a fix. + +That said, it's still useful for a kernel maintainer to know a bug exists +on a supported kernel, even if you can't follow up with retests. Follow +up reports, such as replying to the email thread with "I tried the latest +kernel and I can't reproduce my bug anymore" are also helpful, because +maintainers have to assume silence means things are still broken. + +Expectations for kernel maintainers +----------------------------------- + +Linux kernel maintainers are busy, overworked human beings. Some times +they may not be able to address your bug in a day, a week, or two weeks. +If they don't answer your email, they may be on vacation, or at a Linux +conference. Check the conference schedule at LWN.net for more info: + https://lwn.net/Calendar/ + +In general, kernel maintainers take 1 to 5 business days to respond to +bugs. The majority of kernel maintainers are employed to work on the +kernel, and they may not work on the weekends. Maintainers are scattered +around the world, and they may not work in your time zone. Unless you +have a high priority bug, please wait at least a week after the first bug +report before sending the maintainer a reminder email. 
+ +The exceptions to this rule are regressions, kernel crashes, security holes, +or userspace breakage caused by new kernel behavior. Those bugs should be +addressed by the maintainers ASAP. If you suspect a maintainer is not +responding to these types of bugs in a timely manner (especially during a +merge window), escalate the bug to LKML and Linus Torvalds. + +Thank you! + +[Some of this is taken from Frohwalt Egerer's original linux-kernel FAQ] From 5e1625ee366447ae07d6c8a6e1459a73dbc59e2b Mon Sep 17 00:00:00 2001 From: John Vert Date: Fri, 5 May 2017 12:04:21 -0700 Subject: [PATCH 464/465] add -steamos EXTRAVERSION --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 741814dca844..e2d6cdf9faf2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 4 -EXTRAVERSION = +EXTRAVERSION = -steamos NAME = Fearless Coyote # *DOCUMENTATION* From 2679c9fd60f6a51a8308ae58dd12e9c86829f04b Mon Sep 17 00:00:00 2001 From: John Vert Date: Wed, 7 Jun 2017 14:51:24 -0700 Subject: [PATCH 465/465] fix bad logic in Sony LED suspend/resume path Thanks to @flibitijibibo for pointing out the problem. --- drivers/hid/hid-sony.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 740996f9bdd4..155d54808c8c 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2740,8 +2740,8 @@ static int sony_suspend(struct hid_device *hdev, pm_message_t message) * On suspend save the current LED state, * stop running force-feedback and blank the LEDS. */ - if (SONY_LED_SUPPORT || SONY_FF_SUPPORT) { - struct sony_sc *sc = hid_get_drvdata(hdev); + struct sony_sc *sc = hid_get_drvdata(hdev); + if (sc->quirks & (SONY_LED_SUPPORT | SONY_FF_SUPPORT)) { #ifdef CONFIG_SONY_FF sc->left = sc->right = 0; @@ -2760,8 +2760,8 @@ static int sony_suspend(struct hid_device *hdev, pm_message_t message) static int sony_resume(struct hid_device *hdev) { /* Restore the state of controller LEDs on resume */ - if (SONY_LED_SUPPORT) { - struct sony_sc *sc = hid_get_drvdata(hdev); + struct sony_sc *sc = hid_get_drvdata(hdev); + if (sc->quirks & SONY_LED_SUPPORT) { memcpy(sc->led_state, sc->resume_led_state, sizeof(sc->led_state));
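The logic error fixed above is the classic mistake of testing a flag mask instead of the device's flags: a condition built only from constant mask macros is always true, so the old suspend/resume path ran for every device. A minimal standalone sketch of the difference (constants and values are illustrative, not the driver's real quirk bits):

	#include <stdio.h>

	#define SONY_LED_SUPPORT (1 << 0)	/* illustrative values only */
	#define SONY_FF_SUPPORT  (1 << 1)

	int main(void)
	{
		unsigned long quirks = 0;	/* pretend this device advertises neither feature */

		/* Old test: a nonzero constant, so this branch is taken for every device. */
		if (SONY_LED_SUPPORT || SONY_FF_SUPPORT)
			printf("old check: LED/FF suspend path always runs\n");

		/* Fixed test: actually consults the per-device quirks word. */
		if (quirks & (SONY_LED_SUPPORT | SONY_FF_SUPPORT))
			printf("new check: LED/FF suspend path runs\n");
		else
			printf("new check: LED/FF suspend path skipped\n");

		return 0;
	}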