Commit c5538db2 authored by Lorenzo "Palinuro" Faletra

Import Upstream version 5.7.10

parent 675a03b4
......@@ -2790,8 +2790,10 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
return;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
if (!cw)
if (!cw) {
css_put(&memcg->css);
return;
}
cw->memcg = memcg;
cw->cachep = cachep;
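Note: the hunk above makes the allocation-failure path drop the reference that was taken before the kmalloc(). A minimal userspace sketch of the same pattern, with a hypothetical refcounted object standing in for the memcg css (illustrative only, not the kernel's API):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical refcounted object standing in for the memcg css. */
struct obj { int refcount; };

static void obj_get(struct obj *o) { o->refcount++; }
static void obj_put(struct obj *o) { o->refcount--; }

/* Mirrors the fixed control flow: every early return taken after the
 * reference was acquired must be paired with a put. */
static void schedule_work_for(struct obj *o)
{
	void *cw;

	obj_get(o);               /* reference meant to be consumed by the work */
	cw = malloc(64);
	if (!cw) {
		obj_put(o);       /* the fix: no work will consume it, drop it */
		return;
	}
	/* ...queue the work; in this sketch we just "run" and release it... */
	free(cw);
	obj_put(o);
}

int main(void)
{
	struct obj o = { .refcount = 0 };

	schedule_work_for(&o);
	printf("refcount after: %d\n", o.refcount);	/* expect 0 */
	return 0;
}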
......@@ -6349,11 +6351,16 @@ static unsigned long effective_protection(unsigned long usage,
* We're using unprotected memory for the weight so that if
* some cgroups DO claim explicit protection, we don't protect
* the same bytes twice.
*
* Check both usage and parent_usage against the respective
* protected values. One should imply the other, but they
* aren't read atomically - make sure the division is sane.
*/
if (!(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT))
return ep;
if (parent_effective > siblings_protected && usage > protected) {
if (parent_effective > siblings_protected &&
parent_usage > siblings_protected &&
usage > protected) {
unsigned long unclaimed;
unclaimed = parent_effective - siblings_protected;
......
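For reference, the guarded branch in effective_protection() hands out the parent's unclaimed protection in proportion to this cgroup's unprotected usage. A standalone C sketch of that arithmetic, including the extra parent_usage check added above (simplified from the kernel's full min()/max() handling; the earlier steps are collapsed into a placeholder):

#include <stdio.h>

/* Simplified model of the recursive-protection branch:
 *   unclaimed            = parent_effective - siblings_protected
 *   unprotected          = usage - protected          (this cgroup)
 *   siblings_unprotected = parent_usage - siblings_protected
 *   extra                = unclaimed * unprotected / siblings_unprotected
 */
static unsigned long effective_protection_sketch(unsigned long usage,
						 unsigned long parent_usage,
						 unsigned long protected_,
						 unsigned long parent_effective,
						 unsigned long siblings_protected)
{
	unsigned long ep = protected_;	/* placeholder for the earlier steps */

	/* The added guards: usage and parent_usage are not read atomically,
	 * so both the numerator and the divisor must be checked. */
	if (parent_effective > siblings_protected &&
	    parent_usage > siblings_protected &&
	    usage > protected_) {
		unsigned long unclaimed = parent_effective - siblings_protected;

		unclaimed *= usage - protected_;
		unclaimed /= parent_usage - siblings_protected;
		ep += unclaimed;
	}
	return ep;
}

int main(void)
{
	/* Example: parent has 100 effective, siblings claim 40, parent uses 80,
	 * this cgroup uses 30 with 10 protected. */
	printf("%lu\n", effective_protection_sketch(30, 80, 10, 100, 40));
	/* unclaimed = 60, share = 60 * 20 / 40 = 30, ep = 10 + 30 = 40 */
	return 0;
}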
......@@ -1501,7 +1501,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
}
#ifdef pte_index
static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
unsigned long addr, struct page *page, pgprot_t prot)
{
int err;
......@@ -1509,8 +1509,9 @@ static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
if (!page_count(page))
return -EINVAL;
err = validate_page_before_insert(page);
return err ? err : insert_page_into_pte_locked(
mm, pte_offset_map(pmd, addr), addr, page, prot);
if (err)
return err;
return insert_page_into_pte_locked(mm, pte, addr, page, prot);
}
/* insert_pages() amortizes the cost of spinlock operations
......@@ -1520,7 +1521,8 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num, pgprot_t prot)
{
pmd_t *pmd = NULL;
spinlock_t *pte_lock = NULL;
pte_t *start_pte, *pte;
spinlock_t *pte_lock;
struct mm_struct *const mm = vma->vm_mm;
unsigned long curr_page_idx = 0;
unsigned long remaining_pages_total = *num;
......@@ -1539,18 +1541,17 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
ret = -ENOMEM;
if (pte_alloc(mm, pmd))
goto out;
pte_lock = pte_lockptr(mm, pmd);
while (pages_to_write_in_pmd) {
int pte_idx = 0;
const int batch_size = min_t(int, pages_to_write_in_pmd, 8);
spin_lock(pte_lock);
for (; pte_idx < batch_size; ++pte_idx) {
int err = insert_page_in_batch_locked(mm, pmd,
start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock);
for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) {
int err = insert_page_in_batch_locked(mm, pte,
addr, pages[curr_page_idx], prot);
if (unlikely(err)) {
spin_unlock(pte_lock);
pte_unmap_unlock(start_pte, pte_lock);
ret = err;
remaining_pages_total -= pte_idx;
goto out;
......@@ -1558,7 +1559,7 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
addr += PAGE_SIZE;
++curr_page_idx;
}
spin_unlock(pte_lock);
pte_unmap_unlock(start_pte, pte_lock);
pages_to_write_in_pmd -= batch_size;
remaining_pages_total -= batch_size;
}
......
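The insert_pages() change above pairs the mapping and the lock through pte_offset_map_lock()/pte_unmap_unlock() and walks each batch with an explicit pte cursor. A rough userspace analog of the batching structure, with a pthread mutex standing in for the pte lock and invented names throughout:

#include <pthread.h>
#include <stdio.h>

#define BATCH_MAX 8

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static int slots[64];

/* Must be called with slot_lock held, mirroring insert_page_in_batch_locked().
 * Returns 0 on success. */
static int insert_one_locked(int *slot, int value)
{
	if (*slot != 0)
		return -1;	/* slot already populated */
	*slot = value;
	return 0;
}

static int insert_batched(const int *values, int total)
{
	int done = 0;

	while (done < total) {
		int batch = total - done;
		int i, err;

		if (batch > BATCH_MAX)
			batch = BATCH_MAX;

		pthread_mutex_lock(&slot_lock);		/* "map + lock" per batch */
		for (i = 0; i < batch; i++) {
			err = insert_one_locked(&slots[done + i],
						values[done + i]);
			if (err) {
				pthread_mutex_unlock(&slot_lock);
				return done + i;	/* how many made it in */
			}
		}
		pthread_mutex_unlock(&slot_lock);	/* "unmap + unlock" */
		done += batch;
	}
	return done;
}

int main(void)
{
	int vals[20];

	for (int i = 0; i < 20; i++)
		vals[i] = i + 1;
	printf("inserted %d\n", insert_batched(vals, 20));
	return 0;
}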
......@@ -468,11 +468,20 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages)
{
const unsigned long end_pfn = start_pfn + nr_pages;
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long flags;
unsigned long pfn, cur_nr_pages, flags;
/* Poison struct pages because they are now uninitialized again. */
page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
cond_resched();
/* Select all remaining pages up to the next section boundary */
cur_nr_pages =
min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
page_init_poison(pfn_to_page(pfn),
sizeof(struct page) * cur_nr_pages);
}
#ifdef CONFIG_ZONE_DEVICE
/*
......
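The loop added above poisons the range one memory section at a time so cond_resched() can run between chunks. A standalone sketch of the chunk-size arithmetic, with SECTION_ALIGN_UP replaced by a generic ALIGN_UP macro and an assumed section size of 32768 pages (128 MiB with 4 KiB pages):

#include <stdio.h>

#define SECTION_SIZE_PFNS 32768UL	/* assumption for the sketch */
#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((a) - 1))
#define MIN(a, b)         ((a) < (b) ? (a) : (b))

static void walk_range(unsigned long start_pfn, unsigned long nr_pages)
{
	const unsigned long end_pfn = start_pfn + nr_pages;
	unsigned long pfn, cur_nr_pages;

	for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
		/* Select all remaining pages up to the next section boundary,
		 * exactly as the hunk above does. */
		cur_nr_pages = MIN(end_pfn - pfn,
				   ALIGN_UP(pfn + 1, SECTION_SIZE_PFNS) - pfn);
		printf("chunk: pfn %lu, %lu pages\n", pfn, cur_nr_pages);
		/* poison/process the chunk, then yield (cond_resched()) */
	}
}

int main(void)
{
	/* Starts mid-section: the first chunk is short, the rest are
	 * full sections, and the last chunk holds the remainder. */
	walk_range(1000, 100000);
	return 0;
}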
......@@ -348,7 +348,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
gfp_t gfp, int order,
struct kmem_cache *s)
{
unsigned int nr_pages = 1 << order;
int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
int ret;
......@@ -388,7 +388,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
unsigned int nr_pages = 1 << order;
int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
......
......@@ -1726,7 +1726,7 @@ void kzfree(const void *p)
if (unlikely(ZERO_OR_NULL_PTR(mem)))
return;
ks = ksize(mem);
memset(mem, 0, ks);
memzero_explicit(mem, ks);
kfree(mem);
}
EXPORT_SYMBOL(kzfree);
......
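The kzfree() change matters because a plain memset() right before kfree() can be treated as a dead store and optimized away; memzero_explicit() adds a barrier so the zeroing survives. A small userspace sketch of the same idea, using the common compiler-barrier idiom rather than the kernel's implementation:

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

/* Zero a buffer in a way the compiler may not elide, roughly what
 * memzero_explicit() guarantees in the kernel. */
static void zero_explicit(void *p, size_t n)
{
	memset(p, 0, n);
	/* Compiler barrier: tells the optimizer the memory is "used",
	 * so the memset above cannot be dropped as a dead store. */
	__asm__ __volatile__("" : : "r"(p) : "memory");
}

/* Sketch of the fixed kzfree() flow for a secret buffer. */
static void free_sensitive(char *secret, size_t len)
{
	if (!secret)
		return;
	zero_explicit(secret, len);	/* was: memset(secret, 0, len) */
	free(secret);
}

int main(void)
{
	char *key = malloc(32);

	if (!key)
		return 1;
	snprintf(key, 32, "super-secret");
	free_sensitive(key, 32);
	return 0;
}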
......@@ -679,6 +679,20 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
va_end(args);
}
static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
void *freelist, void *nextfree)
{
if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
!check_valid_pointer(s, page, nextfree)) {
object_err(s, page, freelist, "Freechain corrupt");
freelist = NULL;
slab_fix(s, "Isolate corrupted freechain");
return true;
}
return false;
}
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
unsigned int off; /* Offset of last byte */
......@@ -1410,6 +1424,11 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
void *freelist, void *nextfree)
{
return false;
}
#endif /* CONFIG_SLUB_DEBUG */
/*
......@@ -2093,6 +2112,14 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
void *prior;
unsigned long counters;
/*
* If 'nextfree' is invalid, it is possible that the object at
* 'freelist' is already corrupted. So isolate all objects
* starting at 'freelist'.
*/
if (freelist_corrupted(s, page, freelist, nextfree))
break;
do {
prior = page->freelist;
counters = page->counters;
......@@ -5654,7 +5681,8 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
*/
if (buffer)
buf = buffer;
else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
!IS_ENABLED(CONFIG_SLUB_STATS))
buf = mbuf;
else {
buffer = (char *) get_zeroed_page(GFP_KERNEL);
......
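freelist_corrupted() above leans on check_valid_pointer(), which verifies that a free pointer actually lands on an object boundary inside the slab before the chain is trusted. A simplified userspace sketch of that bounds-and-stride check, assuming a hypothetical contiguous slab of fixed-size objects:

#include <stdint.h>
#include <stdio.h>

struct slab_sketch {
	uintptr_t base;		/* address of the first object */
	size_t    obj_size;	/* distance between objects */
	unsigned  nr_objects;
};

/* Roughly what check_valid_pointer() establishes: the candidate next-free
 * pointer lies inside the slab and is aligned to an object start. */
static int pointer_valid(const struct slab_sketch *s, uintptr_t p)
{
	uintptr_t end = s->base + (uintptr_t)s->obj_size * s->nr_objects;

	if (p < s->base || p >= end)
		return 0;
	if ((p - s->base) % s->obj_size)
		return 0;
	return 1;
}

int main(void)
{
	struct slab_sketch s = { .base = 0x1000, .obj_size = 64, .nr_objects = 16 };

	printf("%d\n", pointer_valid(&s, 0x1040));	/* 1: object #1 */
	printf("%d\n", pointer_valid(&s, 0x1042));	/* 0: mid-object, corrupt */
	printf("%d\n", pointer_valid(&s, 0x2000));	/* 0: outside the slab */
	return 0;
}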
......@@ -23,6 +23,7 @@
#include <linux/huge_mm.h>
#include <asm/pgtable.h>
#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
......@@ -418,7 +419,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
/* May fail (-ENOMEM) if XArray node allocation failed. */
__SetPageLocked(new_page);
__SetPageSwapBacked(new_page);
err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
err = add_to_swap_cache(new_page, entry,
gfp_mask & GFP_RECLAIM_MASK);
if (likely(!err)) {
/* Initiate read into locked page */
SetPageWorkingset(new_page);
......
......@@ -1007,7 +1007,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs);
if (skb_transport_offset(skb) + ipv6_transport_len(skb) <
nsrcs_offset + sizeof(_nsrcs))
nsrcs_offset + sizeof(__nsrcs))
return -EINVAL;
_nsrcs = skb_header_pointer(skb, nsrcs_offset,
......
......@@ -217,8 +217,8 @@ struct net_bridge_port_group {
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
unsigned char eth_addr[ETH_ALEN] __aligned(2);
unsigned char flags;
unsigned char eth_addr[ETH_ALEN];
};
struct net_bridge_mdb_entry {
......
......@@ -445,6 +445,7 @@ static void target_copy(struct ceph_osd_request_target *dest,
dest->size = src->size;
dest->min_size = src->min_size;
dest->sort_bitwise = src->sort_bitwise;
dest->recovery_deletes = src->recovery_deletes;
dest->flags = src->flags;
dest->paused = src->paused;
......
......@@ -4109,10 +4109,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
local_bh_disable();
dev_xmit_recursion_inc();
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_drv_stopped(txq))
ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
dev_xmit_recursion_dec();
local_bh_enable();
......@@ -9435,6 +9437,13 @@ int register_netdevice(struct net_device *dev)
rcu_barrier();
dev->reg_state = NETREG_UNREGISTERED;
/* We should put the kobject that is held in
* netdev_unregister_kobject(), otherwise
* the net device cannot be freed when the
* driver calls free_netdev(), because the
* kobject is still being held.
*/
kobject_put(&dev->dev.kobj);
}
/*
* Prevent userspace races by waiting until the network
......
......@@ -4920,7 +4920,7 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
int err;
struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
if (!seg6_validate_srh(srh, len))
if (!seg6_validate_srh(srh, len, false))
return -EINVAL;
switch (type) {
......@@ -5724,12 +5724,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
{
unsigned int iphdr_len;
if (skb->protocol == cpu_to_be16(ETH_P_IP))
switch (skb_protocol(skb, true)) {
case cpu_to_be16(ETH_P_IP):
iphdr_len = sizeof(struct iphdr);
else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
break;
case cpu_to_be16(ETH_P_IPV6):
iphdr_len = sizeof(struct ipv6hdr);
else
break;
default:
return 0;
}
if (skb_headlen(skb) < iphdr_len)
return 0;
......
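The bpf_skb_ecn_set_ce() hunk switches on skb_protocol(skb, true) so VLAN-tagged traffic resolves to the inner ethertype before a header length is chosen. A tiny standalone sketch of just that selection step, with the ethertype constants written out rather than taken from if_ether.h:

#include <stdio.h>
#include <stdint.h>

#define ETH_P_IP   0x0800
#define ETH_P_IPV6 0x86DD

/* Returns the fixed network-header length for the resolved ethertype,
 * or 0 for anything the helper does not handle. */
static unsigned int ip_header_len(uint16_t proto_host_order)
{
	switch (proto_host_order) {
	case ETH_P_IP:
		return 20;	/* sizeof(struct iphdr) */
	case ETH_P_IPV6:
		return 40;	/* sizeof(struct ipv6hdr) */
	default:
		return 0;	/* unknown: caller bails out, as the hunk does */
	}
}

int main(void)
{
	printf("IPv4:  %u bytes\n", ip_header_len(ETH_P_IP));
	printf("IPv6:  %u bytes\n", ip_header_len(ETH_P_IPV6));
	printf("other: %u bytes\n", ip_header_len(0x0806));	/* ARP */
	return 0;
}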
......@@ -683,7 +683,7 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
return container_of(parser, struct sk_psock, parser);
}
static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
static void sk_psock_skb_redirect(struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
......@@ -715,12 +715,11 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
}
}
static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
struct sk_buff *skb, int verdict)
static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict)
{
switch (verdict) {
case __SK_REDIRECT:
sk_psock_skb_redirect(psock, skb);
sk_psock_skb_redirect(skb);
break;
case __SK_PASS:
case __SK_DROP:
......@@ -741,8 +740,8 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
}
sk_psock_tls_verdict_apply(skb, ret);
rcu_read_unlock();
sk_psock_tls_verdict_apply(psock, skb, ret);
return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
......@@ -770,7 +769,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
}
goto out_free;
case __SK_REDIRECT:
sk_psock_skb_redirect(psock, skb);
sk_psock_skb_redirect(skb);
break;
case __SK_DROP:
/* fall-through */
......@@ -782,11 +781,18 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
{
struct sk_psock *psock = sk_psock_from_strp(strp);
struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
struct sock *sk;
rcu_read_lock();
sk = strp->sk;
psock = sk_psock(sk);
if (unlikely(!psock)) {
kfree_skb(skb);
goto out;
}
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
skb_orphan(skb);
......@@ -794,8 +800,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
}
rcu_read_unlock();
sk_psock_verdict_apply(psock, skb, ret);
out:
rcu_read_unlock();
}
static int sk_psock_strp_read_done(struct strparser *strp, int err)
......
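The sk_psock_strp_read() change widens the read-side critical section: the psock is looked up and used entirely under rcu_read_lock(), and a missing psock no longer leads to a stale dereference. A loose userspace analog of that discipline, with a pthread rwlock standing in for RCU and invented names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct psock_sketch { int verdicts; };

static pthread_rwlock_t lookup_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct psock_sketch *current_psock;	/* may be torn down concurrently */

static void handle_packet(char *pkt)
{
	struct psock_sketch *psock;

	pthread_rwlock_rdlock(&lookup_lock);	/* rcu_read_lock() */
	psock = current_psock;
	if (!psock) {
		free(pkt);			/* kfree_skb(): nobody will handle it */
		goto out;
	}
	psock->verdicts++;			/* apply the verdict while still locked */
	free(pkt);				/* packet consumed */
out:
	pthread_rwlock_unlock(&lookup_lock);	/* unlock only after the last use */
}

int main(void)
{
	struct psock_sketch p = { 0 };

	current_psock = &p;
	handle_packet(malloc(64));	/* handled: verdict applied */
	current_psock = NULL;
	handle_packet(malloc(64));	/* no psock: packet dropped safely */
	printf("verdicts: %d\n", p.verdicts);
	return 0;
}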
......@@ -707,7 +707,7 @@ bool sk_mc_loop(struct sock *sk)
return inet6_sk(sk)->mc_loop;
#endif
}
WARN_ON(1);
WARN_ON_ONCE(1);
return true;
}
EXPORT_SYMBOL(sk_mc_loop);
......@@ -1678,6 +1678,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
sk_tx_queue_clear(sk);
}
return sk;
......@@ -1836,7 +1837,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
/* sk->sk_memcg will be populated at accept() time */
newsk->sk_memcg = NULL;
cgroup_sk_alloc(&newsk->sk_cgrp_data);
cgroup_sk_clone(&newsk->sk_cgrp_data);
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
......@@ -1901,6 +1902,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
sk_refcnt_debug_inc(newsk);
sk_set_socket(newsk, NULL);
sk_tx_queue_clear(newsk);
RCU_INIT_POINTER(newsk->sk_wq, NULL);
if (newsk->sk_prot->sockets_allocated)
......
......@@ -70,11 +70,49 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
struct fd f;
int ret;
if (attr->attach_flags || attr->replace_bpf_fd)
return -EINVAL;
f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
ret = sock_map_prog_update(map, prog, attr->attach_type);
ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
fdput(f);
return ret;
}
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
u32 ufd = attr->target_fd;
struct bpf_prog *prog;
struct bpf_map *map;
struct fd f;
int ret;
if (attr->attach_flags || attr->replace_bpf_fd)
return -EINVAL;
f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
prog = bpf_prog_get(attr->attach_bpf_fd);
if (IS_ERR(prog)) {
ret = PTR_ERR(prog);
goto put_map;
}
if (prog->type != ptype) {
ret = -EINVAL;
goto put_prog;
}
ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
put_prog:
bpf_prog_put(prog);
put_map:
fdput(f);
return ret;
}
......@@ -1189,27 +1227,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
}
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
u32 which)
struct bpf_prog *old, u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
struct bpf_prog **pprog;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
psock_set_prog(&progs->msg_parser, prog);
pprog = &progs->msg_parser;
break;
case BPF_SK_SKB_STREAM_PARSER:
psock_set_prog(&progs->skb_parser, prog);
pprog = &progs->skb_parser;
break;
case BPF_SK_SKB_STREAM_VERDICT:
psock_set_prog(&progs->skb_verdict, prog);
pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}
if (old)
return psock_replace_prog(pprog, prog, old);
psock_set_prog(pprog, prog);
return 0;
}
......
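The sock_map_prog_update() rework first resolves which program slot the attach type refers to and only then decides between an unconditional set and a compare-and-replace against an expected old program. A minimal userspace sketch of that slot logic, with C11 atomics playing the role of psock_set_prog()/psock_replace_prog() and made-up enum values:

#include <stdatomic.h>
#include <stdio.h>
#include <errno.h>

struct prog { const char *name; };

struct progs_sketch {
	_Atomic(struct prog *) msg_parser;
	_Atomic(struct prog *) skb_parser;
	_Atomic(struct prog *) skb_verdict;
};

enum attach_which { MSG_VERDICT, SKB_STREAM_PARSER, SKB_STREAM_VERDICT };

static int prog_update(struct progs_sketch *progs, struct prog *prog,
		       struct prog *old, enum attach_which which)
{
	_Atomic(struct prog *) *pprog;

	switch (which) {			/* pick the slot first */
	case MSG_VERDICT:        pprog = &progs->msg_parser;  break;
	case SKB_STREAM_PARSER:  pprog = &progs->skb_parser;  break;
	case SKB_STREAM_VERDICT: pprog = &progs->skb_verdict; break;
	default:                 return -EOPNOTSUPP;
	}

	if (old) {
		/* Replace only if the slot still holds the expected program. */
		struct prog *expected = old;

		if (!atomic_compare_exchange_strong(pprog, &expected, prog))
			return -ENOENT;
		return 0;
	}
	atomic_store(pprog, prog);		/* plain attach (or detach with NULL) */
	return 0;
}

int main(void)
{
	struct progs_sketch progs = {0};
	struct prog a = { "a" }, b = { "b" };

	prog_update(&progs, &a, NULL, SKB_STREAM_VERDICT);
	printf("replace a->b: %d\n", prog_update(&progs, &b, &a, SKB_STREAM_VERDICT));
	printf("replace a->b again: %d\n", prog_update(&progs, &b, &a, SKB_STREAM_VERDICT));
	return 0;
}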
......@@ -277,7 +277,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
if (jit_enable < 2 ||
(jit_enable == 2 && bpf_dump_raw_ok())) {
(jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
*(int *)table->data = jit_enable;
if (jit_enable == 2)
pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
......
......@@ -40,9 +40,11 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-tunnel-remcsum-segmentation",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
[NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
[NETIF_F_GSO_FRAGLIST_BIT] = "tx-gso-list",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
......
......@@ -2957,7 +2957,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
sizeof(match->mask.ipv6.dst));
}
if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) ||
memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) {
match->dissector.used_keys |=
BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
......
......@@ -376,10 +376,17 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
}
static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
const struct ethnl_dump_ctx *ctx)
const struct ethnl_dump_ctx *ctx,
struct netlink_callback *cb)
{
void *ehdr;
int ret;
ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&ethtool_genl_family, 0, ctx->ops->reply_cmd);
if (!ehdr)
return -EMSGSIZE;
ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev);
rtnl_lock();
ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL);
......@@ -395,6 +402,10 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
if (ctx->ops->cleanup_data)
ctx->ops->cleanup_data(ctx->reply_data);
ctx->reply_data->dev = NULL;
if (ret < 0)
genlmsg_cancel(skb, ehdr);
else
genlmsg_end(skb, ehdr);
return ret;
}
......@@ -411,7 +422,6 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
int s_idx = ctx->pos_idx;
int h, idx = 0;
int ret = 0;
void *ehdr;
rtnl_lock();
for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
......@@ -431,26 +441,15 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
dev_hold(dev);
rtnl_unlock();
ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
&ethtool_genl_family, 0,
ctx->ops->reply_cmd);
if (!ehdr) {
dev_put(dev);
ret = -EMSGSIZE;
goto out;
}
ret = ethnl_default_dump_one(skb, dev, ctx);
ret = ethnl_default_dump_one(skb, dev, ctx, cb);
dev_put(dev);
if (ret < 0) {
genlmsg_cancel(skb, ehdr);
if (ret == -EOPNOTSUPP)
goto lock_and_cont;
if (likely(skb->len))
ret = skb->len;
goto out;
}
genlmsg_end(skb, ehdr);
lock_and_cont:
rtnl_lock();
if (net->dev_base_seq != seq) {
......
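The netlink dump refactor above moves genlmsg_put()/genlmsg_end()/genlmsg_cancel() into ethnl_default_dump_one(), so a single function owns the begin/fill/commit-or-rollback lifecycle of each per-device message. A generic userspace sketch of that pattern, with a fixed-size record buffer standing in for the skb and all names invented:

#include <stdio.h>
#include <string.h>

struct msgbuf {
	char   data[256];
	size_t len;
};

/* "genlmsg_put": remember where the record starts so it can be rolled back. */
static size_t record_begin(struct msgbuf *m) { return m->len; }

/* "genlmsg_cancel": drop everything written since record_begin(). */
static void record_cancel(struct msgbuf *m, size_t start) { m->len = start; }

static int record_append(struct msgbuf *m, const char *s)
{
	size_t n = strlen(s);

	if (m->len + n > sizeof(m->data))
		return -1;			/* like -EMSGSIZE */
	memcpy(m->data + m->len, s, n);
	m->len += n;
	return 0;
}

/* One function owns the whole record lifecycle, as the refactored
 * ethnl_default_dump_one() now does for each net_device. */
static int dump_one(struct msgbuf *m, const char *dev_name)
{
	size_t start = record_begin(m);

	if (record_append(m, dev_name) || record_append(m, ": stats...\n")) {
		record_cancel(m, start);	/* roll back on any failure */
		return -1;
	}
	return 0;				/* commit ("genlmsg_end") */
}

int main(void)
{
	struct msgbuf m = { .len = 0 };

	dump_one(&m, "eth0");
	dump_one(&m, "eth1");
	fwrite(m.data, 1, m.len, stdout);
	return 0;
}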
......@@ -341,7 +341,7 @@ static void hsr_announce(struct timer_list *t)
rcu_read_unlock();
}
static void hsr_del_ports(struct hsr_priv *hsr)
void hsr_del_ports(struct hsr_priv *hsr)
{
struct hsr_port *port;
......@@ -358,31 +358,12 @@ static void hsr_del_ports(struct hsr_priv *hsr)
hsr_del_port(port);
}
/* This has to be called after all the readers are gone.
* Otherwise we would have to check the return value of
* hsr_port_get_hsr().
*/