cls_flower.c 82.2 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
3
4
5
6
7
8
9
10
11
/*
 * net/sched/cls_flower.c		Flower classifier
 *
 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rhashtable.h>
12
#include <linux/workqueue.h>
13
#include <linux/refcount.h>
14
15
16
17

#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/ip.h>
18
#include <linux/mpls.h>
19
20
21
22
23

#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
24
#include <net/geneve.h>
25
26
#include <net/vxlan.h>
#include <net/erspan.h>
27

28
29
30
#include <net/dst.h>
#include <net/dst_metadata.h>

31
32
#include <uapi/linux/netfilter/nf_conntrack_common.h>

33
struct fl_flow_key {
34
	struct flow_dissector_key_meta meta;
35
	struct flow_dissector_key_control control;
36
	struct flow_dissector_key_control enc_control;
37
38
	struct flow_dissector_key_basic basic;
	struct flow_dissector_key_eth_addrs eth;
39
	struct flow_dissector_key_vlan vlan;
40
	struct flow_dissector_key_vlan cvlan;
41
42
43
44
45
	union {
		struct flow_dissector_key_ipv4_addrs ipv4;
		struct flow_dissector_key_ipv6_addrs ipv6;
	};
	struct flow_dissector_key_ports tp;
46
47
	struct flow_dissector_key_icmp icmp;
	struct flow_dissector_key_arp arp;
48
49
50
51
52
	struct flow_dissector_key_keyid enc_key_id;
	union {
		struct flow_dissector_key_ipv4_addrs enc_ipv4;
		struct flow_dissector_key_ipv6_addrs enc_ipv6;
	};
53
	struct flow_dissector_key_ports enc_tp;
54
	struct flow_dissector_key_mpls mpls;
55
56
	struct flow_dissector_key_tcp tcp;
	struct flow_dissector_key_ip ip;
57
58
	struct flow_dissector_key_ip enc_ip;
	struct flow_dissector_key_enc_opts enc_opts;
59
60
61
62
63
64
65
	union {
		struct flow_dissector_key_ports tp;
		struct {
			struct flow_dissector_key_ports tp_min;
			struct flow_dissector_key_ports tp_max;
		};
	} tp_range;
66
	struct flow_dissector_key_ct ct;
67
68
69
70
71
72
73
74
75
76
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */

struct fl_flow_mask_range {
	unsigned short int start;
	unsigned short int end;
};

struct fl_flow_mask {
	struct fl_flow_key key;
	struct fl_flow_mask_range range;
77
	u32 flags;
78
79
80
81
82
83
84
	struct rhash_head ht_node;
	struct rhashtable ht;
	struct rhashtable_params filter_ht_params;
	struct flow_dissector dissector;
	struct list_head filters;
	struct rcu_work rwork;
	struct list_head list;
85
	refcount_t refcnt;
86
87
};

88
89
90
91
92
93
94
struct fl_flow_tmplt {
	struct fl_flow_key dummy_key;
	struct fl_flow_key mask;
	struct flow_dissector dissector;
	struct tcf_chain *chain;
};

95
96
struct cls_fl_head {
	struct rhashtable ht;
97
	spinlock_t masks_lock; /* Protect masks list */
98
	struct list_head masks;
99
	struct list_head hw_filters;
100
	struct rcu_work rwork;
101
	struct idr handle_idr;
102
103
104
};

struct cls_fl_filter {
105
	struct fl_flow_mask *mask;
106
107
108
109
110
111
	struct rhash_head ht_node;
	struct fl_flow_key mkey;
	struct tcf_exts exts;
	struct tcf_result res;
	struct fl_flow_key key;
	struct list_head list;
112
	struct list_head hw_list;
113
	u32 handle;
114
	u32 flags;
115
	u32 in_hw_count;
116
	struct rcu_work rwork;
117
	struct net_device *hw_dev;
118
119
120
121
122
123
	/* Flower classifier is unlocked, which means that its reference counter
	 * can be changed concurrently without any kind of external
	 * synchronization. Use atomic reference counter to be concurrency-safe.
	 */
	refcount_t refcnt;
	bool deleted;
124
125
};

126
127
128
129
130
131
132
static const struct rhashtable_params mask_ht_params = {
	.key_offset = offsetof(struct fl_flow_mask, key),
	.key_len = sizeof(struct fl_flow_key),
	.head_offset = offsetof(struct fl_flow_mask, ht_node),
	.automatic_shrinking = true,
};

133
134
135
136
137
138
139
140
141
static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
{
	return mask->range.end - mask->range.start;
}

static void fl_mask_update_range(struct fl_flow_mask *mask)
{
	const u8 *bytes = (const u8 *) &mask->key;
	size_t size = sizeof(mask->key);
142
	size_t i, first = 0, last;
143

144
145
146
147
148
149
150
151
	for (i = 0; i < size; i++) {
		if (bytes[i]) {
			first = i;
			break;
		}
	}
	last = first;
	for (i = size - 1; i != first; i--) {
152
153
		if (bytes[i]) {
			last = i;
154
			break;
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
		}
	}
	mask->range.start = rounddown(first, sizeof(long));
	mask->range.end = roundup(last + 1, sizeof(long));
}

static void *fl_key_get_start(struct fl_flow_key *key,
			      const struct fl_flow_mask *mask)
{
	return (u8 *) key + mask->range.start;
}

static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
			      struct fl_flow_mask *mask)
{
	const long *lkey = fl_key_get_start(key, mask);
	const long *lmask = fl_key_get_start(&mask->key, mask);
	long *lmkey = fl_key_get_start(mkey, mask);
	int i;

	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
		*lmkey++ = *lkey++ & *lmask++;
}

179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
static bool fl_mask_fits_tmplt(struct fl_flow_tmplt *tmplt,
			       struct fl_flow_mask *mask)
{
	const long *lmask = fl_key_get_start(&mask->key, mask);
	const long *ltmplt;
	int i;

	if (!tmplt)
		return true;
	ltmplt = fl_key_get_start(&tmplt->mask, mask);
	for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) {
		if (~*ltmplt++ & *lmask++)
			return false;
	}
	return true;
}

196
197
198
199
200
201
static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
{
	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
}

202
203
204
205
206
207
static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
				  struct fl_flow_key *key,
				  struct fl_flow_key *mkey)
{
	__be16 min_mask, max_mask, min_val, max_val;

208
209
210
211
	min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
	max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
	min_val = htons(filter->key.tp_range.tp_min.dst);
	max_val = htons(filter->key.tp_range.tp_max.dst);
212
213

	if (min_mask && max_mask) {
214
215
		if (htons(key->tp_range.tp.dst) < min_val ||
		    htons(key->tp_range.tp.dst) > max_val)
216
217
218
			return false;

		/* skb does not have min and max values */
219
220
		mkey->tp_range.tp_min.dst = filter->mkey.tp_range.tp_min.dst;
		mkey->tp_range.tp_max.dst = filter->mkey.tp_range.tp_max.dst;
221
222
223
224
225
226
227
228
229
230
	}
	return true;
}

static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
				  struct fl_flow_key *key,
				  struct fl_flow_key *mkey)
{
	__be16 min_mask, max_mask, min_val, max_val;

231
232
233
234
	min_mask = htons(filter->mask->key.tp_range.tp_min.src);
	max_mask = htons(filter->mask->key.tp_range.tp_max.src);
	min_val = htons(filter->key.tp_range.tp_min.src);
	max_val = htons(filter->key.tp_range.tp_max.src);
235
236

	if (min_mask && max_mask) {
237
238
		if (htons(key->tp_range.tp.src) < min_val ||
		    htons(key->tp_range.tp.src) > max_val)
239
240
241
			return false;

		/* skb does not have min and max values */
242
243
		mkey->tp_range.tp_min.src = filter->mkey.tp_range.tp_min.src;
		mkey->tp_range.tp_max.src = filter->mkey.tp_range.tp_max.src;
244
245
246
247
248
249
	}
	return true;
}

static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask,
					 struct fl_flow_key *mkey)
250
{
251
252
	return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
				      mask->filter_ht_params);
253
254
}

255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
					     struct fl_flow_key *mkey,
					     struct fl_flow_key *key)
{
	struct cls_fl_filter *filter, *f;

	list_for_each_entry_rcu(filter, &mask->filters, list) {
		if (!fl_range_port_dst_cmp(filter, key, mkey))
			continue;

		if (!fl_range_port_src_cmp(filter, key, mkey))
			continue;

		f = __fl_lookup(mask, mkey);
		if (f)
			return f;
	}
	return NULL;
}

static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
				       struct fl_flow_key *mkey,
				       struct fl_flow_key *key)
{
	if ((mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE))
		return fl_lookup_range(mask, mkey, key);

	return __fl_lookup(mask, mkey);
}

285
286
287
288
289
290
291
292
293
294
295
296
297
static u16 fl_ct_info_to_flower_map[] = {
	[IP_CT_ESTABLISHED] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED,
	[IP_CT_RELATED] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_RELATED,
	[IP_CT_ESTABLISHED_REPLY] =	TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED,
	[IP_CT_RELATED_REPLY] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_RELATED,
	[IP_CT_NEW] =			TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_NEW,
};

298
299
300
301
302
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		       struct tcf_result *res)
{
	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
	struct fl_flow_key skb_mkey;
303
304
305
	struct fl_flow_key skb_key;
	struct fl_flow_mask *mask;
	struct cls_fl_filter *f;
306

307
	list_for_each_entry_rcu(mask, &head->masks, list) {
308
		flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
309
		fl_clear_masked_range(&skb_key, mask);
310

311
		skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
312
313
314
		/* skb_flow_dissect() does not set n_proto in case an unknown
		 * protocol, so do it rather here.
		 */
315
		skb_key.basic.n_proto = skb_protocol(skb, false);
316
		skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
317
318
319
		skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
				    fl_ct_info_to_flower_map,
				    ARRAY_SIZE(fl_ct_info_to_flower_map));
320
		skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
321

322
		fl_set_masked_key(&skb_mkey, &skb_key, mask);
323

324
		f = fl_lookup(mask, &skb_mkey, &skb_key);
325
326
327
328
		if (f && !tc_skip_sw(f->flags)) {
			*res = f->res;
			return tcf_exts_exec(skb, &f->exts, res);
		}
329
330
331
332
333
334
335
336
337
338
339
340
	}
	return -1;
}

static int fl_init(struct tcf_proto *tp)
{
	struct cls_fl_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (!head)
		return -ENOBUFS;

341
	spin_lock_init(&head->masks_lock);
342
	INIT_LIST_HEAD_RCU(&head->masks);
343
	INIT_LIST_HEAD(&head->hw_filters);
344
	rcu_assign_pointer(tp->root, head);
345
	idr_init(&head->handle_idr);
346

347
348
349
	return rhashtable_init(&head->ht, &mask_ht_params);
}

350
static void fl_mask_free(struct fl_flow_mask *mask, bool mask_init_done)
351
{
352
353
354
355
356
	/* temporary masks don't have their filters list and ht initialized */
	if (mask_init_done) {
		WARN_ON(!list_empty(&mask->filters));
		rhashtable_destroy(&mask->ht);
	}
357
358
359
360
361
362
363
364
	kfree(mask);
}

static void fl_mask_free_work(struct work_struct *work)
{
	struct fl_flow_mask *mask = container_of(to_rcu_work(work),
						 struct fl_flow_mask, rwork);

365
366
367
368
369
370
371
372
373
	fl_mask_free(mask, true);
}

static void fl_uninit_mask_free_work(struct work_struct *work)
{
	struct fl_flow_mask *mask = container_of(to_rcu_work(work),
						 struct fl_flow_mask, rwork);

	fl_mask_free(mask, false);
374
375
}

376
static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask)
377
{
378
	if (!refcount_dec_and_test(&mask->refcnt))
379
380
381
		return false;

	rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
382
383

	spin_lock(&head->masks_lock);
384
	list_del_rcu(&mask->list);
385
386
387
	spin_unlock(&head->masks_lock);

	tcf_queue_work(&mask->rwork, fl_mask_free_work);
388
389

	return true;
390
391
}

392
393
394
395
396
397
398
399
400
401
static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
{
	/* Flower classifier only changes root pointer during init and destroy.
	 * Users must obtain reference to tcf_proto instance before calling its
	 * API, so tp->root pointer is protected from concurrent call to
	 * fl_destroy() by reference counting.
	 */
	return rcu_dereference_raw(tp->root);
}

402
403
404
405
406
407
408
409
410
static void __fl_destroy_filter(struct cls_fl_filter *f)
{
	tcf_exts_destroy(&f->exts);
	tcf_exts_put_net(&f->exts);
	kfree(f);
}

static void fl_destroy_filter_work(struct work_struct *work)
{
411
412
	struct cls_fl_filter *f = container_of(to_rcu_work(work),
					struct cls_fl_filter, rwork);
413
414
415
416

	__fl_destroy_filter(f);
}

417
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
418
				 bool rtnl_held, struct netlink_ext_ack *extack)
419
{
420
	struct tcf_block *block = tp->chain->block;
421
	struct flow_cls_offload cls_flower = {};
422

423
	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
424
	cls_flower.command = FLOW_CLS_DESTROY;
425
	cls_flower.cookie = (unsigned long) f;
426

427
428
	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false,
			    &f->flags, &f->in_hw_count, rtnl_held);
429

430
431
}

432
static int fl_hw_replace_filter(struct tcf_proto *tp,
433
				struct cls_fl_filter *f, bool rtnl_held,
434
				struct netlink_ext_ack *extack)
435
{
436
	struct tcf_block *block = tp->chain->block;
437
	struct flow_cls_offload cls_flower = {};
438
	bool skip_sw = tc_skip_sw(f->flags);
439
440
	int err = 0;

441
	cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
442
443
	if (!cls_flower.rule)
		return -ENOMEM;
444

445
	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
446
	cls_flower.command = FLOW_CLS_REPLACE;
447
	cls_flower.cookie = (unsigned long) f;
448
449
450
	cls_flower.rule->match.dissector = &f->mask->dissector;
	cls_flower.rule->match.mask = &f->mask->key;
	cls_flower.rule->match.key = &f->mkey;
451
	cls_flower.classid = f->res.classid;
452

453
	err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
454
455
	if (err) {
		kfree(cls_flower.rule);
456
		if (skip_sw) {
457
			NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
458
459
460
			return err;
		}
		return 0;
461
462
	}

463
464
465
	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
			      skip_sw, &f->flags, &f->in_hw_count, rtnl_held);
	tc_cleanup_flow_action(&cls_flower.rule->action);
466
467
	kfree(cls_flower.rule);

468
469
470
	if (err) {
		fl_hw_destroy_filter(tp, f, rtnl_held, NULL);
		return err;
471
	}
472

473
474
	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
		return -EINVAL;
475

476
	return 0;
477
478
}

479
480
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
			       bool rtnl_held)
481
{
482
	struct tcf_block *block = tp->chain->block;
483
	struct flow_cls_offload cls_flower = {};
484

485
	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
486
	cls_flower.command = FLOW_CLS_STATS;
487
	cls_flower.cookie = (unsigned long) f;
488
	cls_flower.classid = f->res.classid;
489

490
491
	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
			 rtnl_held);
492
493
494

	tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
			      cls_flower.stats.pkts,
495
496
497
			      cls_flower.stats.lastused,
			      cls_flower.stats.used_hw_stats,
			      cls_flower.stats.used_hw_stats_valid);
498
}
499

500
static void __fl_put(struct cls_fl_filter *f)
501
{
502
503
	if (!refcount_dec_and_test(&f->refcnt))
		return;
504

505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
	if (tcf_exts_get_net(&f->exts))
		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
	else
		__fl_destroy_filter(f);
}

static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
{
	struct cls_fl_filter *f;

	rcu_read_lock();
	f = idr_find(&head->handle_idr, handle);
	if (f && !refcount_inc_not_zero(&f->refcnt))
		f = NULL;
	rcu_read_unlock();

	return f;
}

static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
		       bool *last, bool rtnl_held,
		       struct netlink_ext_ack *extack)
{
	struct cls_fl_head *head = fl_head_dereference(tp);

	*last = false;

	spin_lock(&tp->lock);
	if (f->deleted) {
		spin_unlock(&tp->lock);
		return -ENOENT;
	}

	f->deleted = true;
	rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
			       f->mask->filter_ht_params);
541
	idr_remove(&head->handle_idr, f->handle);
542
	list_del_rcu(&f->list);
543
544
545
	spin_unlock(&tp->lock);

	*last = fl_mask_put(head, f->mask);
546
	if (!tc_skip_hw(f->flags))
547
		fl_hw_destroy_filter(tp, f, rtnl_held, extack);
548
	tcf_unbind_filter(tp, &f->res);
549
	__fl_put(f);
550

551
	return 0;
552
553
}

554
555
static void fl_destroy_sleepable(struct work_struct *work)
{
556
557
558
559
560
	struct cls_fl_head *head = container_of(to_rcu_work(work),
						struct cls_fl_head,
						rwork);

	rhashtable_destroy(&head->ht);
561
562
563
564
	kfree(head);
	module_put(THIS_MODULE);
}

565
566
static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
		       struct netlink_ext_ack *extack)
567
{
568
	struct cls_fl_head *head = fl_head_dereference(tp);
569
	struct fl_flow_mask *mask, *next_mask;
570
	struct cls_fl_filter *f, *next;
571
	bool last;
572

573
574
	list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
		list_for_each_entry_safe(f, next, &mask->filters, list) {
575
576
			__fl_delete(tp, f, &last, rtnl_held, extack);
			if (last)
577
578
579
				break;
		}
	}
580
	idr_destroy(&head->handle_idr);
581
582

	__module_get(THIS_MODULE);
583
	tcf_queue_work(&head->rwork, fl_destroy_sleepable);
584
585
}

586
587
588
589
590
591
592
static void fl_put(struct tcf_proto *tp, void *arg)
{
	struct cls_fl_filter *f = arg;

	__fl_put(f);
}

593
static void *fl_get(struct tcf_proto *tp, u32 handle)
594
{
595
	struct cls_fl_head *head = fl_head_dereference(tp);
596

597
	return __fl_get(head, handle);
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
}

static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
					    .len = IFNAMSIZ },
	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
	[TCA_FLOWER_KEY_VLAN_ID]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_VLAN_PRIO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_VLAN_ETH_TYPE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_KEY_ID]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_SRC]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_DST]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_TCP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_DST_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_DST_MASK]	= { .type = NLA_U16 },
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
	[TCA_FLOWER_KEY_SCTP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_DST_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_DST]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_FLAGS]		= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_FLAGS_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ICMPV4_TYPE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_CODE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_TYPE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_CODE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ARP_SIP]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_SIP_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_TIP]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_TIP_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_OP]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ARP_OP_MASK]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ARP_SHA]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_SHA_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_THA]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_THA_MASK]	= { .len = ETH_ALEN },
667
668
669
670
	[TCA_FLOWER_KEY_MPLS_TTL]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_BOS]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_TC]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_LABEL]	= { .type = NLA_U32 },
671
672
673
674
675
676
	[TCA_FLOWER_KEY_TCP_FLAGS]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_FLAGS_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_IP_TOS]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TOS_MASK]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TTL]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TTL_MASK]	= { .type = NLA_U8 },
677
678
679
680
681
682
683
684
685
	[TCA_FLOWER_KEY_CVLAN_ID]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CVLAN_PRIO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_IP_TOS]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_IP_TTL]	 = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_OPTS]	= { .type = NLA_NESTED },
	[TCA_FLOWER_KEY_ENC_OPTS_MASK]	= { .type = NLA_NESTED },
686
687
688
689
690
691
692
693
694
695
	[TCA_FLOWER_KEY_CT_STATE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CT_STATE_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CT_ZONE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CT_ZONE_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CT_MARK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_CT_MARK_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_CT_LABELS]	= { .type = NLA_BINARY,
					    .len = 128 / BITS_PER_BYTE },
	[TCA_FLOWER_KEY_CT_LABELS_MASK]	= { .type = NLA_BINARY,
					    .len = 128 / BITS_PER_BYTE },
696
	[TCA_FLOWER_FLAGS]		= { .type = NLA_U32 },
697
698
699
700
};

static const struct nla_policy
enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
701
702
	[TCA_FLOWER_KEY_ENC_OPTS_UNSPEC]        = {
		.strict_start_type = TCA_FLOWER_KEY_ENC_OPTS_VXLAN },
703
	[TCA_FLOWER_KEY_ENC_OPTS_GENEVE]        = { .type = NLA_NESTED },
704
705
	[TCA_FLOWER_KEY_ENC_OPTS_VXLAN]         = { .type = NLA_NESTED },
	[TCA_FLOWER_KEY_ENC_OPTS_ERSPAN]        = { .type = NLA_NESTED },
706
707
708
709
710
711
712
713
};

static const struct nla_policy
geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]      = { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]       = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]       = { .type = NLA_BINARY,
						       .len = 128 },
714
715
};

716
717
718
719
720
721
722
723
724
725
726
727
728
static const struct nla_policy
vxlan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1] = {
	[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]         = { .type = NLA_U32 },
};

static const struct nla_policy
erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
	[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]        = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]      = { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]        = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]       = { .type = NLA_U8 },
};

729
730
731
732
733
734
static void fl_set_key_val(struct nlattr **tb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
{
	if (!tb[val_type])
		return;
735
	nla_memcpy(val, tb[val_type], len);
736
737
738
	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
		memset(mask, 0xff, len);
	else
739
		nla_memcpy(mask, tb[mask_type], len);
740
741
}

742
static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
743
744
				 struct fl_flow_key *mask,
				 struct netlink_ext_ack *extack)
745
{
746
747
748
749
750
751
752
753
754
755
756
757
758
	fl_set_key_val(tb, &key->tp_range.tp_min.dst,
		       TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_range.tp_min.dst,
		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.dst));
	fl_set_key_val(tb, &key->tp_range.tp_max.dst,
		       TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_range.tp_max.dst,
		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.dst));
	fl_set_key_val(tb, &key->tp_range.tp_min.src,
		       TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_range.tp_min.src,
		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.src));
	fl_set_key_val(tb, &key->tp_range.tp_max.src,
		       TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));

759
760
761
762
763
764
	if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
	    htons(key->tp_range.tp_max.dst) <=
	    htons(key->tp_range.tp_min.dst)) {
		NL_SET_ERR_MSG_ATTR(extack,
				    tb[TCA_FLOWER_KEY_PORT_DST_MIN],
				    "Invalid destination port range (min must be strictly smaller than max)");
765
		return -EINVAL;
766
767
768
769
770
771
772
773
774
	}
	if (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
	    htons(key->tp_range.tp_max.src) <=
	    htons(key->tp_range.tp_min.src)) {
		NL_SET_ERR_MSG_ATTR(extack,
				    tb[TCA_FLOWER_KEY_PORT_SRC_MIN],
				    "Invalid source port range (min must be strictly smaller than max)");
		return -EINVAL;
	}
775
776
777
778

	return 0;
}

779
780
static int fl_set_key_mpls(struct nlattr **tb,
			   struct flow_dissector_key_mpls *key_val,
781
782
			   struct flow_dissector_key_mpls *key_mask,
			   struct netlink_ext_ack *extack)
783
784
785
786
787
788
789
790
{
	if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
		key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
		key_mask->mpls_ttl = MPLS_TTL_MASK;
	}
	if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
		u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);

791
792
793
794
		if (bos & ~MPLS_BOS_MASK) {
			NL_SET_ERR_MSG_ATTR(extack,
					    tb[TCA_FLOWER_KEY_MPLS_BOS],
					    "Bottom Of Stack (BOS) must be 0 or 1");
795
			return -EINVAL;
796
		}
797
798
799
800
801
802
		key_val->mpls_bos = bos;
		key_mask->mpls_bos = MPLS_BOS_MASK;
	}
	if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
		u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);

803
804
805
806
		if (tc & ~MPLS_TC_MASK) {
			NL_SET_ERR_MSG_ATTR(extack,
					    tb[TCA_FLOWER_KEY_MPLS_TC],
					    "Traffic Class (TC) must be between 0 and 7");
807
			return -EINVAL;
808
		}
809
810
811
812
813
814
		key_val->mpls_tc = tc;
		key_mask->mpls_tc = MPLS_TC_MASK;
	}
	if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
		u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);

815
816
817
818
		if (label & ~MPLS_LABEL_MASK) {
			NL_SET_ERR_MSG_ATTR(extack,
					    tb[TCA_FLOWER_KEY_MPLS_LABEL],
					    "Label must be between 0 and 1048575");
819
			return -EINVAL;
820
		}
821
822
823
824
825
826
		key_val->mpls_label = label;
		key_mask->mpls_label = MPLS_LABEL_MASK;
	}
	return 0;
}

827
static void fl_set_key_vlan(struct nlattr **tb,
828
829
			    __be16 ethertype,
			    int vlan_id_key, int vlan_prio_key,
830
831
832
833
834
			    struct flow_dissector_key_vlan *key_val,
			    struct flow_dissector_key_vlan *key_mask)
{
#define VLAN_PRIORITY_MASK	0x7

835
	if (tb[vlan_id_key]) {
836
		key_val->vlan_id =
837
			nla_get_u16(tb[vlan_id_key]) & VLAN_VID_MASK;
838
839
		key_mask->vlan_id = VLAN_VID_MASK;
	}
840
	if (tb[vlan_prio_key]) {
841
		key_val->vlan_priority =
842
			nla_get_u8(tb[vlan_prio_key]) &
843
844
845
			VLAN_PRIORITY_MASK;
		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
	}
846
847
	key_val->vlan_tpid = ethertype;
	key_mask->vlan_tpid = cpu_to_be16(~0);
848
849
}

850
851
852
853
854
855
856
857
858
859
860
static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
			    u32 *dissector_key, u32 *dissector_mask,
			    u32 flower_flag_bit, u32 dissector_flag_bit)
{
	if (flower_mask & flower_flag_bit) {
		*dissector_mask |= dissector_flag_bit;
		if (flower_key & flower_flag_bit)
			*dissector_key |= dissector_flag_bit;
	}
}

861
862
static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key,
			    u32 *flags_mask, struct netlink_ext_ack *extack)
863
864
865
866
{
	u32 key, mask;

	/* mask is mandatory for flags */
867
868
	if (!tb[TCA_FLOWER_KEY_FLAGS_MASK]) {
		NL_SET_ERR_MSG(extack, "Missing flags mask");
869
		return -EINVAL;
870
	}
871
872
873
874
875
876
877
878
879

	key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
	mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));

	*flags_key  = 0;
	*flags_mask = 0;

	fl_set_key_flag(key, mask, flags_key, flags_mask,
			TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
880
881
882
	fl_set_key_flag(key, mask, flags_key, flags_mask,
			TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
			FLOW_DIS_FIRST_FRAG);
883
884
885
886

	return 0;
}

887
static void fl_set_key_ip(struct nlattr **tb, bool encap,
888
889
890
			  struct flow_dissector_key_ip *key,
			  struct flow_dissector_key_ip *mask)
{
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
	int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
	int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
	int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
	int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;

	fl_set_key_val(tb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos));
	fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
}

static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
			     int depth, int option_len,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1];
	struct nlattr *class = NULL, *type = NULL, *data = NULL;
	struct geneve_opt *opt;
	int err, data_len = 0;

	if (option_len > sizeof(struct geneve_opt))
		data_len = option_len - sizeof(struct geneve_opt);

	opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
	memset(opt, 0xff, option_len);
	opt->length = data_len / 4;
	opt->r1 = 0;
	opt->r2 = 0;
	opt->r3 = 0;

	/* If no mask has been prodived we assume an exact match. */
	if (!depth)
		return sizeof(struct geneve_opt) + data_len;

	if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) {
		NL_SET_ERR_MSG(extack, "Non-geneve option type for mask");
		return -EINVAL;
	}

928
929
930
	err = nla_parse_nested_deprecated(tb,
					  TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
					  nla, geneve_opt_policy, extack);
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
	if (err < 0)
		return err;

	/* We are not allowed to omit any of CLASS, TYPE or DATA
	 * fields from the key.
	 */
	if (!option_len &&
	    (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] ||
	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] ||
	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) {
		NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
		return -EINVAL;
	}

	/* Omitting any of CLASS, TYPE or DATA fields is allowed
	 * for the mask.
	 */
	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) {
		int new_len = key->enc_opts.len;

		data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA];
		data_len = nla_len(data);
		if (data_len < 4) {
			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
			return -ERANGE;
		}
		if (data_len % 4) {
			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
			return -ERANGE;
		}

		new_len += sizeof(struct geneve_opt) + data_len;
		BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX);
		if (new_len > FLOW_DIS_TUN_OPTS_MAX) {
			NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
			return -ERANGE;
		}
		opt->length = data_len / 4;
		memcpy(opt->opt_data, nla_data(data), data_len);
	}

	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) {
		class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS];
		opt->opt_class = nla_get_be16(class);
	}

	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) {
		type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE];
		opt->type = nla_get_u8(type);
	}

	return sizeof(struct geneve_opt) + data_len;
}

985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key,
			    int depth, int option_len,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1];
	struct vxlan_metadata *md;
	int err;

	md = (struct vxlan_metadata *)&key->enc_opts.data[key->enc_opts.len];
	memset(md, 0xff, sizeof(*md));

	if (!depth)
		return sizeof(*md);

	if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_VXLAN) {
		NL_SET_ERR_MSG(extack, "Non-vxlan option type for mask");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX, nla,
			       vxlan_opt_policy, extack);
	if (err < 0)
		return err;

	if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) {
		NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp");
		return -EINVAL;
	}

	if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP])
		md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]);

	return sizeof(*md);
}

static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
			     int depth, int option_len,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1];
	struct erspan_metadata *md;
	int err;

	md = (struct erspan_metadata *)&key->enc_opts.data[key->enc_opts.len];
	memset(md, 0xff, sizeof(*md));
	md->version = 1;

	if (!depth)
		return sizeof(*md);

	if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_ERSPAN) {
		NL_SET_ERR_MSG(extack, "Non-erspan option type for mask");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX, nla,
			       erspan_opt_policy, extack);
	if (err < 0)
		return err;

	if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]) {
		NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver");
		return -EINVAL;
	}

	if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER])
		md->version = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]);

	if (md->version == 1) {
		if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
			NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index");
			return -EINVAL;
		}
		if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
			nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX];
			md->u.index = nla_get_be32(nla);
		}
	} else if (md->version == 2) {
		if (!option_len && (!tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] ||
				    !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID])) {
			NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid");
			return -EINVAL;
		}
		if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]) {
			nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR];
			md->u.md2.dir = nla_get_u8(nla);
		}
		if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]) {
			nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID];
			set_hwid(&md->u.md2, nla_get_u8(nla));
		}
	} else {
		NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect");
		return -EINVAL;
	}

	return sizeof(*md);
}

1084
1085
1086
1087
1088
1089
1090
static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
			  struct fl_flow_key *mask,
			  struct netlink_ext_ack *extack)
{
	const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
	int err, option_len, key_depth, msk_depth = 0;

1091
1092
1093
	err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS],
					     TCA_FLOWER_KEY_ENC_OPTS_MAX,
					     enc_opts_policy, extack);
1094
1095
1096
1097
	if (err)
		return err;

	nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
1098

1099
	if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
1100
1101
1102
		err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
						     TCA_FLOWER_KEY_ENC_OPTS_MAX,
						     enc_opts_policy, extack);
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
		if (err)
			return err;

		nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
		msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
	}

	nla_for_each_attr(nla_opt_key, nla_enc_key,
			  nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
		switch (nla_type(nla_opt_key)) {
		case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
1114
1115
1116
1117
1118
			if (key->enc_opts.dst_opt_type &&
			    key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) {
				NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
				return -EINVAL;
			}
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
			option_len = 0;
			key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
			option_len = fl_set_geneve_opt(nla_opt_key, key,
						       key_depth, option_len,
						       extack);
			if (option_len < 0)
				return option_len;

			key->enc_opts.len += option_len;
			/* At the same time we need to parse through the mask
			 * in order to verify exact and mask attribute lengths.
			 */
			mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
			option_len = fl_set_geneve_opt(nla_opt_msk, mask,
						       msk_depth, option_len,
						       extack);
			if (option_len < 0)
				return option_len;

			mask->enc_opts.len += option_len;
			if (key->enc_opts.len != mask->enc_opts.len) {
				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
				return -EINVAL;
			}

1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
			if (msk_depth)
				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
			break;
		case TCA_FLOWER_KEY_ENC_OPTS_VXLAN:
			if (key->enc_opts.dst_opt_type) {
				NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options");
				return -EINVAL;
			}
			option_len = 0;
			key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
			option_len = fl_set_vxlan_opt(nla_opt_key, key,
						      key_depth, option_len,
						      extack);
			if (option_len < 0)
				return option_len;

			key->enc_opts.len += option_len;
			/* At the same time we need to parse through the mask
			 * in order to verify exact and mask attribute lengths.
			 */
			mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
			option_len = fl_set_vxlan_opt(nla_opt_msk, mask,
						      msk_depth, option_len,
						      extack);
			if (option_len < 0)
				return option_len;

			mask->enc_opts.len += option_len;
			if (key->enc_opts.len != mask->enc_opts.len) {
				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
				return -EINVAL;
			}

			if (msk_depth)
				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
			break;
		case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN:
			if (key->enc_opts.dst_opt_type) {
				NL_SET_ERR_MSG(extack, "Duplicate type for erspan options");
				return -EINVAL;
			}
			option_len = 0;
			key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
			option_len = fl_set_erspan_opt(nla_opt_key, key,
						       key_depth, option_len,
						       extack);
			if (option_len < 0)
				return option_len;

			key->enc_opts.len += option_len;
			/* At the same time we need to parse through the mask
			 * in order to verify exact and mask attribute lengths.
			 */
			mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
			option_len = fl_set_erspan_opt(nla_opt_msk, mask,
						       msk_depth, option_len,
						       extack);
			if (option_len < 0)
				return option_len;

			mask->enc_opts.len += option_len;
			if (key->enc_opts.len != mask->enc_opts.len) {
				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
				return -EINVAL;
			}

1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
			if (msk_depth)
				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
			return -EINVAL;
		}
	}

	return 0;
1220
1221
}

1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
static int fl_set_key_ct(struct nlattr **tb,
			 struct flow_dissector_key_ct *key,
			 struct flow_dissector_key_ct *mask,
			 struct netlink_ext_ack *extack)
{
	if (tb[TCA_FLOWER_KEY_CT_STATE]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) {
			NL_SET_ERR_MSG(extack, "Conntrack isn't enabled");
			return -EOPNOTSUPP;
		}
		fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
			       &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
			       sizeof(key->ct_state));
	}
	if (tb[TCA_FLOWER_KEY_CT_ZONE]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
			NL_SET_ERR_MSG(extack, "Conntrack zones isn't enabled");
			return -EOPNOTSUPP;
		}
		fl_set_key_val(tb, &key->ct_zone, TCA_FLOWER_KEY_CT_ZONE,
			       &mask->ct_zone, TCA_FLOWER_KEY_CT_ZONE_MASK,
			       sizeof(key->ct_zone));
	}
	if (tb[TCA_FLOWER_KEY_CT_MARK]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
			NL_SET_ERR_MSG(extack, "Conntrack mark isn't enabled");
			return -EOPNOTSUPP;
		}
		fl_set_key_val(tb, &key->ct_mark, TCA_FLOWER_KEY_CT_MARK,
			       &mask->ct_mark, TCA_FLOWER_KEY_CT_MARK_MASK,
			       sizeof(key->ct_mark));
	}
	if (tb[TCA_FLOWER_KEY_CT_LABELS]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
			NL_SET_ERR_MSG(extack, "Conntrack labels aren't enabled");
			return -EOPNOTSUPP;
		}
		fl_set_key_val(tb, key->ct_labels, TCA_FLOWER_KEY_CT_LABELS,
			       mask->ct_labels, TCA_FLOWER_KEY_CT_LABELS_MASK,
			       sizeof(key->ct_labels));
	}

	return 0;
}

1267
static int fl_set_key(struct net *net, struct nlattr **tb,
1268
1269
		      struct fl_flow_key *key, struct fl_flow_key *mask,
		      struct netlink_ext_ack *extack)
1270
{
1271
	__be16 ethertype;
1272
	int ret = 0;
1273

1274
	if (tb[TCA_FLOWER_INDEV]) {
1275
		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
1276
1277
		if (err < 0)
			return err;
1278
1279
		key->meta.ingress_ifindex = err;
		mask->meta.ingress_ifindex = 0xffffffff;
1280
1281
1282
1283
1284
1285
1286
1287
1288
	}

	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
		       sizeof(key->eth.dst));
	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
		       sizeof(key->eth.src));

1289
1290
1291
	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);

1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
		if (eth_type_vlan(ethertype)) {
			fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
					TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
					&mask->vlan);

			if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
				ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
				if (eth_type_vlan(ethertype)) {
					fl_set_key_vlan(tb, ethertype,
							TCA_FLOWER_KEY_CVLAN_ID,
							TCA_FLOWER_KEY_CVLAN_PRIO,
							&key->cvlan, &mask->cvlan);
					fl_set_key_val(tb, &key->basic.n_proto,
						       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
						       &mask->basic.n_proto,
						       TCA_FLOWER_UNSPEC,
						       sizeof(key->basic.n_proto));
				} else {
					key->basic.n_proto = ethertype;
					mask->basic.n_proto = cpu_to_be16(~0);
				}
			}
1314
1315
1316
1317
1318
		} else {
			key->basic.n_proto = ethertype;
			mask->basic.n_proto = cpu_to_be16(~0);
		}
	}
1319
1320
1321
1322
1323
1324

	if (key->basic.n_proto == htons(ETH_P_IP) ||
	    key->basic.n_proto == htons(ETH_P_IPV6)) {
		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
			       sizeof(key->basic.ip_proto));
1325
		fl_set_key_ip(tb, false, &key->ip, &mask->ip);
1326
1327
1328
1329
	}

	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1330
		mask->control.addr_type = ~0;
1331
1332
1333
1334
1335
1336
1337
1338
		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			       sizeof(key->ipv4.src));
		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
			       sizeof(key->ipv4.dst));
	} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1339
		mask->control.addr_type = ~0;
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(key->ipv6.src));
		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(key->ipv6.dst));
	}

	if (key->basic.ip_proto == IPPROTO_TCP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
1350
			       &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
1351
1352
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
1353
			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
1354
			       sizeof(key->tp.dst));
1355
1356
1357
		fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS