// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_flower.c		Flower classifier
 *
 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rhashtable.h>
12
#include <linux/workqueue.h>
13
#include <linux/refcount.h>
14
15
16
17

#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/ip.h>
18
#include <linux/mpls.h>
19
20
21
22
23

#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
24
#include <net/geneve.h>
25

26
27
28
#include <net/dst.h>
#include <net/dst_metadata.h>

29
30
#include <uapi/linux/netfilter/nf_conntrack_common.h>

31
struct fl_flow_key {
32
	struct flow_dissector_key_meta meta;
33
	struct flow_dissector_key_control control;
34
	struct flow_dissector_key_control enc_control;
35
36
	struct flow_dissector_key_basic basic;
	struct flow_dissector_key_eth_addrs eth;
37
	struct flow_dissector_key_vlan vlan;
38
	struct flow_dissector_key_vlan cvlan;
39
40
41
42
43
	union {
		struct flow_dissector_key_ipv4_addrs ipv4;
		struct flow_dissector_key_ipv6_addrs ipv6;
	};
	struct flow_dissector_key_ports tp;
44
45
	struct flow_dissector_key_icmp icmp;
	struct flow_dissector_key_arp arp;
46
47
48
49
50
	struct flow_dissector_key_keyid enc_key_id;
	union {
		struct flow_dissector_key_ipv4_addrs enc_ipv4;
		struct flow_dissector_key_ipv6_addrs enc_ipv6;
	};
51
	struct flow_dissector_key_ports enc_tp;
52
	struct flow_dissector_key_mpls mpls;
53
54
	struct flow_dissector_key_tcp tcp;
	struct flow_dissector_key_ip ip;
55
56
	struct flow_dissector_key_ip enc_ip;
	struct flow_dissector_key_enc_opts enc_opts;
57
58
59
60
61
62
63
	union {
		struct flow_dissector_key_ports tp;
		struct {
			struct flow_dissector_key_ports tp_min;
			struct flow_dissector_key_ports tp_max;
		};
	} tp_range;
64
	struct flow_dissector_key_ct ct;
65
66
67
68
69
70
71
72
73
74
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */

/* Byte span of struct fl_flow_key that a mask covers. Both offsets are
 * computed by fl_mask_update_range() and are multiples of sizeof(long).
 */
struct fl_flow_mask_range {
	unsigned short int start;	/* first non-zero byte, rounded down */
	unsigned short int end;		/* one past last non-zero byte, rounded up */
};

struct fl_flow_mask {
	struct fl_flow_key key;
	struct fl_flow_mask_range range;
75
	u32 flags;
76
77
78
79
80
81
82
	struct rhash_head ht_node;
	struct rhashtable ht;
	struct rhashtable_params filter_ht_params;
	struct flow_dissector dissector;
	struct list_head filters;
	struct rcu_work rwork;
	struct list_head list;
83
	refcount_t refcnt;
84
85
};

86
87
88
89
90
91
92
/* Chain template. A filter mask "fits" the template when it sets no bit
 * outside of @mask (see fl_mask_fits_tmplt()).
 * NOTE(review): the roles of @dummy_key, @dissector and @chain are not
 * visible in this part of the file - confirm against their users.
 */
struct fl_flow_tmplt {
	struct fl_flow_key dummy_key;
	struct fl_flow_key mask;	/* bits a filter mask is allowed to use */
	struct flow_dissector dissector;
	struct tcf_chain *chain;
};

93
94
struct cls_fl_head {
	struct rhashtable ht;
95
	spinlock_t masks_lock; /* Protect masks list */
96
	struct list_head masks;
97
	struct list_head hw_filters;
98
	struct rcu_work rwork;
99
	struct idr handle_idr;
100
101
102
};

struct cls_fl_filter {
103
	struct fl_flow_mask *mask;
104
105
106
107
108
109
	struct rhash_head ht_node;
	struct fl_flow_key mkey;
	struct tcf_exts exts;
	struct tcf_result res;
	struct fl_flow_key key;
	struct list_head list;
110
	struct list_head hw_list;
111
	u32 handle;
112
	u32 flags;
113
	u32 in_hw_count;
114
	struct rcu_work rwork;
115
	struct net_device *hw_dev;
116
117
118
119
120
121
	/* Flower classifier is unlocked, which means that its reference counter
	 * can be changed concurrently without any kind of external
	 * synchronization. Use atomic reference counter to be concurrency-safe.
	 */
	refcount_t refcnt;
	bool deleted;
122
123
};

124
125
126
127
128
129
130
/* Parameters of cls_fl_head::ht: masks are hashed over their complete
 * struct fl_flow_key and linked through fl_flow_mask::ht_node.
 */
static const struct rhashtable_params mask_ht_params = {
	.key_offset = offsetof(struct fl_flow_mask, key),
	.key_len = sizeof(struct fl_flow_key),
	.head_offset = offsetof(struct fl_flow_mask, ht_node),
	.automatic_shrinking = true,
};

131
132
133
134
135
136
137
138
139
static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
{
	return mask->range.end - mask->range.start;
}

/* Recompute mask->range as the smallest long-aligned byte span of
 * mask->key that contains every non-zero byte.
 *
 * For an all-zero key both scans find nothing, so the range becomes
 * [0, sizeof(long)).
 */
static void fl_mask_update_range(struct fl_flow_mask *mask)
{
	const u8 *bytes = (const u8 *) &mask->key;
	size_t size = sizeof(mask->key);
	size_t i, first = 0, last;

	/* Find the first non-zero byte from the front. */
	for (i = 0; i < size; i++) {
		if (bytes[i]) {
			first = i;
			break;
		}
	}
	/* Find the last non-zero byte, scanning back towards @first. */
	last = first;
	for (i = size - 1; i != first; i--) {
		if (bytes[i]) {
			last = i;
			break;
		}
	}
	mask->range.start = rounddown(first, sizeof(long));
	mask->range.end = roundup(last + 1, sizeof(long));
}

static void *fl_key_get_start(struct fl_flow_key *key,
			      const struct fl_flow_mask *mask)
{
	return (u8 *) key + mask->range.start;
}

/* Store (@key & @mask->key) into @mkey, one long at a time.
 * Only the byte range covered by @mask is read and written.
 */
static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
			      struct fl_flow_mask *mask)
{
	const long *src = fl_key_get_start(key, mask);
	const long *bits = fl_key_get_start(&mask->key, mask);
	long *dst = fl_key_get_start(mkey, mask);
	int off = 0;

	while (off < fl_mask_range(mask)) {
		*dst++ = *src++ & *bits++;
		off += sizeof(long);
	}
}

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
/* Check that @mask uses no bit that the template mask leaves clear.
 * Only the byte range covered by @mask is compared. A NULL @tmplt accepts
 * every mask.
 */
static bool fl_mask_fits_tmplt(struct fl_flow_tmplt *tmplt,
			       struct fl_flow_mask *mask)
{
	const long *wanted, *allowed;
	int off;

	if (!tmplt)
		return true;

	wanted = fl_key_get_start(&mask->key, mask);
	allowed = fl_key_get_start(&tmplt->mask, mask);
	for (off = 0; off < fl_mask_range(mask); off += sizeof(long)) {
		long outside = *wanted++ & ~*allowed++;

		if (outside)
			return false;
	}
	return true;
}

194
195
196
197
198
199
static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
{
	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
}

200
201
202
203
204
205
static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
				  struct fl_flow_key *key,
				  struct fl_flow_key *mkey)
{
	__be16 min_mask, max_mask, min_val, max_val;

206
207
208
209
	min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
	max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
	min_val = htons(filter->key.tp_range.tp_min.dst);
	max_val = htons(filter->key.tp_range.tp_max.dst);
210
211

	if (min_mask && max_mask) {
212
213
		if (htons(key->tp_range.tp.dst) < min_val ||
		    htons(key->tp_range.tp.dst) > max_val)
214
215
216
			return false;

		/* skb does not have min and max values */
217
218
		mkey->tp_range.tp_min.dst = filter->mkey.tp_range.tp_min.dst;
		mkey->tp_range.tp_max.dst = filter->mkey.tp_range.tp_max.dst;
219
220
221
222
223
224
225
226
227
228
	}
	return true;
}

static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
				  struct fl_flow_key *key,
				  struct fl_flow_key *mkey)
{
	__be16 min_mask, max_mask, min_val, max_val;

229
230
231
232
	min_mask = htons(filter->mask->key.tp_range.tp_min.src);
	max_mask = htons(filter->mask->key.tp_range.tp_max.src);
	min_val = htons(filter->key.tp_range.tp_min.src);
	max_val = htons(filter->key.tp_range.tp_max.src);
233
234

	if (min_mask && max_mask) {
235
236
		if (htons(key->tp_range.tp.src) < min_val ||
		    htons(key->tp_range.tp.src) > max_val)
237
238
239
			return false;

		/* skb does not have min and max values */
240
241
		mkey->tp_range.tp_min.src = filter->mkey.tp_range.tp_min.src;
		mkey->tp_range.tp_max.src = filter->mkey.tp_range.tp_max.src;
242
243
244
245
246
247
	}
	return true;
}

static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask,
					 struct fl_flow_key *mkey)
248
{
249
250
	return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
				      mask->filter_ht_params);
251
252
}

253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
/* Lookup for masks that use port ranges.
 *
 * Walk the filters of @mask; for each one whose dst and src port ranges
 * accept the packet @key, the range helpers patch @mkey and a hashtable
 * lookup is attempted. Returns the first filter found, or NULL.
 */
static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
					     struct fl_flow_key *mkey,
					     struct fl_flow_key *key)
{
	struct cls_fl_filter *candidate;

	list_for_each_entry_rcu(candidate, &mask->filters, list) {
		struct cls_fl_filter *hit;

		if (!fl_range_port_dst_cmp(candidate, key, mkey) ||
		    !fl_range_port_src_cmp(candidate, key, mkey))
			continue;

		hit = __fl_lookup(mask, mkey);
		if (hit)
			return hit;
	}
	return NULL;
}

/* Find the filter of @mask matching the packet: masks flagged with
 * TCA_FLOWER_MASK_FLAGS_RANGE take the port range path, all others are
 * a plain hashtable lookup on @mkey.
 */
static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
				       struct fl_flow_key *mkey,
				       struct fl_flow_key *key)
{
	if (!(mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE))
		return __fl_lookup(mask, mkey);

	return fl_lookup_range(mask, mkey, key);
}

283
284
285
286
287
288
289
290
291
292
293
294
295
/* Translation table from IP_CT_* conntrack info values (the array index)
 * to TCA_FLOWER_KEY_CT_FLAGS_* bits. Handed to skb_flow_dissect_ct() by
 * fl_classify().
 */
static u16 fl_ct_info_to_flower_map[] = {
	[IP_CT_ESTABLISHED] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED,
	[IP_CT_RELATED] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_RELATED,
	[IP_CT_ESTABLISHED_REPLY] =	TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED,
	[IP_CT_RELATED_REPLY] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_RELATED,
	[IP_CT_NEW] =			TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					TCA_FLOWER_KEY_CT_FLAGS_NEW,
};

296
297
298
299
300
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		       struct tcf_result *res)
{
	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
	struct fl_flow_key skb_mkey;
301
302
303
	struct fl_flow_key skb_key;
	struct fl_flow_mask *mask;
	struct cls_fl_filter *f;
304

305
306
	list_for_each_entry_rcu(mask, &head->masks, list) {
		fl_clear_masked_range(&skb_key, mask);
307

308
		skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
309
310
311
312
313
		/* skb_flow_dissect() does not set n_proto in case an unknown
		 * protocol, so do it rather here.
		 */
		skb_key.basic.n_proto = skb->protocol;
		skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
314
315
316
		skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
				    fl_ct_info_to_flower_map,
				    ARRAY_SIZE(fl_ct_info_to_flower_map));
317
		skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
318

319
		fl_set_masked_key(&skb_mkey, &skb_key, mask);
320

321
		f = fl_lookup(mask, &skb_mkey, &skb_key);
322
323
324
325
		if (f && !tc_skip_sw(f->flags)) {
			*res = f->res;
			return tcf_exts_exec(skb, &f->exts, res);
		}
326
327
328
329
330
331
332
333
334
335
336
337
	}
	return -1;
}

static int fl_init(struct tcf_proto *tp)
{
	struct cls_fl_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (!head)
		return -ENOBUFS;

338
	spin_lock_init(&head->masks_lock);
339
	INIT_LIST_HEAD_RCU(&head->masks);
340
	INIT_LIST_HEAD(&head->hw_filters);
341
	rcu_assign_pointer(tp->root, head);
342
	idr_init(&head->handle_idr);
343

344
345
346
	return rhashtable_init(&head->ht, &mask_ht_params);
}

347
/* Free @mask. @mask_init_done tells whether the filters list and the
 * filter hashtable were set up and therefore must be torn down first.
 */
static void fl_mask_free(struct fl_flow_mask *mask, bool mask_init_done)
{
	/* temporary masks don't have their filters list and ht initialized */
	if (mask_init_done) {
		WARN_ON(!list_empty(&mask->filters));
		rhashtable_destroy(&mask->ht);
	}
	kfree(mask);
}

static void fl_mask_free_work(struct work_struct *work)
{
	struct fl_flow_mask *mask = container_of(to_rcu_work(work),
						 struct fl_flow_mask, rwork);

362
363
364
365
366
367
368
369
370
	fl_mask_free(mask, true);
}

/* Work callback: free a mask whose filters list and hashtable were never
 * initialised.
 */
static void fl_uninit_mask_free_work(struct work_struct *work)
{
	struct fl_flow_mask *mask = container_of(to_rcu_work(work),
						 struct fl_flow_mask, rwork);

	fl_mask_free(mask, false);
}

373
static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask)
374
{
375
	if (!refcount_dec_and_test(&mask->refcnt))
376
377
378
		return false;

	rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
379
380

	spin_lock(&head->masks_lock);
381
	list_del_rcu(&mask->list);
382
383
384
	spin_unlock(&head->masks_lock);

	tcf_queue_work(&mask->rwork, fl_mask_free_work);
385
386

	return true;
387
388
}

389
390
391
392
393
394
395
396
397
398
/* Fetch the classifier head stored in tp->root. */
static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
{
	struct cls_fl_head *head;

	/* Flower classifier only changes root pointer during init and destroy.
	 * Users must obtain reference to tcf_proto instance before calling its
	 * API, so tp->root pointer is protected from concurrent call to
	 * fl_destroy() by reference counting.
	 */
	head = rcu_dereference_raw(tp->root);

	return head;
}

399
400
401
402
403
404
405
406
407
/* Release the extensions of @f, drop their netns reference and free @f. */
static void __fl_destroy_filter(struct cls_fl_filter *f)
{
	struct tcf_exts *exts = &f->exts;

	tcf_exts_destroy(exts);
	tcf_exts_put_net(exts);
	kfree(f);
}

static void fl_destroy_filter_work(struct work_struct *work)
{
408
409
	struct cls_fl_filter *f = container_of(to_rcu_work(work),
					struct cls_fl_filter, rwork);
410
411
412
413

	__fl_destroy_filter(f);
}

414
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
415
				 bool rtnl_held, struct netlink_ext_ack *extack)
416
{
417
	struct tcf_block *block = tp->chain->block;
418
	struct flow_cls_offload cls_flower = {};
419

420
	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
421
	cls_flower.command = FLOW_CLS_DESTROY;
422
	cls_flower.cookie = (unsigned long) f;
423

424
425
	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false,
			    &f->flags, &f->in_hw_count, rtnl_held);
426

427
428
}

429
/* Issue a FLOW_CLS_REPLACE offload request for @f on the block of @tp.
 *
 * Returns:
 *   -ENOMEM  when the flow rule cannot be allocated;
 *   the tc_setup_flow_action() error when the filter is skip_sw, 0 when it
 *            is not (the failure is then ignored);
 *   the tc_setup_cb_add() error, after undoing the offload;
 *   -EINVAL  when the filter is skip_sw but did not end up in hardware;
 *   0        otherwise.
 */
static int fl_hw_replace_filter(struct tcf_proto *tp,
				struct cls_fl_filter *f, bool rtnl_held,
				struct netlink_ext_ack *extack)
{
	struct tcf_block *block = tp->chain->block;
	struct flow_cls_offload cls_flower = {};
	bool skip_sw = tc_skip_sw(f->flags);
	int err = 0;

	cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
	if (!cls_flower.rule)
		return -ENOMEM;

	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
	cls_flower.command = FLOW_CLS_REPLACE;
	cls_flower.cookie = (unsigned long) f;
	cls_flower.rule->match.dissector = &f->mask->dissector;
	cls_flower.rule->match.mask = &f->mask->key;
	cls_flower.rule->match.key = &f->mkey;
	cls_flower.classid = f->res.classid;

	err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
				   rtnl_held);
	if (err) {
		kfree(cls_flower.rule);
		if (skip_sw) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
			return err;
		}
		return 0;
	}

	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
			      skip_sw, &f->flags, &f->in_hw_count, rtnl_held);
	/* The rule is only needed for the duration of the callback. */
	tc_cleanup_flow_action(&cls_flower.rule->action);
	kfree(cls_flower.rule);

	if (err) {
		fl_hw_destroy_filter(tp, f, rtnl_held, NULL);
		return err;
	}

	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
		return -EINVAL;

	return 0;
}

477
478
/* Issue a FLOW_CLS_STATS offload request for @f and feed the returned
 * byte/packet/lastused counters into the filter's extensions.
 */
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
			       bool rtnl_held)
{
	struct tcf_block *block = tp->chain->block;
	struct flow_cls_offload cls_flower = {};

	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
	cls_flower.command = FLOW_CLS_STATS;
	cls_flower.cookie = (unsigned long) f;
	cls_flower.classid = f->res.classid;

	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
			 rtnl_held);

	tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
			      cls_flower.stats.pkts,
			      cls_flower.stats.lastused);
}
495

496
static void __fl_put(struct cls_fl_filter *f)
497
{
498
499
	if (!refcount_dec_and_test(&f->refcnt))
		return;
500

501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
	if (tcf_exts_get_net(&f->exts))
		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
	else
		__fl_destroy_filter(f);
}

/* Look up the filter registered under @handle and take a reference to it.
 * Returns NULL when no such filter exists or when its reference count has
 * already dropped to zero.
 */
static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
{
	struct cls_fl_filter *found = NULL;
	struct cls_fl_filter *candidate;

	rcu_read_lock();
	candidate = idr_find(&head->handle_idr, handle);
	if (candidate && refcount_inc_not_zero(&candidate->refcnt))
		found = candidate;
	rcu_read_unlock();

	return found;
}

/* Unlink @f from all lookup structures and drop a reference to it.
 *
 * Under tp->lock the filter is marked deleted and removed from its mask's
 * hashtable, from the handle idr and from the mask's filter list. After
 * that the mask reference is dropped, the hardware offload is removed
 * (unless the filter is skip_hw), the filter is unbound and put.
 *
 * @last is set to true when the filter's mask was released by this call.
 *
 * Returns 0, or -ENOENT when the filter was already deleted.
 */
static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
		       bool *last, bool rtnl_held,
		       struct netlink_ext_ack *extack)
{
	struct cls_fl_head *head = fl_head_dereference(tp);

	*last = false;

	spin_lock(&tp->lock);
	if (f->deleted) {
		spin_unlock(&tp->lock);
		return -ENOENT;
	}

	f->deleted = true;
	rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
			       f->mask->filter_ht_params);
	idr_remove(&head->handle_idr, f->handle);
	list_del_rcu(&f->list);
	spin_unlock(&tp->lock);

	*last = fl_mask_put(head, f->mask);
	if (!tc_skip_hw(f->flags))
		fl_hw_destroy_filter(tp, f, rtnl_held, extack);
	tcf_unbind_filter(tp, &f->res);
	__fl_put(f);

	return 0;
}

550
551
static void fl_destroy_sleepable(struct work_struct *work)
{
552
553
554
555
556
	struct cls_fl_head *head = container_of(to_rcu_work(work),
						struct cls_fl_head,
						rwork);

	rhashtable_destroy(&head->ht);
557
558
559
560
	kfree(head);
	module_put(THIS_MODULE);
}

561
562
/* Delete every filter of @tp and schedule the release of the head.
 *
 * A module reference is taken before queueing fl_destroy_sleepable(),
 * which drops it again once the head has been freed.
 */
static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
		       struct netlink_ext_ack *extack)
{
	struct cls_fl_head *head = fl_head_dereference(tp);
	struct fl_flow_mask *mask, *next_mask;
	struct cls_fl_filter *f, *next;
	bool last;

	list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
		list_for_each_entry_safe(f, next, &mask->filters, list) {
			__fl_delete(tp, f, &last, rtnl_held, extack);
			/* The mask was released together with its last
			 * filter; stop walking its list.
			 */
			if (last)
				break;
		}
	}
	idr_destroy(&head->handle_idr);

	__module_get(THIS_MODULE);
	tcf_queue_work(&head->rwork, fl_destroy_sleepable);
}

582
583
584
585
586
587
588
/* Drop the filter reference passed as the opaque @arg; @tp is unused. */
static void fl_put(struct tcf_proto *tp, void *arg)
{
	__fl_put(arg);
}

589
/* Return the filter of @tp registered under @handle with a reference
 * taken, or NULL (see __fl_get()).
 */
static void *fl_get(struct tcf_proto *tp, u32 handle)
{
	struct cls_fl_head *head = fl_head_dereference(tp);

	return __fl_get(head, handle);
}

static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
					    .len = IFNAMSIZ },
	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
	[TCA_FLOWER_KEY_VLAN_ID]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_VLAN_PRIO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_VLAN_ETH_TYPE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_KEY_ID]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_SRC]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_DST]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_TCP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_DST_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_DST_MASK]	= { .type = NLA_U16 },
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
	[TCA_FLOWER_KEY_SCTP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_DST_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_DST]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_FLAGS]		= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_FLAGS_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ICMPV4_TYPE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_CODE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_TYPE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_CODE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ARP_SIP]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_SIP_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_TIP]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_TIP_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_OP]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ARP_OP_MASK]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ARP_SHA]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_SHA_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_THA]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_THA_MASK]	= { .len = ETH_ALEN },
663
664
665
666
	[TCA_FLOWER_KEY_MPLS_TTL]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_BOS]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_TC]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_LABEL]	= { .type = NLA_U32 },
667
668
669
670
671
672
	[TCA_FLOWER_KEY_TCP_FLAGS]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_FLAGS_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_IP_TOS]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TOS_MASK]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TTL]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TTL_MASK]	= { .type = NLA_U8 },
673
674
675
676
677
678
679
680
681
	[TCA_FLOWER_KEY_CVLAN_ID]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CVLAN_PRIO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_IP_TOS]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_IP_TTL]	 = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_OPTS]	= { .type = NLA_NESTED },
	[TCA_FLOWER_KEY_ENC_OPTS_MASK]	= { .type = NLA_NESTED },
682
683
684
685
686
687
688
689
690
691
	[TCA_FLOWER_KEY_CT_STATE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CT_STATE_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CT_ZONE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CT_ZONE_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_CT_MARK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_CT_MARK_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_CT_LABELS]	= { .type = NLA_BINARY,
					    .len = 128 / BITS_PER_BYTE },
	[TCA_FLOWER_KEY_CT_LABELS_MASK]	= { .type = NLA_BINARY,
					    .len = 128 / BITS_PER_BYTE },
692
693
694
695
696
697
698
699
700
701
702
703
704
};

/* Policy for the attributes nested in TCA_FLOWER_KEY_ENC_OPTS and
 * TCA_FLOWER_KEY_ENC_OPTS_MASK; used by fl_set_enc_opt().
 */
static const struct nla_policy
enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
	[TCA_FLOWER_KEY_ENC_OPTS_GENEVE]        = { .type = NLA_NESTED },
};

/* Policy for the attributes nested in TCA_FLOWER_KEY_ENC_OPTS_GENEVE;
 * used by fl_set_geneve_opt(). Option data is limited to 128 bytes.
 */
static const struct nla_policy
geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]      = { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]       = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]       = { .type = NLA_BINARY,
						       .len = 128 },
};

static void fl_set_key_val(struct nlattr **tb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
{
	if (!tb[val_type])
		return;
713
	nla_memcpy(val, tb[val_type], len);
714
715
716
	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
		memset(mask, 0xff, len);
	else
717
		nla_memcpy(mask, tb[mask_type], len);
718
719
}

720
721
722
static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
				 struct fl_flow_key *mask)
{
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
	fl_set_key_val(tb, &key->tp_range.tp_min.dst,
		       TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_range.tp_min.dst,
		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.dst));
	fl_set_key_val(tb, &key->tp_range.tp_max.dst,
		       TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_range.tp_max.dst,
		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.dst));
	fl_set_key_val(tb, &key->tp_range.tp_min.src,
		       TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_range.tp_min.src,
		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.src));
	fl_set_key_val(tb, &key->tp_range.tp_max.src,
		       TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));

	if ((mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
	     htons(key->tp_range.tp_max.dst) <=
		 htons(key->tp_range.tp_min.dst)) ||
	    (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
	     htons(key->tp_range.tp_max.src) <=
		 htons(key->tp_range.tp_min.src)))
742
743
744
745
746
		return -EINVAL;

	return 0;
}

747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
/* Parse the MPLS TTL, BOS, TC and LABEL attributes into @key_val and set
 * the full field mask in @key_mask for every attribute that is present.
 *
 * Returns 0, or -EINVAL when a BOS, TC or LABEL value has bits outside of
 * its field mask. Fields handled before the offending attribute stay
 * written.
 */
static int fl_set_key_mpls(struct nlattr **tb,
			   struct flow_dissector_key_mpls *key_val,
			   struct flow_dissector_key_mpls *key_mask)
{
	const struct nlattr *attr;

	attr = tb[TCA_FLOWER_KEY_MPLS_TTL];
	if (attr) {
		key_val->mpls_ttl = nla_get_u8(attr);
		key_mask->mpls_ttl = MPLS_TTL_MASK;
	}

	attr = tb[TCA_FLOWER_KEY_MPLS_BOS];
	if (attr) {
		u8 bos = nla_get_u8(attr);

		if (bos & ~MPLS_BOS_MASK)
			return -EINVAL;
		key_val->mpls_bos = bos;
		key_mask->mpls_bos = MPLS_BOS_MASK;
	}

	attr = tb[TCA_FLOWER_KEY_MPLS_TC];
	if (attr) {
		u8 tc = nla_get_u8(attr);

		if (tc & ~MPLS_TC_MASK)
			return -EINVAL;
		key_val->mpls_tc = tc;
		key_mask->mpls_tc = MPLS_TC_MASK;
	}

	attr = tb[TCA_FLOWER_KEY_MPLS_LABEL];
	if (attr) {
		u32 label = nla_get_u32(attr);

		if (label & ~MPLS_LABEL_MASK)
			return -EINVAL;
		key_val->mpls_label = label;
		key_mask->mpls_label = MPLS_LABEL_MASK;
	}

	return 0;
}

782
static void fl_set_key_vlan(struct nlattr **tb,
783
784
			    __be16 ethertype,
			    int vlan_id_key, int vlan_prio_key,
785
786
787
788
789
			    struct flow_dissector_key_vlan *key_val,
			    struct flow_dissector_key_vlan *key_mask)
{
#define VLAN_PRIORITY_MASK	0x7

790
	if (tb[vlan_id_key]) {
791
		key_val->vlan_id =
792
			nla_get_u16(tb[vlan_id_key]) & VLAN_VID_MASK;
793
794
		key_mask->vlan_id = VLAN_VID_MASK;
	}
795
	if (tb[vlan_prio_key]) {
796
		key_val->vlan_priority =
797
			nla_get_u8(tb[vlan_prio_key]) &
798
799
800
			VLAN_PRIORITY_MASK;
		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
	}
801
802
	key_val->vlan_tpid = ethertype;
	key_mask->vlan_tpid = cpu_to_be16(~0);
803
804
}

805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
/* Translate one flower flag bit into its flow dissector counterpart.
 *
 * When @flower_flag_bit is set in @flower_mask, @dissector_flag_bit is
 * OR-ed into *@dissector_mask, and also into *@dissector_key if the bit is
 * set in @flower_key. Otherwise nothing is touched.
 */
static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
			    u32 *dissector_key, u32 *dissector_mask,
			    u32 flower_flag_bit, u32 dissector_flag_bit)
{
	if (!(flower_mask & flower_flag_bit))
		return;

	*dissector_mask |= dissector_flag_bit;
	if (flower_key & flower_flag_bit)
		*dissector_key |= dissector_flag_bit;
}

/* Parse TCA_FLOWER_KEY_FLAGS / TCA_FLOWER_KEY_FLAGS_MASK into flow
 * dissector control flags.
 *
 * Both attributes carry big-endian 32-bit values. *@flags_key and
 * *@flags_mask are reset and then filled with the FLOW_DIS_* equivalents
 * of the fragment flags selected by the mask.
 *
 * Returns -EINVAL when the mask attribute is missing, 0 otherwise.
 * tb[TCA_FLOWER_KEY_FLAGS] is read without a check here.
 */
static int fl_set_key_flags(struct nlattr **tb,
			    u32 *flags_key, u32 *flags_mask)
{
	u32 key, mask;

	/* mask is mandatory for flags */
	if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
		return -EINVAL;

	key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS]));
	mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));

	*flags_key  = 0;
	*flags_mask = 0;

	fl_set_key_flag(key, mask, flags_key, flags_mask,
			TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
	fl_set_key_flag(key, mask, flags_key, flags_mask,
			TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
			FLOW_DIS_FIRST_FRAG);

	return 0;
}

840
/* Parse the IP TOS and TTL attributes (and their masks) into @key/@mask.
 * @encap selects the TCA_FLOWER_KEY_ENC_IP_* attributes instead of the
 * TCA_FLOWER_KEY_IP_* ones.
 */
static void fl_set_key_ip(struct nlattr **tb, bool encap,
			  struct flow_dissector_key_ip *key,
			  struct flow_dissector_key_ip *mask)
{
	int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
	int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
	int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
	int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;

	fl_set_key_val(tb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos));
	fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
}

static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
			     int depth, int option_len,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1];
	struct nlattr *class = NULL, *type = NULL, *data = NULL;
	struct geneve_opt *opt;
	int err, data_len = 0;

	if (option_len > sizeof(struct geneve_opt))
		data_len = option_len - sizeof(struct geneve_opt);

	opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
	memset(opt, 0xff, option_len);
	opt->length = data_len / 4;
	opt->r1 = 0;
	opt->r2 = 0;
	opt->r3 = 0;

	/* If no mask has been prodived we assume an exact match. */
	if (!depth)
		return sizeof(struct geneve_opt) + data_len;

	if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) {
		NL_SET_ERR_MSG(extack, "Non-geneve option type for mask");
		return -EINVAL;
	}

881
882
883
	err = nla_parse_nested_deprecated(tb,
					  TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
					  nla, geneve_opt_policy, extack);
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
	if (err < 0)
		return err;

	/* We are not allowed to omit any of CLASS, TYPE or DATA
	 * fields from the key.
	 */
	if (!option_len &&
	    (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] ||
	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] ||
	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) {
		NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
		return -EINVAL;
	}

	/* Omitting any of CLASS, TYPE or DATA fields is allowed
	 * for the mask.
	 */
	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) {
		int new_len = key->enc_opts.len;

		data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA];
		data_len = nla_len(data);
		if (data_len < 4) {
			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
			return -ERANGE;
		}
		if (data_len % 4) {
			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
			return -ERANGE;
		}

		new_len += sizeof(struct geneve_opt) + data_len;
		BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX);
		if (new_len > FLOW_DIS_TUN_OPTS_MAX) {
			NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
			return -ERANGE;
		}
		opt->length = data_len / 4;
		memcpy(opt->opt_data, nla_data(data), data_len);
	}

	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) {
		class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS];
		opt->opt_class = nla_get_be16(class);
	}

	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) {
		type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE];
		opt->type = nla_get_u8(type);
	}

	return sizeof(struct geneve_opt) + data_len;
}

static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
			  struct fl_flow_key *mask,
			  struct netlink_ext_ack *extack)
{
	const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
	int err, option_len, key_depth, msk_depth = 0;

945
946
947
	err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS],
					     TCA_FLOWER_KEY_ENC_OPTS_MAX,
					     enc_opts_policy, extack);
948
949
950
951
	if (err)
		return err;

	nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
952

953
	if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
954
955
956
		err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
						     TCA_FLOWER_KEY_ENC_OPTS_MAX,
						     enc_opts_policy, extack);
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
		if (err)
			return err;

		nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
		msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
	}

	nla_for_each_attr(nla_opt_key, nla_enc_key,
			  nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
		switch (nla_type(nla_opt_key)) {
		case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
			option_len = 0;
			key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
			option_len = fl_set_geneve_opt(nla_opt_key, key,
						       key_depth, option_len,
						       extack);
			if (option_len < 0)
				return option_len;

			key->enc_opts.len += option_len;
			/* At the same time we need to parse through the mask
			 * in order to verify exact and mask attribute lengths.
			 */
			mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
			option_len = fl_set_geneve_opt(nla_opt_msk, mask,
						       msk_depth, option_len,
						       extack);
			if (option_len < 0)
				return option_len;

			mask->enc_opts.len += option_len;
			if (key->enc_opts.len != mask->enc_opts.len) {
				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
				return -EINVAL;
			}

			if (msk_depth)
				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
			return -EINVAL;
		}
	}

	return 0;
1003
1004
}

1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
/*
 * Fill the conntrack part of the flow key and mask from the netlink
 * attribute table.
 *
 * The four conntrack attributes (state, zone, mark, labels) are handled in
 * that order.  A supplied attribute whose kernel config option is compiled
 * out is refused with -EOPNOTSUPP and a message in @extack; attributes
 * handled before the refused one have already been stored.  Absent
 * attributes are skipped.
 *
 * Returns 0 on success or -EOPNOTSUPP as described above.
 */
static int fl_set_key_ct(struct nlattr **tb,
			 struct flow_dissector_key_ct *key,
			 struct flow_dissector_key_ct *mask,
			 struct netlink_ext_ack *extack)
{
	/* connection state */
	if (tb[TCA_FLOWER_KEY_CT_STATE] && !IS_ENABLED(CONFIG_NF_CONNTRACK)) {
		NL_SET_ERR_MSG(extack, "Conntrack isn't enabled");
		return -EOPNOTSUPP;
	}
	if (tb[TCA_FLOWER_KEY_CT_STATE])
		fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
			       &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
			       sizeof(key->ct_state));

	/* zone */
	if (tb[TCA_FLOWER_KEY_CT_ZONE] &&
	    !IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
		NL_SET_ERR_MSG(extack, "Conntrack zones isn't enabled");
		return -EOPNOTSUPP;
	}
	if (tb[TCA_FLOWER_KEY_CT_ZONE])
		fl_set_key_val(tb, &key->ct_zone, TCA_FLOWER_KEY_CT_ZONE,
			       &mask->ct_zone, TCA_FLOWER_KEY_CT_ZONE_MASK,
			       sizeof(key->ct_zone));

	/* mark */
	if (tb[TCA_FLOWER_KEY_CT_MARK] &&
	    !IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
		NL_SET_ERR_MSG(extack, "Conntrack mark isn't enabled");
		return -EOPNOTSUPP;
	}
	if (tb[TCA_FLOWER_KEY_CT_MARK])
		fl_set_key_val(tb, &key->ct_mark, TCA_FLOWER_KEY_CT_MARK,
			       &mask->ct_mark, TCA_FLOWER_KEY_CT_MARK_MASK,
			       sizeof(key->ct_mark));

	/* labels (an array member, hence no address-of) */
	if (tb[TCA_FLOWER_KEY_CT_LABELS] &&
	    !IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
		NL_SET_ERR_MSG(extack, "Conntrack labels aren't enabled");
		return -EOPNOTSUPP;
	}
	if (tb[TCA_FLOWER_KEY_CT_LABELS])
		fl_set_key_val(tb, key->ct_labels, TCA_FLOWER_KEY_CT_LABELS,
			       mask->ct_labels, TCA_FLOWER_KEY_CT_LABELS_MASK,
			       sizeof(key->ct_labels));

	return 0;
}

1050
static int fl_set_key(struct net *net, struct nlattr **tb,
1051
1052
		      struct fl_flow_key *key, struct fl_flow_key *mask,
		      struct netlink_ext_ack *extack)
1053
{
1054
	__be16 ethertype;
1055
	int ret = 0;
1056

1057
	if (tb[TCA_FLOWER_INDEV]) {
1058
		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
1059
1060
		if (err < 0)
			return err;
1061
1062
		key->meta.ingress_ifindex = err;
		mask->meta.ingress_ifindex = 0xffffffff;
1063
1064
1065
1066
1067
1068
1069
1070
1071
	}

	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
		       sizeof(key->eth.dst));
	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
		       sizeof(key->eth.src));

1072
1073
1074
	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);

1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
		if (eth_type_vlan(ethertype)) {
			fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
					TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
					&mask->vlan);

			if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
				ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
				if (eth_type_vlan(ethertype)) {
					fl_set_key_vlan(tb, ethertype,
							TCA_FLOWER_KEY_CVLAN_ID,
							TCA_FLOWER_KEY_CVLAN_PRIO,
							&key->cvlan, &mask->cvlan);
					fl_set_key_val(tb, &key->basic.n_proto,
						       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
						       &mask->basic.n_proto,
						       TCA_FLOWER_UNSPEC,
						       sizeof(key->basic.n_proto));
				} else {
					key->basic.n_proto = ethertype;
					mask->basic.n_proto = cpu_to_be16(~0);
				}
			}
1097
1098
1099
1100
1101
		} else {
			key->basic.n_proto = ethertype;
			mask->basic.n_proto = cpu_to_be16(~0);
		}
	}
1102
1103
1104
1105
1106
1107

	if (key->basic.n_proto == htons(ETH_P_IP) ||
	    key->basic.n_proto == htons(ETH_P_IPV6)) {
		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
			       sizeof(key->basic.ip_proto));
1108
		fl_set_key_ip(tb, false, &key->ip, &mask->ip);
1109
1110
1111
1112
	}

	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1113
		mask->control.addr_type = ~0;
1114
1115
1116
1117
1118
1119
1120
1121
		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			       sizeof(key->ipv4.src));
		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
			       sizeof(key->ipv4.dst));
	} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1122
		mask->control.addr_type = ~0;
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(key->ipv6.src));
		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(key->ipv6.dst));
	}

	if (key->basic.ip_proto == IPPROTO_TCP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
1133
			       &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
1134
1135
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
1136
			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
1137
			       sizeof(key->tp.dst));
1138
1139
1140
		fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
			       &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
			       sizeof(key->tcp.flags));
1141
1142
	} else if (key->basic.ip_proto == IPPROTO_UDP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
1143
			       &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
1144
1145
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
1146
			       &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
1147
			       sizeof(key->tp.dst));
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
	} else if (key->basic.ip_proto == IPPROTO_SCTP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
			       &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
			       &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
			       sizeof(key->tp.dst));
	} else if (key->basic.n_proto == htons(ETH_P_IP) &&
		   key->basic.ip_proto == IPPROTO_ICMP) {
		fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
			       &mask->icmp.type,
			       TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
			       sizeof(key->icmp.type));
		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
			       &mask->icmp.code,
			       TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
			       sizeof(key->icmp.code));
	} else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
		   key->basic.ip_proto == IPPROTO_ICMPV6) {
		fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
			       &mask->icmp.type,
			       TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
			       sizeof(key->icmp.type));
		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE,
			       &mask->icmp.code,
			       TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
			       sizeof(key->icmp.code));
1175
1176
1177
1178
1179
	} else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) ||
		   key->basic.n_proto == htons(ETH_P_MPLS_MC)) {
		ret = fl_set_key_mpls(tb, &key->mpls, &mask->mpls);
		if (ret)
			return ret;
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
	} else if (key->basic.n_proto == htons(ETH_P_ARP) ||
		   key->basic.n_proto == htons(ETH_P_RARP)) {
		fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
			       &mask->arp.sip, TCA_FLOWER_KEY_ARP_SIP_MASK,
			       sizeof(key->arp.sip));
		fl_set_key_val(tb, &key->arp.tip, TCA_FLOWER_KEY_ARP_TIP,
			       &mask->arp.tip, TCA_FLOWER_KEY_ARP_TIP_MASK,
			       sizeof(key->arp.tip));
		fl_set_key_val(tb, &key->arp.op, TCA_FLOWER_KEY_ARP_OP,
			       &mask->arp.op, TCA_FLOWER_KEY_ARP_OP_MASK,
			       sizeof(key->arp.op));
		fl_set_key_val(tb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
			       mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
			       sizeof(key->arp.sha));
		fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
			       mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
			       sizeof(key->arp.tha));
1197
1198
	}

1199
1200
1201
1202
1203
1204
1205
1206
	if (key->basic.ip_proto == IPPROTO_TCP ||
	    key->basic.ip_proto == IPPROTO_UDP ||
	    key->basic.ip_proto == IPPROTO_SCTP) {
		ret = fl_set_key_port_range(tb, key, mask);
		if (ret)
			return ret;
	}

1207
1208
1209
	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1210
		mask->enc_control.addr_type = ~0;
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
		fl_set_key_val(tb, &key->enc_ipv4.src,
			       TCA_FLOWER_KEY_ENC_IPV4_SRC,
			       &mask->enc_ipv4.src,
			       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
			       sizeof(key->enc_ipv4.src));
		fl_set_key_val(tb, &key->enc_ipv4.dst,
			       TCA_FLOWER_KEY_ENC_IPV4_DST,
			       &mask->enc_ipv4.dst,
			       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
			       sizeof(key->enc_ipv4.dst));
	}

	if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
	    tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1226
		mask->enc_control.addr_type = ~0;
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
		fl_set_key_val(tb, &key->enc_ipv6.src,
			       TCA_FLOWER_KEY_ENC_IPV6_SRC,
			       &mask->enc_ipv6.src,
			       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
			       sizeof(key->enc_ipv6.src));
		fl_set_key_val(tb, &key->enc_ipv6.dst,
			       TCA_FLOWER_KEY_ENC_IPV6_DST,
			       &mask->enc_ipv6.dst,
			       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
			       sizeof(key->enc_ipv6.dst));
	}

	fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
		       &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
		       sizeof(key->enc_key_id.keyid));

1243
1244
1245
1246
1247
1248
1249
1250
	fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
		       &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
		       sizeof(key->enc_tp.src));

	fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
		       &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
		       sizeof(key->enc_tp.dst));

1251
1252
1253
1254
1255
1256
1257
1258
	fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);

	if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
		ret = fl_set_enc_opt(tb, key, mask, extack);
		if (ret)
			return ret;
	}

1259
1260
1261
1262
	ret = fl_set_key_ct(tb, &key->ct, &mask->ct, extack);
	if (ret)
		return ret;

1263
1264
1265
1266
	if (tb[TCA_FLOWER_KEY_FLAGS])
		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);

	return ret;
1267
1268
}

1269
1270
static void fl_mask_copy(struct fl_flow_mask *dst,
			 struct fl_flow_mask *src)
1271
{
1272
1273
	const void *psrc = fl_key_get_start(&src->key, src);
	void *pdst = fl_key_get_start(&dst->key, src);
1274

1275
1276
	memcpy(pdst, psrc, fl_mask_range(src));
	dst->range = src->range;
1277
1278
1279
1280
1281
1282
1283
1284
}

/*
 * Template hashtable parameters for the filter tables.  key_len is left
 * unset here; fl_init_mask_hashtable() copies this template and fills in
 * key_len and the final key_offset for each mask.
 */
static const struct rhashtable_params fl_ht_params = {
	.automatic_shrinking = true,
	.head_offset = offsetof(struct cls_fl_filter, ht_node),
	/* base offset */
	.key_offset = offsetof(struct cls_fl_filter, mkey),
};

1285
static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
1286
{
1287
1288
1289
	mask->filter_ht_params = fl_ht_params;
	mask->filter_ht_params.key_len = fl_mask_range(mask);
	mask->filter_ht_params.key_offset += mask->range.start;
1290

1291
	return rhashtable_init(&mask->ht, &mask->filter_ht_params);
1292
1293
1294
}

/* Byte offset and size of a member of struct fl_flow_key. */
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member)

/*
 * Expands to a memchr_inv() call that scans the bytes of @member inside
 * @mask for a byte that differs from 0; usable as a condition.
 * No line continuation after the last line: a trailing backslash would pull
 * the following source line into the macro body.
 */
#define FL_KEY_IS_MASKED(mask, member)						\
	memchr_inv(((char *)(mask)) + FL_KEY_MEMBER_OFFSET(member),		\
		   0, FL_KEY_MEMBER_SIZE(member))

/*
 * Append an entry { @id, offset of @member } at keys[cnt] and increment
 * @cnt.  @cnt must be a modifiable lvalue.
 * NOTE(review): the ';' after while(0) is kept because call sites outside
 * this part of the file could not be checked; it means the macro must not
 * be used as the body of an if that has an else.
 */
#define FL_KEY_SET(keys, cnt, id, member)					\
	do {									\
		(keys)[(cnt)].key_id = (id);					\
		(keys)[(cnt)].offset = FL_KEY_MEMBER_OFFSET(member);		\
		(cnt)++;							\
	} while(0);

/* Same as FL_KEY_SET(), but only when @member is masked in @mask. */
#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member)			\
	do {									\
		if (FL_KEY_IS_MASKED(mask, member))				\
			FL_KEY_SET(keys, cnt, id, member);			\
	} while(0);

1314
1315
static void fl_init_dissector(struct flow_dissector *dissector,
			      struct fl_flow_key *mask)
1316
1317
1318
1319
{
	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
	size_t cnt = 0;

1320
1321
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_META, meta);
1322
1323
	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
1324
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
1325
			     FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
1326
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
1327
			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
1328
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,