seg6_iptunnel.c 11.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
3
4
5
6
7
8
9
10
11
12
13
/*
 *  SR-IPv6 implementation
 *
 *  Author:
 *  David Lebrun <david.lebrun@uclouvain.be>
 */

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/module.h>
#include <net/ip.h>
14
#include <net/ip_tunnels.h>
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#include <net/ip6_fib.h>
#include <net/route.h>
#include <net/seg6.h>
#include <linux/seg6.h>
#include <linux/seg6_iptunnel.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/dst_cache.h>
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif

struct seg6_lwt {
	struct dst_cache cache;
32
	struct seg6_iptunnel_encap tuninfo[];
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
};

static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
{
	return (struct seg6_lwt *)lwt->data;
}

static inline struct seg6_iptunnel_encap *
seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
{
	return seg6_lwt_lwtunnel(lwt)->tuninfo;
}

static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
	[SEG6_IPTUNNEL_SRH]	= { .type = NLA_BINARY },
};

static int nla_put_srh(struct sk_buff *skb, int attrtype,
		       struct seg6_iptunnel_encap *tuninfo)
{
	struct seg6_iptunnel_encap *data;
	struct nlattr *nla;
	int len;

	len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);

	nla = nla_reserve(skb, attrtype, len);
	if (!nla)
		return -EMSGSIZE;

	data = nla_data(nla);
	memcpy(data, tuninfo, len);

	return 0;
}

static void set_tun_src(struct net *net, struct net_device *dev,
			struct in6_addr *daddr, struct in6_addr *saddr)
{
	struct seg6_pernet_data *sdata = seg6_pernet(net);
	struct in6_addr *tun_src;

	rcu_read_lock();

	tun_src = rcu_dereference(sdata->tun_src);

	if (!ipv6_addr_any(tun_src)) {
		memcpy(saddr, tun_src, sizeof(struct in6_addr));
	} else {
		ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
				   saddr);
	}

	rcu_read_unlock();
}

89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/* Compute flowlabel for outer IPv6 header */
static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
				  struct ipv6hdr *inner_hdr)
{
	int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
	__be32 flowlabel = 0;
	u32 hash;

	if (do_flowlabel > 0) {
		hash = skb_get_hash(skb);
		hash = rol32(hash, 16);
		flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
	} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
		flowlabel = ip6_flowlabel(inner_hdr);
	}
	return flowlabel;
}

107
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
108
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
109
{
110
111
	struct dst_entry *dst = skb_dst(skb);
	struct net *net = dev_net(dst->dev);
112
113
114
	struct ipv6hdr *hdr, *inner_hdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, tot_len, err;
115
	__be32 flowlabel;
116
117
118
119

	hdrlen = (osrh->hdrlen + 1) << 3;
	tot_len = hdrlen + sizeof(*hdr);

120
	err = skb_cow_head(skb, tot_len + skb->mac_len);
121
122
123
124
	if (unlikely(err))
		return err;

	inner_hdr = ipv6_hdr(skb);
125
	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
126
127
128
129
130
131
132
133
134
135

	skb_push(skb, tot_len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	hdr = ipv6_hdr(skb);

	/* inherit tc, flowlabel and hlim
	 * hlim will be decremented in ip6_forward() afterwards and
	 * decapsulation will overwrite inner hlim with outer hlim
	 */
136
137
138

	if (skb->protocol == htons(ETH_P_IPV6)) {
		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
139
			     flowlabel);
140
141
		hdr->hop_limit = inner_hdr->hop_limit;
	} else {
142
		ip6_flow_hdr(hdr, 0, flowlabel);
143
		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
144
145

		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
146
147
	}

148
149
150
151
152
	hdr->nexthdr = NEXTHDR_ROUTING;

	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

153
	isrh->nexthdr = proto;
154
155

	hdr->daddr = isrh->segments[isrh->first_segment];
156
	set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
157
158
159
160
161
162
163
164
165
166
167
168
169

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	skb_postpush_rcsum(skb, hdr, tot_len);

	return 0;
}
170
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
171
172

/* insert an SRH within an IPv6 packet, just after the IPv6 header */
173
int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
174
175
176
177
178
179
180
{
	struct ipv6hdr *hdr, *oldhdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, err;

	hdrlen = (osrh->hdrlen + 1) << 3;

181
	err = skb_cow_head(skb, hdrlen + skb->mac_len);
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
	if (unlikely(err))
		return err;

	oldhdr = ipv6_hdr(skb);

	skb_pull(skb, sizeof(struct ipv6hdr));
	skb_postpull_rcsum(skb, skb_network_header(skb),
			   sizeof(struct ipv6hdr));

	skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);

	hdr = ipv6_hdr(skb);

	memmove(hdr, oldhdr, sizeof(*hdr));

	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	isrh->nexthdr = hdr->nexthdr;
	hdr->nexthdr = NEXTHDR_ROUTING;

	isrh->segments[0] = hdr->daddr;
	hdr->daddr = isrh->segments[isrh->first_segment];

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		struct net *net = dev_net(skb_dst(skb)->dev);

		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);

	return 0;
}
222
EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
223
224
225
226
227

static int seg6_do_srh(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct seg6_iptunnel_encap *tinfo;
228
	int proto, err = 0;
229
230
231
232
233

	tinfo = seg6_encap_lwtunnel(dst->lwtstate);

	switch (tinfo->mode) {
	case SEG6_IPTUN_MODE_INLINE:
234
235
236
		if (skb->protocol != htons(ETH_P_IPV6))
			return -EINVAL;

237
		err = seg6_do_srh_inline(skb, tinfo->srh);
238
239
		if (err)
			return err;
240
241
		break;
	case SEG6_IPTUN_MODE_ENCAP:
242
243
244
245
		err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
		if (err)
			return err;

246
247
248
249
250
251
252
253
254
255
256
		if (skb->protocol == htons(ETH_P_IPV6))
			proto = IPPROTO_IPV6;
		else if (skb->protocol == htons(ETH_P_IP))
			proto = IPPROTO_IPIP;
		else
			return -EINVAL;

		err = seg6_do_srh_encap(skb, tinfo->srh, proto);
		if (err)
			return err;

257
258
		skb_set_inner_transport_header(skb, skb_transport_offset(skb));
		skb_set_inner_protocol(skb, skb->protocol);
259
		skb->protocol = htons(ETH_P_IPV6);
260
		break;
261
262
263
	case SEG6_IPTUN_MODE_L2ENCAP:
		if (!skb_mac_header_was_set(skb))
			return -EINVAL;
264

265
266
267
268
269
270
		if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
			return -ENOMEM;

		skb_mac_header_rebuild(skb);
		skb_push(skb, skb->mac_len);

271
		err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
272
273
274
275
276
277
		if (err)
			return err;

		skb->protocol = htons(ETH_P_IPV6);
		break;
	}
278
279
280
281
282
283
284
285
286

	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	return 0;
}

static int seg6_input(struct sk_buff *skb)
{
287
288
289
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct seg6_lwt *slwt;
290
291
292
293
294
295
296
297
	int err;

	err = seg6_do_srh(skb);
	if (unlikely(err)) {
		kfree_skb(skb);
		return err;
	}

298
299
300
301
302
303
	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);

	preempt_disable();
	dst = dst_cache_get(&slwt->cache);
	preempt_enable();

304
	skb_dst_drop(skb);
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321

	if (!dst) {
		ip6_route_input(skb);
		dst = skb_dst(skb);
		if (!dst->error) {
			preempt_disable();
			dst_cache_set_ip6(&slwt->cache, dst,
					  &ipv6_hdr(skb)->saddr);
			preempt_enable();
		}
	} else {
		skb_dst_set(skb, dst);
	}

	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
	if (unlikely(err))
		return err;
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346

	return dst_input(skb);
}

static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct seg6_lwt *slwt;
	int err = -EINVAL;

	err = seg6_do_srh(skb);
	if (unlikely(err))
		goto drop;

	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);

	preempt_disable();
	dst = dst_cache_get(&slwt->cache);
	preempt_enable();

	if (unlikely(!dst)) {
		struct ipv6hdr *hdr = ipv6_hdr(skb);
		struct flowi6 fl6;

347
		memset(&fl6, 0, sizeof(fl6));
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
		fl6.daddr = hdr->daddr;
		fl6.saddr = hdr->saddr;
		fl6.flowlabel = ip6_flowinfo(hdr);
		fl6.flowi6_mark = skb->mark;
		fl6.flowi6_proto = hdr->nexthdr;

		dst = ip6_route_output(net, NULL, &fl6);
		if (dst->error) {
			err = dst->error;
			dst_release(dst);
			goto drop;
		}

		preempt_disable();
		dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
		preempt_enable();
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

369
370
371
372
	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
	if (unlikely(err))
		goto drop;

373
374
375
376
377
378
	return dst_output(net, sk, skb);
drop:
	kfree_skb(skb);
	return err;
}

379
static int seg6_build_state(struct net *net, struct nlattr *nla,
380
			    unsigned int family, const void *cfg,
381
382
			    struct lwtunnel_state **ts,
			    struct netlink_ext_ack *extack)
383
384
385
386
387
388
389
390
{
	struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
	struct seg6_iptunnel_encap *tuninfo;
	struct lwtunnel_state *newts;
	int tuninfo_len, min_size;
	struct seg6_lwt *slwt;
	int err;

391
392
393
	if (family != AF_INET && family != AF_INET6)
		return -EINVAL;

394
395
	err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
					  seg6_iptunnel_policy, extack);
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415

	if (err < 0)
		return err;

	if (!tb[SEG6_IPTUNNEL_SRH])
		return -EINVAL;

	tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
	tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);

	/* tuninfo must contain at least the iptunnel encap structure,
	 * the SRH and one segment
	 */
	min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
		   sizeof(struct in6_addr);
	if (tuninfo_len < min_size)
		return -EINVAL;

	switch (tuninfo->mode) {
	case SEG6_IPTUN_MODE_INLINE:
416
417
418
		if (family != AF_INET6)
			return -EINVAL;

419
420
421
		break;
	case SEG6_IPTUN_MODE_ENCAP:
		break;
422
423
	case SEG6_IPTUN_MODE_L2ENCAP:
		break;
424
425
426
427
428
	default:
		return -EINVAL;
	}

	/* verify that SRH is consistent */
429
	if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
430
431
432
433
434
435
436
437
		return -EINVAL;

	newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
	if (!newts)
		return -ENOMEM;

	slwt = seg6_lwt_lwtunnel(newts);

438
	err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
439
440
441
442
443
444
445
446
	if (err) {
		kfree(newts);
		return err;
	}

	memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);

	newts->type = LWTUNNEL_ENCAP_SEG6;
447
448
449
450
451
	newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;

	if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;

452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
	newts->headroom = seg6_lwt_headroom(tuninfo);

	*ts = newts;

	return 0;
}

static void seg6_destroy_state(struct lwtunnel_state *lwt)
{
	dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
}

static int seg6_fill_encap_info(struct sk_buff *skb,
				struct lwtunnel_state *lwtstate)
{
	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);

	if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
		return -EMSGSIZE;

	return 0;
}

static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);

	return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
}

static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
	struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
	int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);

	if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
		return 1;

	return memcmp(a_hdr, b_hdr, len);
}

static const struct lwtunnel_encap_ops seg6_iptun_ops = {
	.build_state = seg6_build_state,
	.destroy_state = seg6_destroy_state,
	.output = seg6_output,
	.input = seg6_input,
	.fill_encap = seg6_fill_encap_info,
	.get_encap_size = seg6_encap_nlsize,
	.cmp_encap = seg6_encap_cmp,
	.owner = THIS_MODULE,
};

int __init seg6_iptunnel_init(void)
{
	return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}

void seg6_iptunnel_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}