builtin-trace.c 146 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * builtin-trace.c
 *
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc.  Default format is loosely strace like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 */

17
#include "util/record.h"
18
19
#include <traceevent/event-parse.h>
#include <api/fs/tracing_path.h>
20
21
#include <bpf/bpf.h>
#include "util/bpf_map.h"
22
#include "util/rlimit.h"
23
#include "builtin.h"
24
#include "util/cgroup.h"
25
#include "util/color.h"
26
#include "util/config.h"
27
#include "util/debug.h"
28
#include "util/dso.h"
29
#include "util/env.h"
30
#include "util/event.h"
31
32
33
#include "util/evsel.h"
#include "util/evsel_fprintf.h"
#include "util/synthetic-events.h"
34
#include "util/evlist.h"
35
36
37
#include "util/evswitch.h"
#include "util/mmap.h"
#include <subcmd/pager.h>
38
39
#include <subcmd/exec-cmd.h>
#include "util/machine.h"
40
41
#include "util/map.h"
#include "util/symbol.h"
42
#include "util/path.h"
43
44
45
46
47
48
49
#include "util/session.h"
#include "util/thread.h"
#include <subcmd/parse-options.h>
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
50
51
#include "util/tool.h"
#include "util/util.h"
52
#include "trace/beauty/beauty.h"
53
54
55
56
#include "trace-event.h"
#include "util/parse-events.h"
#include "util/bpf-loader.h"
#include "callchain.h"
57
58
#include "print_binary.h"
#include "string2.h"
59
60
#include "syscalltbl.h"
#include "rb_resort.h"
61
#include "../perf.h"
62

63
64
65
66
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <signal.h>
67
#include <stdlib.h>
68
#include <string.h>
69
70
#include <linux/err.h>
#include <linux/filter.h>
71
#include <linux/kernel.h>
72
73
#include <linux/random.h>
#include <linux/stringify.h>
74
#include <linux/time64.h>
75
#include <linux/zalloc.h>
76
#include <fcntl.h>
77
#include <sys/sysmacros.h>
78

79
#include <linux/ctype.h>
80
#include <perf/mmap.h>
81

82
83
84
85
#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

86
87
88
89
#ifndef F_LINUX_SPECIFIC_BASE
# define F_LINUX_SPECIFIC_BASE	1024
#endif

90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/*
 * Describes how one syscall/tracepoint argument is converted between its
 * numeric value and a human readable string.
 *
 * scnprintf: Go from a value to a string written into 'bf' (at most 'size'
 *	      bytes); the formatters in this file return the number of
 *	      characters produced by scnprintf().
 * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
 * mask_val: takes the argument and a value and returns a value
 *	     (NOTE(review): presumably masks out bits that should not be
 *	     shown - confirm against its users).
 * parm: opaque parameter handed to the callbacks through arg->parm, e.g. a
 *	 pointer to a struct strarray for the STRARRAY() initializers.
 * name: argument name, set explicitly by some syscall_fmts[] entries.
 * show_zero: NOTE(review): presumably forces printing of a zero value that
 *	      would otherwise be suppressed - confirm.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	bool	   (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
	void	   *parm;
	const char *name;
	u16	   nr_entries; // for arrays; used as the string precision by the char array formatter
	bool	   show_zero;
};

/*
 * Per-syscall formatting overrides, one entry per syscall in syscall_fmts[].
 *
 * name:  syscall name the entry applies to.
 * alias: alternative name for the same syscall (e.g. "newfstat" for
 *	  "fstat", "old_mmap" for "mmap" on s390x).
 * bpf_prog_name: names of the BPF programs for the enter/exit side
 *	  (NOTE(review): how they are looked up is outside this chunk).
 * arg:	  formatters for up to six arguments, indexed by argument position.
 * nr_args: set by the entries that also name their arguments explicitly
 *	  (clone, kcmp).
 * errpid/timeout/hexret: per-syscall return value flags; hexret is set on
 *	  brk/mmap/mremap and errpid on syscalls returning a pid
 *	  (NOTE(review): exact effect is implemented outside this chunk).
 */
struct syscall_fmt {
	const char *name;
	const char *alias;
	struct {
		const char *sys_enter,
			   *sys_exit;
	}	   bpf_prog_name;
	struct syscall_arg_fmt arg[6];
	u8	   nr_args;
	bool	   errpid;
	bool	   timeout;
	bool	   hexret;
};

117
118
119
120
121
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		struct syscall  *table;
122
		struct bpf_map  *map;
123
124
125
126
		struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
			struct bpf_map  *sys_enter,
					*sys_exit;
		}		prog_array;
127
		struct {
128
			struct evsel *sys_enter,
129
130
					  *sys_exit,
					  *augmented;
131
		}		events;
132
		struct bpf_program *unaugmented_prog;
133
	} syscalls;
134
135
136
	struct {
		struct bpf_map *map;
	} dump;
137
	struct record_opts	opts;
138
	struct evlist	*evlist;
139
140
	struct machine		*host;
	struct thread		*current;
141
	struct bpf_object	*bpf_obj;
142
	struct cgroup		*cgroup;
143
144
145
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
146
147
	unsigned long		nr_events_printed;
	unsigned long		max_events;
148
	struct evswitch		evswitch;
149
150
151
152
153
154
155
156
	struct strlist		*ev_qualifier;
	struct {
		size_t		nr;
		int		*entries;
	}			ev_qualifier_ids;
	struct {
		size_t		nr;
		pid_t		*entries;
157
		struct bpf_map  *map;
158
159
160
161
162
163
164
165
166
	}			filter_pids;
	double			duration_filter;
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
167
168
	int			raw_augmented_syscalls_args_size;
	bool			raw_augmented_syscalls;
169
	bool			fd_path_disabled;
170
	bool			sort_events;
171
172
173
174
175
176
177
	bool			not_ev_qualifier;
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
178
	bool			errno_summary;
179
	bool			failure_only;
180
	bool			show_comm;
181
	bool			print_sample;
182
183
	bool			show_tool_stats;
	bool			trace_syscalls;
184
	bool			libtraceevent_print;
185
	bool			kernel_syscallchains;
186
187
188
189
190
191
	s16			args_alignment;
	bool			show_tstamp;
	bool			show_duration;
	bool			show_zeros;
	bool			show_arg_names;
	bool			show_string_prefix;
192
193
194
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;
195
	char			*perfconfig_events;
196
197
198
199
	struct {
		struct ordered_events	data;
		u64			last;
	} oe;
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
};

/*
 * Accessor for one field of a tracepoint's raw payload.
 *
 * offset:  byte offset of the field inside sample->raw_data.
 * integer: reader returning the field as a u64 (one of the tp_field__u*()
 *	    or tp_field__swapped_u*() helpers).
 * pointer: reader returning the address of the field inside the payload.
 *
 * Only one of the two readers is meaningful for a given field, they share
 * storage.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};

/*
 * Generates tp_field__u<bits>(): copies a <bits> wide unsigned integer out of
 * sample->raw_data at field->offset and returns it widened to u64.
 *
 * memcpy() is used instead of a pointer cast, so the field does not have to
 * be naturally aligned inside the raw payload.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

/* Readers for 1, 2, 4 and 8 byte fields in native byte order */
TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/*
 * Generates tp_field__swapped_u<bits>(): same as tp_field__u<bits>() but the
 * value is byte swapped with bswap_<bits>() before being returned. Selected
 * by __tp_field__init_uint() when 'needs_swap' is set.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

/* No 8 bit variant: a single byte has nothing to swap */
TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);

235
static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
236
{
237
	field->offset = offset;
238

239
	switch (size) {
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
	case 1:
		field->integer = tp_field__u8;
		break;
	case 2:
		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
		break;
	case 4:
		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
		break;
	case 8:
		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
		break;
	default:
		return -1;
	}

	return 0;
}

259
static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
260
261
262
263
{
	return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
}

264
265
266
267
268
/* Pointer reader: address of the field inside the raw sample payload. */
static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
	int off = field->offset;

	return sample->raw_data + off;
}

269
static int __tp_field__init_ptr(struct tp_field *field, int offset)
270
{
271
	field->offset = offset;
272
273
274
275
	field->pointer = tp_field__ptr;
	return 0;
}

276
static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
277
278
279
280
{
	return __tp_field__init_ptr(field, format_field->offset);
}

281
282
283
284
285
286
287
/*
 * Field accessors for a syscall tracepoint: the syscall id plus either the
 * arguments or the return value. 'args' and 'ret' share storage; the
 * augmented syscall helpers in this file initialise both of them at the
 * offset right after the id.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};

288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
/*
 * The evsel->priv as used by 'perf trace'
 * sc:	for raw_syscalls:sys_{enter,exit} and syscalls:sys_{enter,exit}_SYSCALLNAME
 * fmt: for all the other tracepoints
 *
 * 'fmt' is a heap allocated array owned by this struct, it is released by
 * evsel_trace__delete().
 */
struct evsel_trace {
	struct syscall_tp	sc;
	struct syscall_arg_fmt  *fmt;
};

/* Allocate a zero filled evsel_trace, NULL when out of memory. */
static struct evsel_trace *evsel_trace__new(void)
{
	struct evsel_trace *et = zalloc(sizeof(*et));

	return et;
}

/* Release an evsel_trace together with its 'fmt' array; NULL is accepted. */
static void evsel_trace__delete(struct evsel_trace *et)
{
	if (et != NULL) {
		zfree(&et->fmt);
		free(et);
	}
}

/*
 * Used with raw_syscalls:sys_{enter,exit} and with the
 * syscalls:sys_{enter,exit}_SYSCALL tracepoints
 *
 * Does not allocate: evsel->priv must already have been set up, see
 * evsel__syscall_tp().
 */
static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
{
	struct evsel_trace *priv = evsel->priv;

	return &priv->sc;
}

/*
 * Return the syscall_tp of 'evsel', allocating evsel->priv on first use.
 * Returns NULL if that allocation fails.
 */
static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
{
	if (evsel->priv != NULL)
		return __evsel__syscall_tp(evsel);

	evsel->priv = evsel_trace__new();

	return evsel->priv != NULL ? __evsel__syscall_tp(evsel) : NULL;
}

/*
 * Used with all the other tracepoints.
 *
 * Does not allocate: evsel->priv must already have been set up, see
 * evsel__syscall_arg_fmt().
 */
static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
{
	struct evsel_trace *priv = evsel->priv;

	return priv->fmt;
}

/*
 * Return the per-field formatter array of 'evsel', allocating evsel->priv
 * and the array (one zeroed entry per tracepoint format field) on first use.
 *
 * Returns NULL on allocation failure; when the array cannot be allocated
 * evsel->priv is released and reset to NULL.
 */
static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
{
	struct evsel_trace *et = evsel->priv;

	if (et == NULL) {
		et = evsel_trace__new();
		evsel->priv = et;

		if (et == NULL)
			return NULL;
	}

	/* Already set up by an earlier call */
	if (et->fmt != NULL)
		return et->fmt;

	et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
	if (et->fmt != NULL)
		return et->fmt;

	evsel_trace__delete(et);
	evsel->priv = NULL;
	return NULL;
}

369
static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
370
{
371
	struct tep_format_field *format_field = evsel__field(evsel, name);
372
373
374
375
376
377
378
379

	if (format_field == NULL)
		return -1;

	return tp_field__init_uint(field, format_field, evsel->needs_swap);
}

/*
 * Initialise the syscall_tp member 'name' of 'evsel' from the tracepoint
 * field of the same name. evsel->priv must already be allocated.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *__sctp = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_uint_field(evsel, &__sctp->name, #name); })
382

383
/*
 * Look up the tracepoint field called 'name' in 'evsel' and set 'field' up
 * as a pointer into the raw payload.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
	struct tep_format_field *ff = evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}

/*
 * Pointer flavour of perf_evsel__init_sc_tp_uint_field(): initialise the
 * syscall_tp member 'name' of 'evsel' from the tracepoint field of the same
 * name. evsel->priv must already be allocated.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *__sctp = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_ptr_field(evsel, &__sctp->name, #name); })
396

397
static void evsel__delete_priv(struct evsel *evsel)
398
399
{
	zfree(&evsel->priv);
400
	evsel__delete(evsel);
401
402
}

403
static int evsel__init_syscall_tp(struct evsel *evsel)
404
{
405
	struct syscall_tp *sc = evsel__syscall_tp(evsel);
406

407
	if (sc != NULL) {
408
409
		if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
		    evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
410
			return -ENOENT;
411
412
413
414
415
416
		return 0;
	}

	return -ENOMEM;
}

417
static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
418
{
419
	struct syscall_tp *sc = evsel__syscall_tp(evsel);
420

421
	if (sc != NULL) {
422
		struct tep_format_field *syscall_id = evsel__field(tp, "id");
423
		if (syscall_id == NULL)
424
			syscall_id = evsel__field(tp, "__syscall_nr");
425
426
427
		if (syscall_id == NULL ||
		    __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
			return -EINVAL;
428
429
430
431
432
433
434

		return 0;
	}

	return -ENOMEM;
}

435
static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
436
{
437
	struct syscall_tp *sc = __evsel__syscall_tp(evsel);
438
439
440
441

	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
}

442
static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
443
{
444
	struct syscall_tp *sc = __evsel__syscall_tp(evsel);
445
446
447
448

	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
}

449
static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
450
{
451
	if (evsel__syscall_tp(evsel) != NULL) {
452
		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
453
			return -ENOENT;
454
455
456
457
458
459
460
461

		evsel->handler = handler;
		return 0;
	}

	return -ENOMEM;
}

462
/*
 * Create the evsel for the raw syscall tracepoint 'direction' and hook
 * 'handler' to it.
 *
 * Returns the new evsel, or NULL if the tracepoint cannot be created or
 * initialised; a half initialised evsel is deleted before returning.
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (evsel__init_raw_syscall_tp(evsel, handler) == 0)
		return evsel;

	evsel__delete_priv(evsel);
	return NULL;
}

/*
 * Read the integer syscall_tp member 'name' of 'evsel' out of 'sample'.
 * evsel->priv must already be allocated and the member initialised.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *__sctp = __evsel__syscall_tp(evsel); \
	   __sctp->name.integer(&__sctp->name, sample); })

/*
 * Get the address of the syscall_tp member 'name' of 'evsel' inside
 * 'sample'. evsel->priv must already be allocated and the member
 * initialised.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *__sctp = __evsel__syscall_tp(evsel); \
	   __sctp->name.pointer(&__sctp->name, sample); })

491
492
493
494
495
496
497
498
499
500
501
502
503
504
/*
 * Format 'val' using the names in 'sa', with sa->prefix appended after the
 * name when 'show_suffix' is set.
 *
 * A value without a name in the table is printed with 'intfmt', followed by
 * a " /* <prefix>??? *​/" marker when 'show_suffix' is set.
 *
 * Returns the number of characters written to 'bf'.
 */
size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
{
	int idx = val - sa->offset;
	size_t printed;

	if (idx >= 0 && idx < sa->nr_entries && sa->entries[idx] != NULL)
		return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");

	printed = scnprintf(bf, size, intfmt, val);
	if (show_suffix)
		printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);

	return printed;
}

505
size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
506
507
{
	int idx = val - sa->offset;
508

509
510
511
512
513
514
	if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
		size_t printed = scnprintf(bf, size, intfmt, val);
		if (show_prefix)
			printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
		return printed;
	}
515

516
	return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
517
518
519
520
521
522
}

static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
						const char *intfmt,
					        struct syscall_arg *arg)
{
523
	return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
524
525
526
527
528
529
530
531
532
533
}

/* strarray formatter printing values without a name as a decimal integer. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char intfmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, intfmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray

534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
/*
 * Look the first 'size' characters of 'bf' up in the strarray passed in
 * arg->parm; on a match store the value in '*ret' and return true.
 */
bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	bool found = strarray__strtoul(arg->parm, bf, size, ret);

	return found;
}

/*
 * Parse a '|' separated flags expression in 'bf' with the strarray passed in
 * arg->parm; on success store the resulting mask in '*ret' and return true.
 */
bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	bool parsed = strarray__strtoul_flags(arg->parm, bf, size, ret);

	return parsed;
}

/*
 * Look the first 'size' characters of 'bf' up in the set of strarrays passed
 * in arg->parm; on a match store the value in '*ret' and return true.
 */
bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	bool found = strarrays__strtoul(arg->parm, bf, size, ret);

	return found;
}

549
550
551
552
553
/* Format arg->val as a set of flags named by the strarray in arg->parm. */
size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
{
	bool show_prefix = arg->show_string_prefix;

	return strarray__scnprintf_flags(arg->parm, bf, size, show_prefix, arg->val);
}

554
size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
555
{
556
	size_t printed;
557
	int i;
558

559
560
	for (i = 0; i < sas->nr_entries; ++i) {
		struct strarray *sa = sas->entries[i];
561
		int idx = val - sa->offset;
562

563
564
565
		if (idx >= 0 && idx < sa->nr_entries) {
			if (sa->entries[idx] == NULL)
				break;
566
			return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
567
568
569
		}
	}

570
571
572
573
574
575
	printed = scnprintf(bf, size, intfmt, val);
	if (show_prefix)
		printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
	return printed;
}

576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
/*
 * Look up the name made of the first 'size' characters of 'bf' in 'sa'.
 *
 * On an exact match, '*ret' receives the value for that name (its index plus
 * sa->offset) and true is returned. Returns false, leaving '*ret' untouched,
 * when no entry matches.
 *
 * The entry length is compared first, so an entry shorter than 'size' is
 * never indexed past its terminating NUL.
 */
bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
{
	int i;

	for (i = 0; i < sa->nr_entries; ++i) {
		const char *entry = sa->entries[i];

		/* Holes in the table have no name to match */
		if (entry == NULL)
			continue;

		if (strlen(entry) != size || memcmp(entry, bf, size) != 0)
			continue;

		*ret = sa->offset + i;
		return true;
	}

	return false;
}

/*
 * Parse a '|' separated list of flags held in the first 'size' characters of
 * 'bf' and OR the matching bits into '*ret'.
 *
 * Each token, with surrounding blanks ignored, is either a name looked up in
 * 'sa' or a number handed to strtoul() with base 0 (so decimal, octal and
 * 0x-prefixed hexadecimal are accepted). Either way the resulting value N
 * selects bit N - 1.
 *
 * Returns true on success. Returns false when a token is empty, a name is
 * not in 'sa', or the value does not select one of the 64 bits of '*ret';
 * '*ret' may then hold the bits of the tokens parsed so far.
 */
bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
{
	u64 val = 0;
	char *tok = bf, *sep;

	*ret = 0;

	while (size != 0) {
		size_t toklen = size;

		sep = memchr(tok, '|', size);
		if (sep != NULL) {
			toklen = sep - tok;
			size -= toklen + 1;
		}

		/*
		 * Trim blanks on both sides while keeping toklen in sync, the
		 * name lookup needs the exact length of the token.
		 */
		while (toklen != 0 && isspace(*tok)) {
			++tok;
			--toklen;
		}
		while (toklen != 0 && isspace(tok[toklen - 1]))
			--toklen;

		if (toklen == 0)
			return false;

		if (isalpha(*tok) || *tok == '_') {
			if (!strarray__strtoul(sa, tok, toklen, &val))
				return false;
		} else {
			/* Base 0 already detects the "0x"/"0X" prefix */
			val = strtoul(tok, NULL, 0);
		}

		/* Shifting by -1 or by 64 and more would be undefined */
		if (val == 0 || val > 64)
			return false;

		*ret |= 1ULL << (val - 1);

		if (sep == NULL)
			break;
		tok = sep + 1;
	}

	return true;
}

/*
 * Look the first 'size' characters of 'bf' up in each strarray of 'sas', in
 * order. The first match stores its value in '*ret' and returns true; false
 * is returned when no strarray knows the name.
 */
bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
{
	int i = 0;

	while (i < sas->nr_entries) {
		if (strarray__strtoul(sas->entries[i], bf, size, ret))
			return true;
		++i;
	}

	return false;
}

647
648
649
650
/*
 * Format arg->val with the set of strarrays passed in arg->parm, printing
 * values without a name as a decimal integer.
 */
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
					struct syscall_arg *arg)
{
	bool show_prefix = arg->show_string_prefix;

	return strarrays__scnprintf(arg->parm, bf, size, "%d", show_prefix, arg->val);
}
652

653
654
655
656
#ifndef AT_FDCWD
#define AT_FDCWD	-100
#endif

657
658
659
660
static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
					   struct syscall_arg *arg)
{
	int fd = arg->val;
661
	const char *prefix = "AT_FD";
662
663

	if (fd == AT_FDCWD)
664
		return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
665
666
667
668
669
670
671
672
673
674
675

	return syscall_arg__scnprintf_fd(bf, size, arg);
}

#define SCA_FDAT syscall_arg__scnprintf_fd_at

static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd

676
size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
677
678
679
680
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

681
682
683
684
685
686
687
/* Format arg->val as a pointer: "NULL" for zero, hexadecimal otherwise. */
size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
{
	if (arg->val != 0)
		return syscall_arg__scnprintf_hex(bf, size, arg);

	return scnprintf(bf, size, "NULL");
}

688
size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
689
690
691
692
{
	return scnprintf(bf, size, "%d", arg->val);
}

693
694
695
696
/* Format arg->val as a signed decimal long. */
size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
{
	size_t printed = scnprintf(bf, size, "%ld", arg->val);

	return printed;
}
697

698
699
700
701
702
703
704
705
706
707
/*
 * Format a fixed size character array field as a double quoted string.
 *
 * At most arg->fmt->nr_entries characters are printed, or arg->len when
 * nr_entries is zero; arg->val is handed to "%s" as the start of the string.
 * NOTE(review): this relies on arg->val carrying the address of the
 * characters - confirm against the callers.
 */
static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
{
	// XXX Hey, maybe for sched:sched_switch prev/next comm fields we can
	//     fill missing comms using thread__set_comm()...
	//     here or in a special syscall_arg__scnprintf_pid_sched_tp...
	return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
}

#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array

708
709
710
711
/*
 * Names for the 'cmd' argument of bpf(), indexed by command value; printed
 * with the "BPF_" prefix. Values past the end of the table are shown as
 * plain integers by the strarray formatter.
 */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
712
static DEFINE_STRARRAY(bpf_cmd, "BPF_");
713

714
715
716
717
718
719
720
721
722
/*
 * Names for the 'flags' argument of fsmount(), printed with the "FSMOUNT_"
 * prefix. Used through STRARRAY_FLAGS(), where the entry at index N names
 * bit N - 1 (see strarray__strtoul_flags()).
 */
static const char *fsmount_flags[] = {
	[1] = "CLOEXEC",
};
static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");

#include "trace/beauty/generated/fsconfig_arrays.c"

static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");

723
/* Names for the 'op' argument of epoll_ctl(); the strarray is defined with offset 1, so entry 0 is value 1 */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
724
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
725
726

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
727
static DEFINE_STRARRAY(itimers, "ITIMER_");
728
729
730
731
732
733
734
735

/*
 * Names for the 'option' argument of keyctl(), indexed by value; printed
 * with the "KEYCTL_" prefix.
 */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
736
static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
737
738
739
740
741
742
743
744
745

/*
 * Names for the 'whence' argument of lseek(), indexed by value; printed with
 * the "SEEK_" prefix. "DATA" and "HOLE" are only part of the table when the
 * build environment defines SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
746
static DEFINE_STRARRAY(whences, "SEEK_");
747
748
749

/*
 * Names for the 'cmd' argument of fcntl(), indexed by value; printed with
 * the "F_" prefix. The commands starting at F_LINUX_SPECIFIC_BASE live in
 * fcntl_linux_specific_cmds[].
 */
static const char *fcntl_cmds[] = {
	"DUPFD",
	"GETFD",
	"SETFD",
	"GETFL",
	"SETFL",
	"GETLK",
	"SETLK",
	"SETLKW",
	"SETOWN",
	"GETOWN",
	"SETSIG",
	"GETSIG",
	"GETLK64",
	"SETLK64",
	"SETLKW64",
	"SETOWN_EX",
	"GETOWN_EX",
	"GETOWNER_UIDS",
};
754
static DEFINE_STRARRAY(fcntl_cmds, "F_");
755

756
757
758
759
760
761
/*
 * Names for the Linux specific fcntl() commands. The matching strarray is
 * defined with offset F_LINUX_SPECIFIC_BASE, so entry 0 names the command
 * with that value. Indexes 3 and 4 are left without a name, values mapping
 * to them are printed as integers.
 */
static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", [5] =	"CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

762
static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);
763
764
765
766
767
768
769
770

/*
 * All the fcntl() command tables; the strarrays helpers consult them in this
 * order and use the first one whose range covers the value.
 */
static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);

771
772
773
774
775
/*
 * Names for the 'resource' argument of getrlimit(), indexed by value;
 * printed with the "RLIMIT_" prefix.
 */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
776
static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");
777
778

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
779
static DEFINE_STRARRAY(sighow, "SIG_");
780
781
782
783
784
785

/*
 * Names for clock id arguments (e.g. 'clk_id' of clock_gettime()), indexed
 * by value; printed with the "CLOCK_" prefix.
 */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
786
static DEFINE_STRARRAY(clockid, "CLOCK_");
787
788
789
790

/*
 * Format the 'mode' argument of access().
 *
 * F_OK is printed as "F"; otherwise the letters of the R, W and X bits that
 * are set are printed one after the other, in that order. Each letter gets
 * an "_OK" suffix when string prefixes are enabled. Bits that are left over
 * are appended as "|0x...".
 *
 * Returns the number of characters written to 'bf'.
 */
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const struct {
		int	    bit;
		const char *letter;
	} modes[] = {
		{ R_OK, "R" },
		{ W_OK, "W" },
		{ X_OK, "X" },
	};
	const char *suffix = arg->show_string_prefix ? "_OK" : "";
	size_t printed = 0, i;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F%s", suffix);

	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); ++i) {
		if (!(mode & modes[i].bit))
			continue;

		printed += scnprintf(bf + printed, size - printed, "%s%s", modes[i].letter, suffix);
		mode &= ~modes[i].bit;
	}

	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode

static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename

static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
825
826
	bool show_prefix = arg->show_string_prefix;
	const char *prefix = "O_";
827
828
829
830
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & O_##n) { \
831
		printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags

#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif

static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						   struct syscall_arg *arg)
{
857
858
	bool show_prefix = arg->show_string_prefix;
	const char *prefix = "GRND_";
859
860
861
862
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
863
		printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags

879
880
/*
 * syscall_arg_fmt initializer for an argument whose values are named by
 * strarray__<array>. 'name' only documents the argument at the use site, it
 * is not part of the expansion.
 */
#define STRARRAY(name, array)				\
	{						\
		.scnprintf = SCA_STRARRAY,		\
		.strtoul   = STUL_STRARRAY,		\
		.parm	   = &strarray__##array,	\
	}
883

884
885
/*
 * syscall_arg_fmt initializer for a flags argument whose bits are named by
 * strarray__<array>. 'name' only documents the argument at the use site, it
 * is not part of the expansion.
 */
#define STRARRAY_FLAGS(name, array)			\
	{						\
		.scnprintf = SCA_STRARRAY_FLAGS,	\
		.strtoul   = STUL_STRARRAY_FLAGS,	\
		.parm	   = &strarray__##array,	\
	}

889
#include "trace/beauty/arch_errno_names.c"
890
891
#include "trace/beauty/eventfd.c"
#include "trace/beauty/futex_op.c"
892
#include "trace/beauty/futex_val3.c"
893
894
895
896
897
898
899
900
901
902
903
904
#include "trace/beauty/mmap.c"
#include "trace/beauty/mode_t.c"
#include "trace/beauty/msg_flags.c"
#include "trace/beauty/open_flags.c"
#include "trace/beauty/perf_event_open.c"
#include "trace/beauty/pid.c"
#include "trace/beauty/sched_policy.c"
#include "trace/beauty/seccomp.c"
#include "trace/beauty/signum.c"
#include "trace/beauty/socket_type.c"
#include "trace/beauty/waitid_options.c"

905
static struct syscall_fmt syscall_fmts[] = {
906
907
	{ .name	    = "access",
	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
908
909
910
911
	{ .name	    = "arch_prctl",
	  .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
		   [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
	{ .name	    = "bind",
912
913
914
	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
		   [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
915
916
	{ .name	    = "bpf",
	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
917
	{ .name	    = "brk",	    .hexret = true,
918
	  .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
919
920
921
922
923
924
925
926
927
928
	{ .name     = "clock_gettime",
	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
	{ .name	    = "close",
	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
929
	{ .name	    = "connect",
930
931
932
	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
		   [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
933
934
935
936
937
938
939
940
941
	{ .name	    = "epoll_ctl",
	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
	{ .name	    = "eventfd2",
	  .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
	{ .name	    = "fchmodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fchownat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fcntl",
942
943
	  .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,  /* cmd */
			   .strtoul   = STUL_STRARRAYS,
944
945
946
947
948
			   .parm      = &strarrays__fcntl_cmds_arrays,
			   .show_zero = true, },
		   [2] = { .scnprintf =  SCA_FCNTL_ARG, /* arg */ }, }, },
	{ .name	    = "flock",
	  .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
949
950
951
952
953
954
955
956
957
	{ .name     = "fsconfig",
	  .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
	{ .name     = "fsmount",
	  .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
		   [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
	{ .name     = "fspick",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	  /* dfd */ },
		   [1] = { .scnprintf = SCA_FILENAME,	  /* path */ },
		   [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
958
959
960
	{ .name	    = "fstat", .alias = "newfstat", },
	{ .name	    = "fstatat", .alias = "newfstatat", },
	{ .name	    = "futex",
961
962
	  .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
		   [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
963
964
965
966
	{ .name	    = "futimesat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "getitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
967
968
969
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
970
971
972
973
	{ .name	    = "getrandom",
	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
	{ .name	    = "getrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
974
	{ .name	    = "gettid",	    .errpid = true, },
975
976
	{ .name	    = "ioctl",
	  .arg = {
977
978
979
980
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
981
982
		   [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
983
#else
984
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
985
#endif
986
987
988
989
990
991
	{ .name	    = "kcmp",	    .nr_args = 5,
	  .arg = { [0] = { .name = "pid1",	.scnprintf = SCA_PID, },
		   [1] = { .name = "pid2",	.scnprintf = SCA_PID, },
		   [2] = { .name = "type",	.scnprintf = SCA_KCMP_TYPE, },
		   [3] = { .name = "idx1",	.scnprintf = SCA_KCMP_IDX, },
		   [4] = { .name = "idx2",	.scnprintf = SCA_KCMP_IDX, }, }, },
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
	{ .name	    = "keyctl",
	  .arg = { [0] = STRARRAY(option, keyctl_options), }, },
	{ .name	    = "kill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "linkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "lseek",
	  .arg = { [2] = STRARRAY(whence, whences), }, },
	{ .name	    = "lstat", .alias = "newlstat", },
	{ .name     = "madvise",
	  .arg = { [0] = { .scnprintf = SCA_HEX,      /* start */ },
		   [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
	{ .name	    = "mkdirat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mknodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1008
	{ .name	    = "mmap",	    .hexret = true,
1009
1010
1011
1012
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
	.alias = "old_mmap",
#endif
1013
	  .arg = { [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
1014
1015
1016
		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */
			   .strtoul   = STUL_STRARRAY_FLAGS,
			   .parm      = &strarray__mmap_flags, },
1017
1018
1019
1020
1021
		   [5] = { .scnprintf = SCA_HEX,	/* offset */ }, }, },
	{ .name	    = "mount",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
		   [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
			   .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
1022
1023
1024
1025
1026
1027
	{ .name	    = "move_mount",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* from_dfd */ },
		   [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
		   [2] = { .scnprintf = SCA_FDAT,	/* to_dfd */ },
		   [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
		   [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
1028
1029
1030
1031
1032
	{ .name	    = "mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
	{ .name	    = "mq_unlink",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
1033
	{ .name	    = "mremap",	    .hexret = true,
1034
	  .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
	{ .name	    = "name_to_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "newfstatat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "open",
	  .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "open_by_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "openat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "perf_event_open",
	  .arg = { [2] = { .scnprintf = SCA_INT,	/* cpu */ },
		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
	{ .name	    = "pipe2",
	  .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
	{ .name	    = "pkey_alloc",
	  .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS,	/* access_rights */ }, }, },
	{ .name	    = "pkey_free",
	  .arg = { [0] = { .scnprintf = SCA_INT,	/* key */ }, }, },
	{ .name	    = "pkey_mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_INT,	/* pkey */ }, }, },
	{ .name	    = "poll", .timeout = true, },
	{ .name	    = "ppoll", .timeout = true, },
1063
	{ .name	    = "prctl",
1064
1065
1066
	  .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */
			   .strtoul   = STUL_STRARRAY,
			   .parm      = &strarray__prctl_options, },
1067
1068
		   [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
		   [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
	{ .name	    = "pread", .alias = "pread64", },
	{ .name	    = "preadv", .alias = "pread", },
	{ .name	    = "prlimit64",
	  .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "pwrite", .alias = "pwrite64", },
	{ .name	    = "readlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "recvfrom",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "renameat",
1083
1084
1085
1086
1087
1088
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, },
	{ .name	    = "renameat2",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
		   [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
	{ .name	    = "rt_sigaction",
	  .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_sigprocmask",
	  .arg = { [0] = STRARRAY(how, sighow), }, },
	{ .name	    = "rt_sigqueueinfo",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_tgsigqueueinfo",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "sched_setscheduler",
	  .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
	{ .name	    = "seccomp",
	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
	{ .name	    = "select", .timeout = true, },
1103
	{ .name	    = "sendfile", .alias = "sendfile64", },
1104
1105
1106
1107
1108
	{ .name	    = "sendmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendto",
1109
1110
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
		   [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
1111
	{ .name	    = "set_tid_address", .errpid = true, },
1112
1113
1114
1115
1116
1117
	{ .name	    = "setitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "setrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "socket",
	  .arg = { [0] = STRARRAY(family, socket_families),
1118
1119
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1120
1121
	{ .name	    = "socketpair",
	  .arg = { [0] = STRARRAY(family, socket_families),
1122
1123
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
	{ .name	    = "stat", .alias = "newstat", },
	{ .name	    = "statx",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
		   [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
		   [3] = { .scnprintf = SCA_STATX_MASK,	 /* mask */ }, }, },
	{ .name	    = "swapoff",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	    = "swapon",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	    = "symlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1135
1136
	{ .name	    = "sync_file_range",
	  .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, },
1137
1138
1139
1140
	{ .name	    = "tgkill",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "tkill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1141
1142
	{ .name     = "umount2", .alias = "umount",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
1143
1144
1145
1146
1147
	{ .name	    = "uname", .alias = "newuname", },
	{ .name	    = "unlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "utimensat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
1148
	{ .name	    = "wait4",	    .errpid = true,
1149
	  .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
1150
	{ .name	    = "waitid",	    .errpid = true,
1151
	  .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
1152
1153
1154
1155
1156
1157
1158
1159
};

/*
 * Comparator handed to bsearch() by __syscall_fmt__find(): @name is the
 * syscall name being searched for, @fmtp the table entry it is compared
 * against. Returns the strcmp() ordering of the key and the entry's ->name.
 */
static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const char *key = name;
	const struct syscall_fmt *entry = fmtp;

	return strcmp(key, entry->name);
}

1160
1161
1162
1163
1164
/*
 * Binary-search the @nmemb entries of @fmts for the one whose ->name equals
 * @name. Since bsearch() is used, @fmts has to be sorted by ->name in
 * strcmp() order. Returns the matching entry, or NULL when there is none.
 */
static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
{
	const size_t entry_size = sizeof(*fmts);

	return bsearch(name, fmts, nmemb, entry_size, syscall_fmt__cmp);
}

1165
1166
1167
/*
 * Look up @name in the file-wide syscall_fmts[] table.
 *
 * Returns the entry whose ->name matches, or NULL when the table has no
 * entry for that syscall name.
 */
static struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);

	return __syscall_fmt__find(syscall_fmts, nmemb, name);
}

1171
static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
1172
{
1173
	int i;
1174
1175

	for (i = 0; i < nmemb; ++i) {
1176
1177
		if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
			return &fmts[i];
1178
1179
1180
1181
1182
	}

	return NULL;
}

1183
1184
1185
1186
1187
1188
/*
 * Alias lookup over the file-wide syscall_fmts[] table: hands the whole
 * table to __syscall_fmt__find_by_alias() and returns whatever it finds
 * (an entry whose ->alias equals @alias, or NULL).
 */
static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
{
	return __syscall_fmt__find_by_alias(syscall_fmts, ARRAY_SIZE(syscall_fmts), alias);
}

1189
1190
1191
/*
 * is_exit: is this "exit" or "exit_group"?
 * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
 * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
 * nonexistent: Just a hole in the syscall table, syscall id not allocated
 */
struct syscall {
	struct tep_event    *tp_format;
	int		    nr_args;
	int		    args_size;
	struct {
		struct bpf_program *sys_enter,
				   *sys_exit;
	}		    bpf_prog;
	bool		    is_exit;
	bool		    is_open;
	bool		    nonexistent;
	struct tep_format_field *args;
	const char	    *name;
	struct syscall_fmt  *fmt;
	struct syscall_arg_fmt *arg_fmt;
};

1212
1213
1214
1215
1216
/*
 * Must match what is in the BPF program:
 *
 * tools/perf/examples/bpf/augmented_raw_syscalls.c
 */
1217
1218
struct bpf_map_syscall_entry {
	bool	enabled;
1219
	u16	string_args_len[6];
1220
1221
};

1222
1223
1224
1225
1226
1227
1228
1229
/*
 * We need to have this 'calculated' boolean because in some cases we really
 * don't know what is the duration of a syscall, for instance, when we start
 * a session and some threads are waiting for a syscall to finish, say 'poll',
 * in which case all we can do is to print "( ? ) for duration and for the
 * start timestamp.
 */
static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1230
1231
1232
1233
{
	double duration = (double)t / NSEC_PER_MSEC;
	size_t printed = fprintf(fp, "(");

1234
	if (!calculated)
1235
		printed += fprintf(fp, "         ");
1236
	else if (duration >= 1.0)
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
	else if (duration >= 0.01)
		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
	else
		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
	return printed + fprintf(fp, "): ");
}

/**
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 * ret_scnprintf: syscall args may set this to a different syscall return
 *                formatter, for instance, fcntl may return fds, file flags, etc.
 */
struct thread_trace {
	u64		  entry_time;
	bool		  entry_pending;
	unsigned long	  nr_events;
	unsigned long	  pfmaj, pfmin;
	char		  *entry_str;
	double		  runtime_ms;
	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	struct {
		unsigned long ptr;
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		int	      max;
		struct file   *table;
	} files;

	struct intlist *syscall_stats;
};

static struct thread_trace *thread_trace__new(void)
{
	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));

1279
	if (ttrace) {
1280
		ttrace->files.max = -1;
1281
1282
		ttrace->syscall_stats = intlist__new(NULL);
	}
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309

	return ttrace;
}

/*
 * Return the struct thread_trace stored as @thread's private data, creating
 * it with thread_trace__new() the first time it is asked for, and account
 * one more event in ->nr_events.
 *
 * When @thread is NULL, or no private data is in place after the attempt to
 * create it, a warning is printed to @fp and NULL is returned.
 */
static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
{
	struct thread_trace *ttrace = NULL;

	if (thread != NULL) {
		ttrace = thread__priv(thread);

		if (ttrace == NULL) {
			/* First event for this thread: attach fresh state, then re-read it. */
			thread__set_priv(thread, thread_trace__new());
			ttrace = thread__priv(thread);
		}
	}

	if (ttrace == NULL) {
		color_fprintf(fp, PERF_COLOR_RED,
			      "WARNING: not enough memory, dropping samples!\n");
		return NULL;
	}

	++ttrace->nr_events;
	return ttrace;
}

1310
1311
1312
1313
1314
1315
1316
1317
1318

/*
 * Store @ret_scnprintf in the struct thread_trace kept as private data of
 * @arg->thread, as the formatter to use for the syscall return value.
 *
 * The private data is dereferenced without a NULL check, so it must already
 * be set for that thread.
 */
void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
				    size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
{
	struct thread_trace *thread_state = thread__priv(arg->thread);

	thread_state->ret_scnprintf = ret_scnprintf;
}

1319
1320
1321
1322
1323
/*
 * Single-bit flags (bit 0 and bit 1), so they can be OR-ed into one mask.
 * NOTE(review): presumably they select major/minor page fault tracing
 * (struct thread_trace has pfmaj/pfmin members) - confirm at the users.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/*
 * NOTE(review): presumably the size in bytes of the thread_trace->entry_str
 * buffer - confirm where that buffer is allocated.
 */
static const size_t trace__entry_str_size = 2048;

1324
static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1325
{