-
Notifications
You must be signed in to change notification settings - Fork 3.9k
/
Copy pathnumasched.py
executable file
·146 lines (118 loc) · 3.23 KB
/
numasched.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python
# @lint-avoid-python-3-compatibility-imports
#
# numasched Trace task NUMA switch
# For Linux, uses BCC, eBPF.
#
# USAGE: numasched [-p PID] [-t TID] [-c COMM]
#
# This script traces tasks as they move from one NUMA node to another.
# Frequent NUMA migrations generally hurt performance.
#
# Copyright 2022 CESTC, Co.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Dec-2022 Rong Tao Created this.
from __future__ import print_function
from bcc import BPF
import argparse
from time import strftime
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
from time import sleep
# arguments
def _task_id(value):
    """argparse ``type`` callback for the -p/--pid and -t/--tid options.

    The option value is later substituted verbatim into the BPF C source,
    so anything that is not a plain non-negative integer is rejected here
    with a normal argparse usage error instead of surfacing as a compiler
    failure.

    Returns the number as a normalized decimal *string* (not an int): the
    rest of the script tests these options for truthiness and formats them
    with ``%s``, which keeps working unchanged with a string, including
    for the value "0".
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            "%r is not an integer" % value)
    if number < 0:
        raise argparse.ArgumentTypeError(
            "%r must not be negative" % value)
    return str(number)


examples = """examples:
./numasched # trace all processes
./numasched -p 185 # trace PID 185 only
"""
parser = argparse.ArgumentParser(
    description="Trace task NUMA switch",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=_task_id,
    help="trace this PID only")
parser.add_argument("-t", "--tid", type=_task_id,
    help="trace this TID only")
# COMM is only compared against event data in Python, never placed in the
# BPF source, so it needs no validation.
parser.add_argument("-c", "--comm",
    help="trace this COMM only")
args = parser.parse_args()
# BPF program (C), compiled by BCC at start-up.
#
# FILTER_PID and FILTER_TID are placeholders: each must be replaced with a C
# boolean expression before compilation; when the expression is true the
# event is dropped.
#
# The program remembers, per thread id, the NUMA node id observed at that
# thread's previous sched_switch event and emits a record whenever the node
# id observed now differs from the remembered one.
bpf_text = """
#include <linux/sched.h>
#include <linux/topology.h>

/* Record sent to user space for every detected NUMA node change. */
struct data_t {
    char comm[TASK_COMM_LEN];
    u32 pid;
    u32 tid;
    u32 old_nid;
    u32 new_nid;
};

BPF_PERF_OUTPUT(events);

struct val_t {
    u32 nid;
};

/*
 * tid -> NUMA node id seen at the previous switch.
 *
 * Entries are never deleted explicitly, so a plain hash would fill up with
 * threads that no longer exist and then refuse to track new ones. An LRU
 * hash recycles the least recently used entries instead.
 */
BPF_TABLE("lru_hash", u32, struct val_t, numaid_info, 10240);

TRACEPOINT_PROBE(sched, sched_switch)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    u32 new_nid = bpf_get_numa_node_id();
    struct val_t val = {}, *valp;

    if (FILTER_PID)
        return 0;

    if (FILTER_TID)
        return 0;

    val.nid = new_nid;

    /* First sighting of a thread only records its node; nothing to report. */
    valp = numaid_info.lookup(&tid);
    if (valp && valp->nid != new_nid) {
        struct data_t data = {};

        bpf_get_current_comm(&data.comm, sizeof(data.comm));
        data.pid = pid;
        data.tid = tid;
        data.old_nid = valp->nid;
        data.new_nid = new_nid;
        events.perf_submit(args, &data, sizeof(data));
    }

    numaid_info.update(&tid, &val);
    return 0;
}
"""
# Fill in the filter placeholders of the BPF program. Each placeholder
# becomes a C condition that, when true, makes the probe drop the event:
# with an explicit PID/TID everything else is dropped, otherwise only
# PID=0 / TID=0 is always skipped.
bpf_text = bpf_text.replace(
    'FILTER_PID',
    ('pid != %s' % args.pid) if args.pid else 'pid == 0')
bpf_text = bpf_text.replace(
    'FILTER_TID',
    ('tid != %s' % args.tid) if args.tid else 'tid == 0')
# process event
def print_event(cpu, data, size):
    """Perf buffer callback: print one NUMA switch record as a table row.

    cpu and size are supplied by the perf buffer and are not used; data is
    the raw record, converted to an event object through the "events"
    table. When a COMM filter was given on the command line, records whose
    command name differs are silently skipped.
    """
    event = b["events"].event(data)

    # comm is delivered as bytes; decode it once so that the filter and the
    # printed column use the same text (printing the bytes object directly
    # would show up as b'...' under Python 3).
    comm = event.comm.decode('utf-8', 'replace')

    # Filter events by comm
    if args.comm and args.comm != comm:
        return

    print("%-8s %-8d %-8d %-8d -> %-8d %-8s" %
        (strftime("%H:%M:%S"),
         event.pid,
         event.tid,
         event.old_nid,
         event.new_nid,
         comm))
# Build the BPF object from the (already filter-substituted) program text.
b = BPF(text=bpf_text)

print("Tracing task NUMA switch...")
# Column titles, each left-aligned in an 8-character field.
print(" ".join("%-8s" % title for title in
    ("TIME", "PID", "TID", "SRC_NID", "DST_NID", "COMM")))

# Deliver every record from the "events" perf buffer to print_event and keep
# polling until the user interrupts with Ctrl-C.
b["events"].open_perf_buffer(print_event)
try:
    while True:
        b.perf_buffer_poll()
except KeyboardInterrupt:
    exit()