diff --git a/internal/controller/controller.go b/internal/controller/controller.go
index cfaf4993..99321cc5 100644
--- a/internal/controller/controller.go
+++ b/internal/controller/controller.go
@@ -105,6 +105,7 @@ func (c *Controller) Start(ctx context.Context) error {
 		BPFVerifierLogLevel:    uint32(c.config.BpfVerifierLogLevel),
 		ProbabilisticInterval:  c.config.ProbabilisticInterval,
 		ProbabilisticThreshold: c.config.ProbabilisticThreshold,
+		OffCPUThreshold:        uint32(c.config.OffCPUThreshold),
 	})
 	if err != nil {
 		return fmt.Errorf("failed to load eBPF tracer: %w", err)
diff --git a/support/ebpf/off_cpu.ebpf.c b/support/ebpf/off_cpu.ebpf.c
index 732df70a..ec8e86da 100644
--- a/support/ebpf/off_cpu.ebpf.c
+++ b/support/ebpf/off_cpu.ebpf.c
@@ -15,7 +15,7 @@ bpf_map_def SEC("maps") sched_times = {
   .type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
   .key_size = sizeof(u64),   // pid_tgid
   .value_size = sizeof(u64), // time in ns
-  .max_entries = 256,
+  .max_entries = 256, // Value is adjusted at load time in loadAllMaps.
 };
 
 // tracepoint__sched_switch serves as entry point for off cpu profiling.
diff --git a/tracer/tracer.go b/tracer/tracer.go
index d5a51030..8f4af288 100644
--- a/tracer/tracer.go
+++ b/tracer/tracer.go
@@ -406,7 +406,7 @@ func initializeMapsAndPrograms(kernelSymbols *libpf.SymbolMap, cfg *Config) (
 	// Load all maps into the kernel that are used later on in eBPF programs. So we can rewrite
 	// in the next step the placesholders in the eBPF programs with the file descriptors of the
 	// loaded maps in the kernel.
-	if err = loadAllMaps(coll, ebpfMaps, cfg.MapScaleFactor); err != nil {
+	if err = loadAllMaps(coll, cfg, ebpfMaps); err != nil {
 		return nil, nil, fmt.Errorf("failed to load eBPF maps: %v", err)
 	}
 
@@ -523,8 +523,8 @@ func removeTemporaryMaps(ebpfMaps map[string]*cebpf.Map) error {
 }
 
 // loadAllMaps loads all eBPF maps that are used in our eBPF programs.
-func loadAllMaps(coll *cebpf.CollectionSpec, ebpfMaps map[string]*cebpf.Map,
-	mapScaleFactor int) error {
+func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config,
+	ebpfMaps map[string]*cebpf.Map) error {
 	restoreRlimit, err := rlimit.MaximizeMemlock()
 	if err != nil {
 		return fmt.Errorf("failed to adjust rlimit: %v", err)
@@ -545,13 +545,20 @@ func loadAllMaps(coll *cebpf.CollectionSpec, ebpfMaps map[string]*cebpf.Map,
 	)
 	adaption["pid_page_to_mapping_info"] =
-		1 << uint32(pidPageMappingInfoSize+mapScaleFactor)
+		1 << uint32(pidPageMappingInfoSize+cfg.MapScaleFactor)
 
 	adaption["stack_delta_page_to_info"] =
-		1 << uint32(stackDeltaPageToInfoSize+mapScaleFactor)
+		1 << uint32(stackDeltaPageToInfoSize+cfg.MapScaleFactor)
+
+	// To not lose too many scheduling events, but also not oversize
+	// sched_times, calculate a size based on some assumptions:
+	// on modern systems /proc/sys/kernel/pid_max defaults to 4194304.
+	// Try to fit this PID space, scaled down by cfg.OffCPUThreshold, into
+	// this map.
+	adaption["sched_times"] = (4194304 / support.OffCPUThresholdMax) * cfg.OffCPUThreshold
 
 	for i := support.StackDeltaBucketSmallest; i <= support.StackDeltaBucketLargest; i++ {
 		mapName := fmt.Sprintf("exe_id_to_%d_stack_deltas", i)
-		adaption[mapName] = 1 << uint32(exeIDToStackDeltasSize+mapScaleFactor)
+		adaption[mapName] = 1 << uint32(exeIDToStackDeltasSize+cfg.MapScaleFactor)
 	}
 
 	for mapName, mapSpec := range coll.Maps {
@@ -559,6 +566,11 @@ func loadAllMaps(coll *cebpf.CollectionSpec, ebpfMaps map[string]*cebpf.Map,
 			log.Debugf("Size of eBPF map %s: %v", mapName, newSize)
 			mapSpec.MaxEntries = newSize
 		}
+		if mapName == "sched_times" &&
+			cfg.OffCPUThreshold >= support.OffCPUThresholdMax {
+			// Off-CPU profiling is not enabled, so do not load this map.
+			continue
+		}
 		ebpfMap, err := cebpf.NewMap(mapSpec)
 		if err != nil {
 			return fmt.Errorf("failed to load %s: %v", mapName, err)