#!/bin/sh

# Try to find out kernel modules with large total memory allocation during
# loading.
#
# Large slab allocations fall through to the buddy allocator, so page
# allocation events cover them as well. "mm_page_free" is deliberately not
# traced: large frees are quite rare during module_init, and skipping them
# saves tons of events, which helps to avoid the trace data being overwritten.
#
# Therefore, tracing "mm_page_alloc" alone should be enough for the purpose.
#
# Usage (see show_usage):
#   1) run once to set up the trace,
#   2) run again to parse the collected trace data,
#   3) run with "--cleanup" to tear the trace setup down.

# Print the directory that contains the "tracing" directory.
# "/sys/kernel/tracing" has the priority if it exists.
get_trace_base() {
    # Trace access through debugfs is obsolete when "/sys/kernel/tracing"
    # is available.
    if [ -d "/sys/kernel/tracing" ]; then
        echo "/sys/kernel"
    else
        echo "/sys/kernel/debug"
    fi
}

# Print the space-separated list of trace events we want to enable.
get_want_events() {
    echo "module:module_put module:module_load kmem:mm_page_alloc"
}

# Print the event filter that restricts events to the known module loaders.
get_event_filter() {
    echo "comm == systemd-udevd || comm == modprobe || comm == insmod"
}

# Return 0 when tracing is mounted, switched on, and exactly the wanted
# events are enabled; return 1 otherwise.
is_trace_ready() {
    local trace_base want_events current_events event nr_want nr_current

    trace_base=$(get_trace_base)
    [ -f "$trace_base/tracing/trace" ] || return 1

    # An unreadable or empty "tracing_on" counts as "not ready" as well.
    [ "$(cat "$trace_base/tracing/tracing_on" 2>/dev/null)" = "1" ] || return 1

    # Also check if the trace events were properly set up.
    want_events=$(get_want_events)
    # The inner substitution is unquoted on purpose: it joins the
    # one-event-per-line listing into a single space-separated line.
    current_events=$(echo $(cat "$trace_base/tracing/set_event" 2>/dev/null))

    # Compare as sets so the result does not depend on the order in which
    # the kernel happens to list the enabled events.
    nr_want=0
    for event in $want_events; do
        case " $current_events " in
            *" $event "*) nr_want=$((nr_want + 1)) ;;
            *) return 1 ;;
        esac
    done

    nr_current=0
    for event in $current_events; do
        nr_current=$((nr_current + 1))
    done

    [ "$nr_current" -eq "$nr_want" ]
}

# Mount the trace filesystem if needed, enable the wanted events and filters,
# then switch tracing on with an empty buffer.
# Returns 0 on success, 1 when tracing could not be set up.
prepare_trace() {
    local trace_base

    trace_base=$(get_trace_base)

    if ! [ -d "$trace_base/tracing" ]; then
        # Old debugfs interface case.
        mount none -t debugfs "$trace_base"
    elif ! [ -f "$trace_base/tracing/trace" ]; then
        # New tracefs interface case.
        mount none -t tracefs "$trace_base/tracing"
    fi

    if ! [ -f "$trace_base/tracing/trace" ]; then
        echo "WARN: Mount trace failed for kernel module memory analyzing."
        return 1
    fi

    # Activate all the wanted trace events. Nothing useful can be collected
    # without them, so a failure here is reported instead of ignored.
    if ! get_want_events > "$trace_base/tracing/set_event"; then
        echo "WARN: Enable trace events failed for kernel module memory analyzing."
        return 1
    fi

    # There are three kinds of known applications for module loading:
    # "systemd-udevd", "modprobe" and "insmod".
    # Set them as the global events filter.
    # NOTE: Some kernels may not support this format of filter; in that case
    # the write simply fails and it doesn't matter. stderr goes to /dev/null
    # (listed first so it is in effect before the filter file is opened)
    # rather than into the filter file itself.
    get_event_filter 2> /dev/null > "$trace_base/tracing/events/kmem/filter"
    get_event_filter 2> /dev/null > "$trace_base/tracing/events/module/filter"

    # Set the number of saved comm-pid mappings if supported.
    if [ -f "$trace_base/tracing/saved_cmdlines_size" ]; then
        # Thanks to filters, 4096 is big enough (also well supported).
        echo 4096 > "$trace_base/tracing/saved_cmdlines_size"
    fi

    # Enable tracing and clear the trace data for the first time.
    echo 1 > "$trace_base/tracing/tracing_on"
    echo > "$trace_base/tracing/trace"

    echo "Prepare trace success."
    return 0
}

# Convert a page allocation order into a number of pages (2^order).
# $1 - allocation order, a non-negative decimal integer.
# Prints the page count. For an empty or non-numeric order it prints 0 and
# returns 1 instead of looping forever.
order_to_pages() {
    local order=$1

    case "$order" in
        "" | *[!0-9]*)
            echo 0
            return 1
            ;;
    esac

    echo $((1 << order))
}

# Walk through the trace buffer and print the number of pages allocated by
# each task between a "module_load" event and its first following
# "module_put" event.
#
# Per-task and per-module state is kept in dynamically named variables:
#   current_module_<pid>     module currently being loaded by task <pid>
#   nr_alloc_pages_<module>  pages allocated so far while loading <module>
#   module_loaded_<module>   set once <module> finished loading
parse_trace_data() {
    local pid cpu flags ts event args
    local module_name loaded count order pages

    while read -r pid cpu flags ts event args; do
        # Skip comment (header) lines.
        case "$pid" in
            "#"*) continue ;;
        esac

        # The first field is "comm-pid". Anything that does not end in a
        # plain number (e.g. "CPU:0 [LOST n EVENTS]" lines) is not an event
        # and must never reach the eval calls below.
        pid=${pid##*-}
        case "$pid" in
            "" | *[!0-9]*) continue ;;
        esac

        event=${event%:}

        if [ "$event" = "module_load" ]; then
            # One module is being loaded, save the task pid for tracking.
            # Drop everything after the first whitespace, there may be
            # module flags following the name.
            module_name=${args%% *}

            # The name becomes part of a variable name, so only accept
            # characters that are valid there.
            case "$module_name" in
                "" | *[!A-Za-z0-9_]*)
                    echo "WARN: skip module with unsupported name \"$module_name\""
                    continue
                    ;;
            esac

            # Mark current_module to track the task.
            eval "current_module_$pid=\$module_name"

            eval "loaded=\${module_loaded_$module_name:-}"
            if [ -n "$loaded" ]; then
                echo "WARN: \"$module_name\" was loaded multiple times!"
            fi

            unset "module_loaded_$module_name"
            eval "nr_alloc_pages_$module_name=0"
            continue
        fi

        eval "module_name=\${current_module_$pid:-}"
        if [ -z "$module_name" ]; then
            continue
        fi

        # Once we get here, the task is being tracked (is loading a module).
        case "$event" in
            module_put)
                # Mark the module as loaded when the first module_put event
                # happens after module_load.
                eval "count=\${nr_alloc_pages_$module_name:-0}"
                echo "$count pages consumed by \"$module_name\""
                eval "module_loaded_$module_name=1"

                # Module loading finished, so untrack the task.
                unset "current_module_$pid"
                unset "nr_alloc_pages_$module_name"
                ;;
            mm_page_alloc)
                # Get the order first, then convert it to actual pages.
                order=${args##*order=}
                order=${order%% *}
                # Ignore events whose order cannot be parsed.
                pages=$(order_to_pages "$order") || continue

                eval "count=\${nr_alloc_pages_$module_name:-0}"
                eval "nr_alloc_pages_$module_name=\$((count + pages))"
                ;;
        esac
    done < "$(get_trace_base)/tracing/trace"
}

# Undo what prepare_trace set up: stop tracing, drop the collected data and
# disable the events and filters. Does nothing unless the trace is ready.
cleanup_trace() {
    local trace_base

    if is_trace_ready; then
        trace_base=$(get_trace_base)
        echo 0 > "$trace_base/tracing/tracing_on"
        echo > "$trace_base/tracing/trace"
        echo > "$trace_base/tracing/set_event"
        # Writing "0" clears an event filter.
        echo 0 > "$trace_base/tracing/events/kmem/filter"
        echo 0 > "$trace_base/tracing/events/module/filter"
    fi
}

# Print a short help text.
show_usage() {
    echo "Find out kernel modules with large memory consumption during loading based on trace."
    echo "Usage:"
    echo "1) run it first to setup trace."
    echo "2) run again to parse the trace data if any."
    echo "3) run with \"--cleanup\" option to cleanup trace after use."
}

case "${1:-}" in
    --help)
        show_usage
        exit 0
        ;;
    --cleanup)
        cleanup_trace
        exit 0
        ;;
esac

# First run sets the trace up; any later run (while the trace is still ready)
# parses what has been collected so far.
if is_trace_ready; then
    echo "tracekomem - Rough memory consumption by loading kernel modules (larger value with better accuracy)"
    parse_trace_data
else
    prepare_trace
fi

exit $?