"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see https://www.gnu.org/licenses/.

Copyright © 2019 Cloud Linux Software Inc.

This software is also available under ImunifyAV commercial license, see
"""

import argparse
import asyncio
import gc
import logging
import os
import signal
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager, suppress
from functools import partial
from pathlib import Path
from subprocess import CalledProcessError, check_output
from typing import Tuple

import daemon
from lockfile import AlreadyLocked
import daemon.pidfile
import psutil

import defence360agent.internals.logger
from defence360agent import files
from defence360agent.api import health, inactivity
from defence360agent.contracts.config import (
    ConfigsValidator,
    Core,
    Merger,
    Model,
    SimpleRpc,
)
from defence360agent.contracts.hook_events import HookEvent
from defence360agent.contracts.license import LicenseCLN
from defence360agent.contracts.plugins import MessageSink, MessageSource
from defence360agent.internals.global_scope import g
from defence360agent.internals.iaid import IndependentAgentIDAPI
from defence360agent.internals.the_sink import TheSink
from defence360agent.model import instance, simplification, tls_check
from defence360agent.simple_rpc import (
    NonRootRpcServer,
    NonRootRpcServerAV,
    RpcServer,
    RpcServerAV,
    is_running,
)
from defence360agent.subsys import systemd_notifier
from defence360agent.utils import (
    Task,
    create_task_and_log_exceptions,
    is_root_user,
    is_systemd_boot,
)
from defence360agent.utils.check_db import is_db_corrupted
from defence360agent.utils.cli import EXITCODE_GENERAL_ERROR
from defence360agent.utils.common import DAY, rate_limit
from imav.malwarelib.config import (
    MalwareHitStatus,
    MalwareScanResourceType,
)
from imav.malwarelib.model import MalwareHit
import sentry_sdk

# Increase recursion depth to allow malware scanner into deeply nested
# directories with absolute path length up to 4096 symbols
_MAX_RECURSION_DEPTH = 2100
# Marker file created next to the database the first time corruption is seen.
_DB_IS_CORRUPTED_FLAG = Path("%s.is_corrupted" % Model.PATH)
_DB_IS_CORRUPTED_MSG = (
    "Imunify360 database is corrupt. "
    "Application cannot run with corrupt database. "
    "Please, contact Imunify360 support team at "
    "https://cloudlinux.zendesk.com"
)

logger = logging.getLogger(__name__)
# logger.error wrapped by rate_limit(period=DAY).
throttled_log_error = rate_limit(period=DAY)(logger.error)


class TaskFactory:
    """Event-loop task factory that keeps a strong reference to live tasks.

    Every task created through the factory is stored in ``pool`` and removed
    from it by a done-callback once the task finishes.
    """

    def __init__(self):
        self.pool = set()

    def __call__(self, loop, coro):
        """Wrap *coro* in a ``Task`` bound to *loop* and track it."""
        task = Task(coro, loop=loop)
        self.pool.add(task)
        task.add_done_callback(self.pool.discard)
        return task


@contextmanager
def log_and_suppress_error(message):
    """Log *message* on any error & suppress it.

    Only ``Exception`` subclasses are caught; the error is logged and also
    reported through ``sentry_sdk.capture_exception``.
    """
    try:
        yield
    except Exception as e:
        logger.error("caught error %r on %s", e, message)
        sentry_sdk.capture_exception(e)


async def _shutdown_task(loop, the_sink, plugin_list):
    """Shut the agent down step by step and finally stop *loop*.

    Each step runs under ``log_and_suppress_error`` so that a failure in one
    step is logged and does not prevent the following steps from running.

    :param loop: event loop to stop once everything is shut down
    :param the_sink: message sink whose ``shutdown()`` is awaited
    :param plugin_list: plugins to shut down, ordered by their
        ``SHUTDOWN_PRIORITY`` attribute (ascending)
    """
    with log_and_suppress_error("marking the start of the shutdown process"):
        # (there is SHUTDOWN_TIMEOUT)
        health.sensor.shutting_down(time.time())

    logger.info("shutdown task starting, pid=%s", os.getpid())

    with log_and_suppress_error(
        "preventing new messages (if any) processing to start"
    ):
        _tasks = []
        async with asyncio.timeout(10):
            if "sensor_server" in g:
                g.sensor_server.close()
                _tasks.append(g.sensor_server.wait_closed())
            # note: first exception is propagated; tasks are no canceled
            _tasks.append(the_sink.shutdown())
            await asyncio.gather(*_tasks)

    for plugin in sorted(plugin_list, key=lambda p: p.SHUTDOWN_PRIORITY):
        with log_and_suppress_error(
            "This happened while shutting down a plugin!!"
        ):
            logger.info(
                "Shutting down %s.%s...",
                plugin.__class__.__module__,
                plugin.__class__.__name__,
            )
            # make shutting down running task be a responsibility
            # of a particular plugin but not of a universal shotgun
            await plugin.shutdown()

    with log_and_suppress_error("shutting down IAID API"):
        await IndependentAgentIDAPI.shutdown()

    # Wait for graceful web-server restart (if it was started before shutdown)
    if (restart_task := g.get("web_server_restart_task")) is not None:
        with log_and_suppress_error("waiting for web server restart"):
            # asyncio.wait_for() requires the timeout argument; without it
            # the call raised TypeError (swallowed above) and the restart
            # was never awaited. timeout=None blocks until the task is done.
            await asyncio.wait_for(restart_task, timeout=None)

    with log_and_suppress_error("stopping loop"):
        loop.stop()

    logger.info("shutdown task finished, pid=%s", os.getpid())


def _daemonize(pidfilepath):
    """Enter a ``daemon.DaemonContext`` locked on *pidfilepath*.

    The process is detached only when ``is_systemd_boot()`` is false.
    Exits with ``EXITCODE_GENERAL_ERROR`` if the PID file is already locked.
    """
    logger.info("Run as daemon [pidfile = %s]", pidfilepath)

    dc = daemon.DaemonContext()
    dc.pidfile = daemon.pidfile.PIDLockFile(pidfilepath)
    dc.prevent_core = False
    dc.umask = Core.FILE_UMASK
    if is_systemd_boot():
        dc.detach_process = False
    else:
        dc.detach_process = True
    # keep the logging file descriptors open across daemonization
    dc.files_preserve = defence360agent.internals.logger.get_fds()
    try:
        dc.open()
    except AlreadyLocked:
        logger.error("PID file already locked by another process")
        sys.exit(EXITCODE_GENERAL_ERROR)
    gc.collect()

    # quirk: somehow this is needed for root logger messages to do not
    # propagate to specialized loggers, e.g. 'perf', 'nework'
    defence360agent.internals.logger.reconfigure()


async def _initial_files_update():
    """Perform update files on start."""
    await files.update_all_no_fail_if_files_exist()


def _tls_check_reset(loop):
    """Run ``tls_check.reset`` through ``simplification.run_in_executor``."""
    # init thread id for simplification.run_in_executor() worker thread
    loop.run_until_complete(
        simplification.run_in_executor(loop, tls_check.reset)
    )
    # mark current thread as "main_thread" for more informative error messages
    # PSSST! simplification.run_in_executor() is main thread now! :-X
    # tls_check.reset("main_thread")


def plugin_instances(objs, pclass):
    """Return the items of *objs* that are instances of *pclass*."""
    return [p for p in objs if isinstance(p, pclass)]


def _start_plugins(loop, plugin_classes) -> Tuple[TheSink, list, list]:
    """Instantiate the plugins and wire sinks and sources together.

    Sinks are created first, then ``TheSink`` is built on top of them, then
    the sources are created with a reference to it, and finally the sink is
    started.

    :return: ``(the_sink, sinks, sources)``
    """
    plugins = [plugin_class() for plugin_class in plugin_classes]

    # instantiate sinks
    sinks = plugin_instances(plugins, MessageSink)
    for s in sinks:
        logger.info("Creating sink %r", s)
        loop.run_until_complete(s.create_sink(loop))

    # instantiate sources
    the_sink = TheSink(sinks, loop)
    sources = plugin_instances(plugins, MessageSource)
    for s in sources:
        logger.info("Creating source %r", s)
        loop.run_until_complete(s.create_source(loop, the_sink))

    the_sink.start()
    return the_sink, sinks, sources


def _start_rpc(loop, the_sink: TheSink):
    """Create the root and non-root RPC servers on *loop*.

    The ``*AV`` server classes are used when ``SimpleRpc.SOCKET_ACTIVATION``
    is set, the plain ones otherwise.
    """
    logger.info("Starting RpcServers...")
    if SimpleRpc.SOCKET_ACTIVATION:
        rpc_servers = (RpcServerAV, NonRootRpcServerAV)
    else:
        rpc_servers = (RpcServer, NonRootRpcServer)
    for rpc in rpc_servers:
        loop.run_until_complete(rpc.create(loop, the_sink))


def _get_pids_open(*files):
    """Return the distinct PIDs that ``lsof`` reports for *files*.

    An empty list is returned when ``lsof`` cannot be found or started.
    When ``lsof`` exits with a non-zero status its captured output is still
    parsed. Every non-empty output line is converted with ``int()``.
    """
    try:
        out = check_output(
            ["lsof", "+wt"] + list(files),
            # "PATH" here is only a fallback: os.environ overrides it
            env={"PATH": "/usr/sbin:/usr/bin", **os.environ},
        )
    except CalledProcessError as e:
        out = bytes(e.output)
    except FileNotFoundError:
        logger.warning("There is no lsof in /usr/sbin:/usr/bin")
        return []
    except IOError:
        return []
    lines = out.strip().split(b"\n")
    pids = [int(line) for line in lines if line]
    return list(set(pids))


def _check_able_to_start(pidfile):
    """Exit the process if the agent must not be started.

    Exits with ``EXITCODE_GENERAL_ERROR`` when:

    * ``is_running()`` is true -- diagnostic details (parent process,
      processes holding the RPC sockets, contents of *pidfile*) are logged
      through the rate-limited error logger first;
    * the database is corrupted -- the message is logged as an error the
      first time (and the flag file is created) and as a warning afterwards.

    When the database is healthy a leftover flag file is removed.
    """
    if is_running():
        # get parent process info
        ppid = os.getppid()
        # `parent` must be bound even when ppid == 0, otherwise building
        # the error message below fails with UnboundLocalError.
        parent = None
        if ppid != 0:
            parent = psutil.Process(ppid).name()
        pids_used_socket = _get_pids_open(
            SimpleRpc.SOCKET_PATH, SimpleRpc.NON_ROOT_SOCKET_PATH
        )
        process_used_socket = []
        for pid in pids_used_socket:
            try:
                _pr = psutil.Process(pid)
            except psutil.NoSuchProcess:
                # the process is gone since lsof was run
                continue
            _local_parent = _pr.parent()
            if _local_parent:
                _parent_name = _local_parent.name()
            else:
                _parent_name = "None"
            process_used_socket.append(
                (
                    pid,
                    _pr.name(),
                    "parent process = %s" % str(_parent_name),
                )
            )
        try:
            with open(pidfile) as file:
                written_pid = file.read()
        except (OSError, IOError):
            written_pid = None
        throttled_log_error(
            "Instance of %s is already running. "
            'Parent process "%s" with pid "%s". '
            "Sockets are in use by %s. "
            "%s file contents %s pid"
            % (
                Core.SVC_NAME,
                parent,
                ppid,
                str(process_used_socket),
                pidfile,
                written_pid,
            )
        )
        sys.exit(EXITCODE_GENERAL_ERROR)

    if is_db_corrupted(db_path=Model.PATH):
        if not _DB_IS_CORRUPTED_FLAG.exists():
            logger.error(_DB_IS_CORRUPTED_MSG)
            _DB_IS_CORRUPTED_FLAG.touch()
        else:
            logger.warning(_DB_IS_CORRUPTED_MSG)
        sys.exit(EXITCODE_GENERAL_ERROR)
    else:
        with suppress(FileNotFoundError):
            _DB_IS_CORRUPTED_FLAG.unlink()


def start(plugin_classes: list, init_actions) -> None:
    """Common function for agent service startup.

    plugin_classes is a list of classes implementing message processing
    plugins.
    init_actions is a coroutine that will be called prior to starting RPC
    and message processing.

    The function blocks in ``loop.run_forever()`` until the shutdown task
    stops the loop; the loop is closed on the way out. The process exits
    with ``EXITCODE_GENERAL_ERROR`` when not run as root, when the start-up
    checks fail, or when the pending cleanup cannot be reset.
    """
    if not is_root_user():
        logger.info("Imunify agent could be started by the root user only!")
        sys.exit(EXITCODE_GENERAL_ERROR)

    args = parse_cli()
    defence360agent.internals.logger.setLogLevel(args.verbose)
    # the command line option takes precedence over the environment variable
    if args.log_config or os.environ.get("IMUNIFY360_LOGGING_CONFIG_FILE"):
        defence360agent.internals.logger.update_logging_config_from_file(
            args.log_config
            or os.environ.get("IMUNIFY360_LOGGING_CONFIG_FILE")
        )
    sys.setrecursionlimit(_MAX_RECURSION_DEPTH)

    _check_able_to_start(args.pidfile)
    if args.daemon:
        _daemonize(args.pidfile)

    systemd_notifier.notify(systemd_notifier.AgentState.DAEMONIZED)
    health.sensor.starting(time.time())
    if not LicenseCLN.is_registered():
        health.sensor.unregistered()

    loop = asyncio.get_event_loop()
    _cpu = os.cpu_count()
    # https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
    # default's in Python 3.8
    loop.set_default_executor(
        ThreadPoolExecutor(max_workers=min(32, _cpu + 4 if _cpu else 5))
    )
    loop.set_task_factory(TaskFactory())
    try:
        _tls_check_reset(loop)
        instance.db.init(Model.PATH)
        validate_configs_on_start(loop)
        Merger.update_merged_config()
        loop.run_until_complete(init_actions())
        try:
            _stop_pending_cleanup()
        except simplification.PeeweeException as e:
            # we intentionally capture all exceptions here and log them
            # it may happened on package update or other reasons, we don't
            # want to start agent in such case
            logger.error("Failed to stop pending cleanup. Reason: %s", repr(e))
            sys.exit(EXITCODE_GENERAL_ERROR)

        # If this is first agent run - we SHOULD download
        # all of the static files
        # If it isn't first agent run - essential files already downloaded
        # and will be updated asynchronously
        if not loop.run_until_complete(files.essential_files_exist()):
            logger.info(
                "Essential files are missing. Performing initial files update."
            )
            loop.run_until_complete(_initial_files_update())

        inactivity.track.set_timeout(SimpleRpc.INACTIVITY_TIMEOUT)
        the_sink, sinks, sources = _start_plugins(loop, plugin_classes)
        _start_rpc(loop, the_sink)
        logger.info("Message Bus started")

        agent_started = HookEvent.AgentStarted(
            version=Core.VERSION, resident=False
        )
        create_task_and_log_exceptions(
            loop, the_sink.process_message, agent_started
        )

        # note: plugins are started before the shutdown task has been setup
        # therefore plugin.shutdown() won't be called before create_source()
        _setup_signal_handlers(
            loop, partial(_shutdown_task, loop, the_sink, sinks + sources)
        )
        loop.run_forever()
        logger.info("loop stopped")
    finally:
        # closing the loop after loop.stop() cuts off pending tasks stacktraces
        loop.close()


def validate_configs_on_start(loop):
    """Validate the config layers; on failure run hooks and exit.

    Any exception raised by ``ConfigsValidator.validate_config_layers()``
    triggers the ``AgentMisconfig`` hook event, is logged as a warning and
    terminates the process with ``EXITCODE_GENERAL_ERROR``.
    """
    try:
        ConfigsValidator.validate_config_layers()
    except Exception as e:
        # imported here rather than at module level (kept as in the original)
        from defence360agent.hooks.execute import execute_hooks

        agent_misconfig = HookEvent.AgentMisconfig(error=repr(e))
        loop.run_until_complete(execute_hooks(agent_misconfig))
        logger.warning(str(e))
        sys.exit(EXITCODE_GENERAL_ERROR)


def _setup_signal_handlers(loop, shutdowntask):
    """Start *shutdowntask* on the first SIGINT/SIGTERM/SIGUSR1/SIGUSR2.

    Later signals only log that the shutdown is already in progress.

    :param loop: event loop the handlers are registered on
    :param shutdowntask: callable passed to
        ``create_task_and_log_exceptions`` to start the shutdown
    """
    called = False  # whether the signal handler was called already

    def _sighandler(loop, sig):
        nonlocal called
        if not called:
            called = True
            logger.info("Caught %s", sig)
            # note: store ref, to keep the task alive, just in case
            called = create_task_and_log_exceptions(loop, shutdowntask)
        else:
            logger.info(
                "Caught %s. Shutdown task is already running, please wait.",
                sig,
            )

    for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGUSR1, signal.SIGUSR2):
        loop.add_signal_handler(sig, _sighandler, loop, sig)


def parse_cli():
    """Parse ``sys.argv`` and return the resulting argparse namespace.

    Recognised options: ``-v`` (repeatable), ``--daemon``, ``--pidfile`` and
    ``--log-config``.
    """
    parser = argparse.ArgumentParser(description="Run imunify agent")
    parser.add_argument(
        "-v",
        dest="verbose",
        action="count",
        default=0,
        # adjacent literals are concatenated: keep the trailing spaces
        help=(
            "Level of logging. Each value corresponds to: "
            "1 - console only log level, "
            "2 - previous plus add network log, "
            "3 - all previous plus add process message log, "
            "4 - all previous plus add debug log"
        ),
    )
    parser.add_argument("--daemon", action="store_true", help="run as daemon")
    parser.add_argument(
        "--pidfile",
        default="/var/run/imunify360.pid",
        help="use with --daemon",
    )
    parser.add_argument("--log-config", help="logging config filename")
    return parser.parse_args(sys.argv[1:])


def _stop_pending_cleanup():
    """
    Get back to FOUND all malware hits which have stuck in CLEANUP_STARTED

    Only hits whose resource type is ``MalwareScanResourceType.FILE`` are
    selected.
    """
    hits = MalwareHit.select().where(
        MalwareHit.status == MalwareHitStatus.CLEANUP_STARTED,
        MalwareHit.resource_type == MalwareScanResourceType.FILE.value,
    )
    MalwareHit.set_status(hits, MalwareHitStatus.FOUND)