The Dockerfile healthcheck will kill the main process if it is stuck, so that the restart policy can restart the container

Gunicorn will recycle each worker every 10,000 requests, which helps reclaim memory lost to leaks
This commit is contained in:
2026-02-12 03:52:53 +00:00
parent e6a03d33e5
commit 2ae3f3ba7e
3 changed files with 75 additions and 25 deletions

View File

@@ -54,7 +54,8 @@ RUN chmod 777 /app/docker-entrypoint.sh \
ENTRYPOINT ["docker-entrypoint.sh"]
# at the end of your Dockerfile, before CMD or after EXPOSE
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD curl -f http://localhost/health || exit 1
# Kill the main process if the healthcheck fails. This will kill the container and restart policy can restart it.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
CMD curl -f http://localhost/health || kill -s 15 1
CMD ["poetry", "run", "/app/run.sh"]

View File

@@ -3,6 +3,9 @@ import multiprocessing
import os
# --- Worker sizing -----------------------------------------------------------
# Prefer explicit WEB_CONCURRENCY in containers. Otherwise fall back to
# WORKERS_PER_CORE * cpu_count(), with a minimum of 2 and optional MAX_WORKERS.
workers_per_core_str = os.getenv("WORKERS_PER_CORE", "1")
max_workers_str = os.getenv("MAX_WORKERS")
use_max_workers = None
@@ -10,12 +13,6 @@ if max_workers_str:
use_max_workers = int(max_workers_str)
web_concurrency_str = os.getenv("WEB_CONCURRENCY", None)
host = os.getenv("HOST", "0.0.0.0")
port = os.getenv("PORT", "9995")
bind_env = os.getenv("BIND", None)
use_loglevel = os.getenv("LOG_LEVEL", "info")
use_bind = bind_env if bind_env else f"{host}:{port}"
cores = multiprocessing.cpu_count()
workers_per_core = float(workers_per_core_str)
default_web_concurrency = workers_per_core * cores
@@ -26,27 +23,47 @@ else:
web_concurrency = max(int(default_web_concurrency), 2)
if use_max_workers:
web_concurrency = min(web_concurrency, use_max_workers)
accesslog_var = os.getenv("ACCESS_LOG", "-")
use_accesslog = accesslog_var or None
errorlog_var = os.getenv("ERROR_LOG", "-")
use_errorlog = errorlog_var or None
graceful_timeout_str = os.getenv("GRACEFUL_TIMEOUT", "120")
timeout_str = os.getenv("TIMEOUT", "120")
keepalive_str = os.getenv("KEEP_ALIVE", "5")
# Gunicorn config variables
loglevel = use_loglevel
# --- Bind / logging ----------------------------------------------------------
host = os.getenv("HOST", "0.0.0.0")
port = os.getenv("PORT", "9995")
bind_env = os.getenv("BIND", None)
use_bind = bind_env if bind_env else f"{host}:{port}"
loglevel = os.getenv("LOG_LEVEL", "info")
accesslog_var = os.getenv("ACCESS_LOG", "-")
accesslog = accesslog_var or None
errorlog_var = os.getenv("ERROR_LOG", "-")
errorlog = errorlog_var or None
worker_tmp_dir = "/dev/shm"
# --- Timeouts / keepalive ----------------------------------------------------
graceful_timeout = int(os.getenv("GRACEFUL_TIMEOUT", "120"))
timeout = int(os.getenv("TIMEOUT", "120"))
keepalive = int(os.getenv("KEEP_ALIVE", "5"))
# --- Robustness knobs (recommended) -----------------------------------------
# Recycle workers gradually to mitigate memory leaks / fragmentation without
# full container restarts. Tune via env if needed.
max_requests = int(os.getenv("MAX_REQUESTS", "10000"))
max_requests_jitter = int(os.getenv("MAX_REQUESTS_JITTER", "1000"))
# Prevent slow clients from holding connections forever (defaults are fine).
# You can tune these via env if you ever need to.
# worker_connections matters only for async worker types; kept here for clarity.
worker_connections = int(os.getenv("WORKER_CONNECTIONS", "1000"))
# Helpful in containerized environments: ensure workers are responsive.
# (Defaults are okay; leaving commented unless you want strict behavior.)
# heartbeat_interval = int(os.getenv("HEARTBEAT_INTERVAL", "30"))
# --- Gunicorn config vars ----------------------------------------------------
workers = web_concurrency
bind = use_bind
errorlog = use_errorlog
worker_tmp_dir = "/dev/shm"
accesslog = use_accesslog
graceful_timeout = int(graceful_timeout_str)
timeout = int(timeout_str)
keepalive = int(keepalive_str)
# For debugging and testing
# --- Debug print (keep if you like) ------------------------------------------
log_data = {
"loglevel": loglevel,
"workers": workers,
@@ -56,10 +73,42 @@ log_data = {
"keepalive": keepalive,
"errorlog": errorlog,
"accesslog": accesslog,
"worker_tmp_dir": worker_tmp_dir,
"max_requests": max_requests,
"max_requests_jitter": max_requests_jitter,
"worker_connections": worker_connections,
# Additional, non-gunicorn variables
"workers_per_core": workers_per_core,
"use_max_workers": use_max_workers,
"host": host,
"port": port,
"cores": cores,
}
print(json.dumps(log_data))
# Variables Gunicorn reads
# (these must be module-level names)
# fmt: off
# Core
loglevel = loglevel
workers = workers
bind = bind
# Logging
accesslog = accesslog
errorlog = errorlog
# Runtime
worker_tmp_dir = worker_tmp_dir
graceful_timeout = graceful_timeout
timeout = timeout
keepalive = keepalive
# Recycling
max_requests = max_requests
max_requests_jitter = max_requests_jitter
# Concurrency (relevant for some worker types; harmless otherwise)
worker_connections = worker_connections
# fmt: on

View File

@@ -1,3 +1,3 @@
#!/usr/bin/env bash
set -euo pipefail
gunicorn barker.main:app --worker-class uvicorn.workers.UvicornWorker --config ./gunicorn.conf.py --log-config ./logging.conf
exec gunicorn barker.main:app --worker-class uvicorn.workers.UvicornWorker --config ./gunicorn.conf.py --log-config ./logging.conf