From 2ae3f3ba7e82e3a5e20c1cddc18f6e06a3b5458c Mon Sep 17 00:00:00 2001 From: Amritanshu Date: Thu, 12 Feb 2026 03:52:53 +0000 Subject: [PATCH] Dockerfile will kill the process if stuck so that the restart policy can restart the container. Gunicorn will kill each worker every 10000 requests, which can help reclaim memory lost to leaks --- Dockerfile | 5 ++- barker/gunicorn.conf.py | 93 +++++++++++++++++++++++++++++++---------- barker/run.sh | 2 +- 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/Dockerfile b/Dockerfile index 172aeb53..136e1cd4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,7 +54,8 @@ RUN chmod 777 /app/docker-entrypoint.sh \ ENTRYPOINT ["docker-entrypoint.sh"] # at the end of your Dockerfile, before CMD or after EXPOSE -HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ - CMD curl -f http://localhost/health || exit 1 +# Kill the main process if the healthcheck fails. This will kill the container and the restart policy can restart it. +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -f http://localhost/health || kill -s 15 1 CMD ["poetry", "run", "/app/run.sh"] diff --git a/barker/gunicorn.conf.py b/barker/gunicorn.conf.py index e8f07c01..f6a853fc 100644 --- a/barker/gunicorn.conf.py +++ b/barker/gunicorn.conf.py @@ -3,6 +3,9 @@ import multiprocessing import os +# --- Worker sizing ----------------------------------------------------------- +# Prefer explicit WEB_CONCURRENCY in containers. Otherwise fall back to +# WORKERS_PER_CORE * cpu_count(), with a minimum of 2 and optional MAX_WORKERS. 
workers_per_core_str = os.getenv("WORKERS_PER_CORE", "1") max_workers_str = os.getenv("MAX_WORKERS") use_max_workers = None @@ -10,12 +13,6 @@ if max_workers_str: use_max_workers = int(max_workers_str) web_concurrency_str = os.getenv("WEB_CONCURRENCY", None) -host = os.getenv("HOST", "0.0.0.0") -port = os.getenv("PORT", "9995") -bind_env = os.getenv("BIND", None) -use_loglevel = os.getenv("LOG_LEVEL", "info") -use_bind = bind_env if bind_env else f"{host}:{port}" - cores = multiprocessing.cpu_count() workers_per_core = float(workers_per_core_str) default_web_concurrency = workers_per_core * cores @@ -26,27 +23,47 @@ else: web_concurrency = max(int(default_web_concurrency), 2) if use_max_workers: web_concurrency = min(web_concurrency, use_max_workers) -accesslog_var = os.getenv("ACCESS_LOG", "-") -use_accesslog = accesslog_var or None -errorlog_var = os.getenv("ERROR_LOG", "-") -use_errorlog = errorlog_var or None -graceful_timeout_str = os.getenv("GRACEFUL_TIMEOUT", "120") -timeout_str = os.getenv("TIMEOUT", "120") -keepalive_str = os.getenv("KEEP_ALIVE", "5") -# Gunicorn config variables -loglevel = use_loglevel +# --- Bind / logging ---------------------------------------------------------- +host = os.getenv("HOST", "0.0.0.0") +port = os.getenv("PORT", "9995") +bind_env = os.getenv("BIND", None) +use_bind = bind_env if bind_env else f"{host}:{port}" + +loglevel = os.getenv("LOG_LEVEL", "info") +accesslog_var = os.getenv("ACCESS_LOG", "-") +accesslog = accesslog_var or None +errorlog_var = os.getenv("ERROR_LOG", "-") +errorlog = errorlog_var or None + +worker_tmp_dir = "/dev/shm" + +# --- Timeouts / keepalive ---------------------------------------------------- +graceful_timeout = int(os.getenv("GRACEFUL_TIMEOUT", "120")) +timeout = int(os.getenv("TIMEOUT", "120")) +keepalive = int(os.getenv("KEEP_ALIVE", "5")) + +# --- Robustness knobs (recommended) ----------------------------------------- +# Recycle workers gradually to mitigate memory leaks / fragmentation 
without +# full container restarts. Tune via env if needed. +max_requests = int(os.getenv("MAX_REQUESTS", "10000")) +max_requests_jitter = int(os.getenv("MAX_REQUESTS_JITTER", "1000")) + +# Prevent slow clients from holding connections forever (defaults are fine). +# You can tune these via env if you ever need to. +# worker_connections matters only for async worker types; kept here for clarity. +worker_connections = int(os.getenv("WORKER_CONNECTIONS", "1000")) + +# Helpful in containerized environments: ensure workers are responsive. +# (Defaults are okay; leaving commented unless you want strict behavior.) +# heartbeat_interval = int(os.getenv("HEARTBEAT_INTERVAL", "30")) + +# --- Gunicorn config vars ---------------------------------------------------- workers = web_concurrency bind = use_bind -errorlog = use_errorlog -worker_tmp_dir = "/dev/shm" -accesslog = use_accesslog -graceful_timeout = int(graceful_timeout_str) -timeout = int(timeout_str) -keepalive = int(keepalive_str) -# For debugging and testing +# --- Debug print (keep if you like) ------------------------------------------ log_data = { "loglevel": loglevel, "workers": workers, @@ -56,10 +73,42 @@ log_data = { "keepalive": keepalive, "errorlog": errorlog, "accesslog": accesslog, + "worker_tmp_dir": worker_tmp_dir, + "max_requests": max_requests, + "max_requests_jitter": max_requests_jitter, + "worker_connections": worker_connections, # Additional, non-gunicorn variables "workers_per_core": workers_per_core, "use_max_workers": use_max_workers, "host": host, "port": port, + "cores": cores, } print(json.dumps(log_data)) + + +# Variables Gunicorn reads +# (these must be module-level names) +# fmt: off +# Core +loglevel = loglevel +workers = workers +bind = bind + +# Logging +accesslog = accesslog +errorlog = errorlog + +# Runtime +worker_tmp_dir = worker_tmp_dir +graceful_timeout = graceful_timeout +timeout = timeout +keepalive = keepalive + +# Recycling +max_requests = max_requests +max_requests_jitter = 
max_requests_jitter + +# Concurrency (relevant for some worker types; harmless otherwise) +worker_connections = worker_connections +# fmt: on diff --git a/barker/run.sh b/barker/run.sh index 0ef27c8a..96007096 100755 --- a/barker/run.sh +++ b/barker/run.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash set -euo pipefail -gunicorn barker.main:app --worker-class uvicorn.workers.UvicornWorker --config ./gunicorn.conf.py --log-config ./logging.conf +exec gunicorn barker.main:app --worker-class uvicorn.workers.UvicornWorker --config ./gunicorn.conf.py --log-config ./logging.conf