The Dockerfile healthcheck will kill the main process if it is stuck, so that the restart policy can restart the container

Gunicorn will recycle each worker every 10,000 requests, which helps reclaim memory lost to leaks
This commit is contained in:
2026-02-12 03:52:53 +00:00
parent e6a03d33e5
commit 2ae3f3ba7e
3 changed files with 75 additions and 25 deletions

View File

@@ -54,7 +54,8 @@ RUN chmod 777 /app/docker-entrypoint.sh \
ENTRYPOINT ["docker-entrypoint.sh"]
# at the end of your Dockerfile, before CMD or after EXPOSE
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD curl -f http://localhost/health || exit 1
# Kill the main process if the healthcheck fails. This will kill the container and restart policy can restart it.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
CMD curl -f http://localhost/health || kill -s 15 1
CMD ["poetry", "run", "/app/run.sh"]

View File

@@ -3,6 +3,9 @@ import multiprocessing
import os
# --- Worker sizing -----------------------------------------------------------
# Prefer explicit WEB_CONCURRENCY in containers. Otherwise fall back to
# WORKERS_PER_CORE * cpu_count(), with a minimum of 2 and optional MAX_WORKERS.
workers_per_core_str = os.getenv("WORKERS_PER_CORE", "1")
max_workers_str = os.getenv("MAX_WORKERS")
use_max_workers = None
@@ -10,12 +13,6 @@ if max_workers_str:
use_max_workers = int(max_workers_str)
web_concurrency_str = os.getenv("WEB_CONCURRENCY", None)
host = os.getenv("HOST", "0.0.0.0")
port = os.getenv("PORT", "9995")
bind_env = os.getenv("BIND", None)
use_loglevel = os.getenv("LOG_LEVEL", "info")
use_bind = bind_env if bind_env else f"{host}:{port}"
cores = multiprocessing.cpu_count()
workers_per_core = float(workers_per_core_str)
default_web_concurrency = workers_per_core * cores
@@ -26,27 +23,47 @@ else:
web_concurrency = max(int(default_web_concurrency), 2)
if use_max_workers:
web_concurrency = min(web_concurrency, use_max_workers)
accesslog_var = os.getenv("ACCESS_LOG", "-")
use_accesslog = accesslog_var or None
errorlog_var = os.getenv("ERROR_LOG", "-")
use_errorlog = errorlog_var or None
graceful_timeout_str = os.getenv("GRACEFUL_TIMEOUT", "120")
timeout_str = os.getenv("TIMEOUT", "120")
keepalive_str = os.getenv("KEEP_ALIVE", "5")
# Gunicorn config variables
loglevel = use_loglevel
# --- Bind / logging ----------------------------------------------------------
host = os.getenv("HOST", "0.0.0.0")
port = os.getenv("PORT", "9995")
bind_env = os.getenv("BIND", None)
use_bind = bind_env if bind_env else f"{host}:{port}"
loglevel = os.getenv("LOG_LEVEL", "info")
accesslog_var = os.getenv("ACCESS_LOG", "-")
accesslog = accesslog_var or None
errorlog_var = os.getenv("ERROR_LOG", "-")
errorlog = errorlog_var or None
worker_tmp_dir = "/dev/shm"
# --- Timeouts / keepalive ----------------------------------------------------
graceful_timeout = int(os.getenv("GRACEFUL_TIMEOUT", "120"))
timeout = int(os.getenv("TIMEOUT", "120"))
keepalive = int(os.getenv("KEEP_ALIVE", "5"))
# --- Robustness knobs (recommended) -----------------------------------------
# Recycle workers gradually to mitigate memory leaks / fragmentation without
# full container restarts. Tune via env if needed.
max_requests = int(os.getenv("MAX_REQUESTS", "10000"))
max_requests_jitter = int(os.getenv("MAX_REQUESTS_JITTER", "1000"))
# Prevent slow clients from holding connections forever (defaults are fine).
# You can tune these via env if you ever need to.
# worker_connections matters only for async worker types; kept here for clarity.
worker_connections = int(os.getenv("WORKER_CONNECTIONS", "1000"))
# Helpful in containerized environments: ensure workers are responsive.
# (Defaults are okay; leaving commented unless you want strict behavior.)
# heartbeat_interval = int(os.getenv("HEARTBEAT_INTERVAL", "30"))
# --- Gunicorn config vars ----------------------------------------------------
workers = web_concurrency
bind = use_bind
errorlog = use_errorlog
worker_tmp_dir = "/dev/shm"
accesslog = use_accesslog
graceful_timeout = int(graceful_timeout_str)
timeout = int(timeout_str)
keepalive = int(keepalive_str)
# For debugging and testing
# --- Debug print (keep if you like) ------------------------------------------
log_data = {
"loglevel": loglevel,
"workers": workers,
@@ -56,10 +73,42 @@ log_data = {
"keepalive": keepalive,
"errorlog": errorlog,
"accesslog": accesslog,
"worker_tmp_dir": worker_tmp_dir,
"max_requests": max_requests,
"max_requests_jitter": max_requests_jitter,
"worker_connections": worker_connections,
# Additional, non-gunicorn variables
"workers_per_core": workers_per_core,
"use_max_workers": use_max_workers,
"host": host,
"port": port,
"cores": cores,
}
print(json.dumps(log_data))
# Variables Gunicorn reads
# (these must be module-level names)
# fmt: off
# Core
loglevel = loglevel
workers = workers
bind = bind
# Logging
accesslog = accesslog
errorlog = errorlog
# Runtime
worker_tmp_dir = worker_tmp_dir
graceful_timeout = graceful_timeout
timeout = timeout
keepalive = keepalive
# Recycling
max_requests = max_requests
max_requests_jitter = max_requests_jitter
# Concurrency (relevant for some worker types; harmless otherwise)
worker_connections = worker_connections
# fmt: on

View File

@@ -1,3 +1,3 @@
#!/usr/bin/env bash
set -euo pipefail
gunicorn barker.main:app --worker-class uvicorn.workers.UvicornWorker --config ./gunicorn.conf.py --log-config ./logging.conf
exec gunicorn barker.main:app --worker-class uvicorn.workers.UvicornWorker --config ./gunicorn.conf.py --log-config ./logging.conf