`gunicorn.workers` 實現了不同類型的 worker 進程,有單進程、多線程、多協程等形式。
gunicorn.workers
目錄結構:
workers/
├── __init__.py
├── _gaiohttp.py
├── base.py
├── base_async.py
├── gaiohttp.py
├── geventlet.py
├── ggevent.py
├── gthread.py
├── gtornado.py
├── sync.py
└── workertmp.py
主要看以下幾個源碼文件:
- `base.py`:基類文件
- `gthread.py`:單進程多線程工作模式
- `sync.py`:單進程單線程模式
- `workertmp.py`:tmp 文件,master 監控 worker 進程的機制
剩下的其他文件大同小異。
Worker
下面是 `Worker` 類實現的簡略。
# Abridged outline of the Worker base class: class attributes and method
# signatures only, bodies omitted (this is a sketch, not runnable code).
class Worker(object):
    SIGNALS = [getattr(signal, "SIG%s" % x)
    for x in "ABRT HUP QUIT INT TERM USR1 USR2 WINCH CHLD".split()] # supported signals
    PIPE = []
    def __init__(self, age, ppid, sockets, app, timeout, cfg, log)
    def __str__(self)
    def notify(self)
    def run(self)
    def init_process(self)
    def load_wsgi(self) # obtain the app that implements the WSGI protocol, e.g. Flask
    def init_signals(self)
    def handle_usr1(self, sig, frame)
    def handle_exit(self, sig, frame)
    def handle_quit(self, sig, frame)
    def handle_abort(self, sig, frame)
    def handle_error(self, req, client, addr, exc)
    def handle_winch(self, sig, fname)
def __init__(self, age, ppid, sockets, app, timeout, cfg, log):
    """\
    Store the worker's parameters and create its heartbeat tmp file.

    This is called pre-fork so it shouldn't do anything to the
    current process. If there's a need to make process wide
    changes you'll want to do that in ``self.init_process()``.

    ``cfg`` is read for ``max_requests`` and ``max_requests_jitter``
    and is passed on to ``WorkerTmp``.
    """
    self.age = age
    self.pid = "[booting]"
    self.ppid = ppid
    self.sockets = sockets
    self.app = app
    self.timeout = timeout  # timeout, in seconds
    self.cfg = cfg  # configuration object

    # State flags
    self.booted = False  # set once the worker has started
    self.aborted = False  # set once the worker has been aborted
    self.reloader = None

    self.nr = 0

    # Only apply jitter when a request limit is actually configured.
    # With ``max_requests == 0`` (limit disabled) the previous expression
    # ``cfg.max_requests + jitter or sys.maxsize`` turned any non-zero
    # jitter into an unintended restart limit.
    if cfg.max_requests > 0:
        jitter = randint(0, cfg.max_requests_jitter)
        self.max_requests = cfg.max_requests + jitter
    else:
        self.max_requests = sys.maxsize

    self.alive = True  # whether the worker should keep running
    self.log = log  # logger object
    self.tmp = WorkerTmp(cfg)  # worker tmp (heartbeat) file
`__init__()` 做的事情相對簡單,就是將一些相關的參數,如 `cfg`、`app` 等作為 `Worker` 對象的屬性,同時創建一個 tmpfile,父進程通過檢查該文件的時間戳,來確認子進程是否存活。
def notify(self):
    """\
    Report liveness by delegating to the worker's tmp file.

    A worker subclass has to make sure this gets called at least once
    every ``self.timeout`` seconds; if it does not, the master process
    will murder the worker.
    """
    heartbeat = self.tmp
    heartbeat.notify()
`notify()` 調用 `WorkerTmp.notify()` 更改所對應 tmp 文件的時間戳。
def init_process(self):
    """\
    Set up the worker process and then enter its main loop.

    A subclass that overrides this method must make its last statement a
    call to ``super(MyWorkerClass, self).init_process()``, because the
    ``run()`` loop is started from here.
    """
    # Export the configured environment variables, if any.
    if self.cfg.env:
        for name, value in self.cfg.env.items():
            os.environ[name] = value

    # Apply the configured process ownership.
    util.set_owner_process(self.cfg.uid, self.cfg.gid,
                           initgroups=self.cfg.initgroups)

    # Reseed the random number generator.
    util.seed()

    # Pipe used for waking ourselves up.
    self.PIPE = os.pipe()
    for pipe_fd in self.PIPE:
        util.set_non_blocking(pipe_fd)
        util.close_on_exec(pipe_fd)

    # Prevent fd inheritance: flag the listeners and the tmp file
    # close-on-exec.
    for listener in self.sockets:
        util.close_on_exec(listener)
    util.close_on_exec(self.tmp.fileno())

    self.wait_fds = self.sockets + [self.PIPE[0]]

    self.log.close_on_exec()

    # Install the signal handlers.
    self.init_signals()

    # Start the reloader when code reloading is enabled.
    if self.cfg.reload:
        def changed(fname):
            # Callback handed to the reloader: log, mark the worker as
            # not alive, run the worker_int hook, then exit the process.
            self.log.info("Worker reloading: %s modified", fname)
            self.alive = False
            self.cfg.worker_int(self)
            time.sleep(0.1)
            sys.exit(0)

        engine = reloader_engines[self.cfg.reload_engine]
        self.reloader = engine(extra_files=self.cfg.reload_extra_files,
                               callback=changed)
        self.reloader.start()

    self.load_wsgi()
    self.cfg.post_worker_init(self)

    # Enter the main run loop.
    self.booted = True
    self.run()
`init_process()` 是 worker 進程的入口,啟動工作進程調用的是該方法,官方建議所有的實現子類的重載方法應該調用父類的該方法,該方法主要做了以下幾件事:
- 設置進程的環境變量以及所屬用戶、組信息(uid、gid);
- 創建單進程管道,Worker 是通過管道來存儲導致中斷的信號,不直接處理,先收集起來,在主循環中處理;
- 獲取要監聽的文件描述符,并將描述符設置為不可被子進程繼承;
- 設置中斷信號處理函數;
- 設置代碼更新時,自動重啟的配置;
- 獲取實現了 wsgi 協議的 app 對象;
- 進入主循環方法。
def run(self):
    """\
    Main loop of a worker process (not implemented in the base class).

    Concrete worker classes override this method with the behaviour
    they need; calling the base version always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()
`Worker` 類沒有實現 `run()`,由子類去實現具體的邏輯。
再來看看 `WorkerTmp` 類。
WorkerTmp
# -*- coding: utf-8 -
#
# This file is part of gunicorn released under the MIT license.
# See the NOTICE for more information.
import os
import platform
import tempfile
from gunicorn import util
PLATFORM = platform.system()
IS_CYGWIN = PLATFORM.startswith('CYGWIN')
class WorkerTmp(object):
    """Temporary file whose status-change time serves as a heartbeat.

    ``notify()`` flips the file's mode bits and ``last_update()`` reports
    the file's ``st_ctime``, so a reader can tell when ``notify()`` was
    last called.
    """

    def __init__(self, cfg):
        """Create the temporary file and unlink it right away.

        Args:
            cfg: configuration object; ``umask``, ``worker_tmp_dir``,
                ``uid`` and ``gid`` are read from it.

        Raises:
            RuntimeError: if ``cfg.worker_tmp_dir`` is set but is not an
                existing directory.
        """
        old_umask = os.umask(cfg.umask)
        try:
            fdir = cfg.worker_tmp_dir
            if fdir and not os.path.isdir(fdir):
                raise RuntimeError("%s doesn't exist. Can't create workertmp." % fdir)
            fd, name = tempfile.mkstemp(prefix="wgunicorn-", dir=fdir)
        finally:
            # Restore the process-wide umask even when the directory check
            # or mkstemp fails; otherwise cfg.umask leaks into the caller.
            os.umask(old_umask)

        try:
            # allows the process to write to the file
            util.chown(name, cfg.uid, cfg.gid)

            # unlink the file so we don't leak temporary files
            if not IS_CYGWIN:
                # The already-open descriptor stays usable after the
                # unlink; the file goes away once it is closed.
                util.unlink(name)
            # buffering=0: line buffering (1) is not supported in binary
            # mode and emits a RuntimeWarning on Python 3.8+.
            self._tmp = os.fdopen(fd, 'w+b', 0)
        except BaseException:
            # Do not leak the descriptor if any setup step fails.
            os.close(fd)
            raise

        self.spinner = 0

    def notify(self):
        """Toggle the file mode between 0 and 1 to refresh its ctime."""
        self.spinner = (self.spinner + 1) % 2
        os.fchmod(self._tmp.fileno(), self.spinner)

    def last_update(self):
        """Return the ``st_ctime`` of the temporary file."""
        return os.fstat(self._tmp.fileno()).st_ctime

    def fileno(self):
        """Return the file descriptor of the temporary file."""
        return self._tmp.fileno()

    def close(self):
        """Close the temporary file and return the result of ``close()``."""
        return self._tmp.close()
`WorkerTmp` 類主要的作用是創建一個臨時文件,子進程通過更新該文件的時間戳,父進程定期檢查子進程臨時文件的時間戳確定子進程是否存活。
`WorkerTmp.__init__()` 在系統創建了臨時文件并獲取其文件描述符,然后 `unlink` 該文件,防止子進程關閉后沒有刪除文件;即使被 `unlink` 了,已經打開的文件描述符仍然可以訪問文件。
`WorkerTmp.notify()` 通過更改文件的權限來更新文件的狀態變更時間(ctime)。
`WorkerTmp.last_update()` 用來獲取文件最后一次更新的時間。
最后看下工作進程的一個實現子類 `ThreadWorker`。
ThreadWorker
該類實現了父類的 `Worker.run()` 方法,并重載了部分其他方法。
def init_process(self):
    """Create the thread pool, selector and lock, then defer to the parent.

    The parent class's ``init_process()`` is called last.
    """
    pool_size = self.cfg.threads
    self.tpool = futures.ThreadPoolExecutor(max_workers=pool_size)
    # DefaultSelector picks the selector implementation for the platform.
    self.poller = selectors.DefaultSelector()
    # Re-entrant lock.
    self._lock = RLock()
    super(ThreadWorker, self).init_process()
初始化函數做了以下事情:
- 通過 `concurrent.futures.ThreadPoolExecutor` 創建線程池,線程的數量由配置文件的 `threads` 決定;
- 通過 `selectors.DefaultSelector()` 獲取符合所在平臺的最優的 I/O 復用,如 Linux 使用 epoll,Mac 下面使用 kqueue,這個模塊隱藏了底層的平臺細節,對外提供統一的接口;
- 創建一個可重入鎖;
- 調用父類的 `init_process()`,在該方法里面調用了 `run()` 方法。
def run(self):
    """Main loop of the threaded worker.

    Every iteration:
      1. notifies the arbiter that this worker is alive;
      2. while below ``worker_connections``, polls the listeners for up
         to one second and runs the callback stored with each ready key;
      3. drops finished request futures from ``self.futures``;
      4. leaves the loop when the parent process is no longer alive;
      5. handles keep-alive timeouts.

    After the loop the thread pool is shut down without waiting, the
    selector and the listening sockets are closed, and outstanding
    futures are given up to ``cfg.graceful_timeout`` to finish.
    """
    # init listeners, add them to the event loop
    for listener in self.sockets:
        listener.setblocking(False)
        # a race condition during graceful shutdown may make the listener
        # name unavailable in the request handler so capture it once here
        bound_name = listener.getsockname()
        on_readable = partial(self.accept, bound_name)
        # The callback rides along as the registration's ``data``.
        self.poller.register(listener, selectors.EVENT_READ, on_readable)

    while self.alive:
        # notify the arbiter we are alive
        self.notify()

        if self.nr_conns < self.worker_connections:
            # Room for more connections: wait for listener events and
            # dispatch them, then only peek at finished requests.
            for key, _ in self.poller.select(1.0):
                key.data(key.fileobj)
            wait_timeout = 0
        else:
            # At the connection limit: block until a request finishes.
            wait_timeout = 1.0

        finished = futures.wait(self.futures, timeout=wait_timeout,
                                return_when=futures.FIRST_COMPLETED)

        # clean up finished requests
        for fut in finished.done:
            self.futures.remove(fut)

        if not self.is_parent_alive():
            break

        # handle keepalive timeouts
        self.murder_keepalived()

    self.tpool.shutdown(False)
    self.poller.close()

    for listener in self.sockets:
        listener.close()

    # Upper bound on how long the graceful shutdown waits.
    futures.wait(self.futures, timeout=self.cfg.graceful_timeout)
`run()` 方法中主要做了以下事情:
- 更新 tmpfile 時間戳;
- 獲取就緒的請求連接;
- 如果并發數允許,分配一個線程處理請求;
- 判斷父進程是否已經停止工作,有的話準備退出主循環;
- 殺死已經超過最長保持時間的 keep-alive 連接。
下面是一個請求剛進來的處理過程:
def _wrap_future(self, fs, conn):
    """Track a submitted future and hook up its completion callback.

    Arguments:
        fs -- the future to track
        conn -- the connection, attached to the future as ``fs.conn``
    """
    # Attach the connection first so it is already available on the
    # future by the time the done-callback runs.
    setattr(fs, "conn", conn)
    self.futures.append(fs)
    fs.add_done_callback(self.finish_request)
def enqueue_req(self, conn):
    """Initialise a connection and hand it to the thread pool.

    Arguments:
        conn -- the connection object to process
    """
    conn.init()
    # submit the connection to a worker thread, then register the future
    pending = self.tpool.submit(self.handle, conn)
    self._wrap_future(pending, conn)
def accept(self, server, listener):
    """Handle a readable listener: accept a client and enqueue it.

    Arguments:
        server -- the listener's name, passed on to the connection object
        listener -- the listening socket to accept from

    ``EnvironmentError`` with errno EAGAIN, ECONNABORTED or EWOULDBLOCK
    is ignored; any other ``EnvironmentError`` propagates.
    """
    try:
        client_sock, client_addr = listener.accept()
        # initialize the connection object
        connection = TConn(self.cfg, client_sock, client_addr, server)
        # one more connection is now in flight
        self.nr_conns += 1
        # enqueue the job
        self.enqueue_req(connection)
    except EnvironmentError as err:
        ignorable = (errno.EAGAIN, errno.ECONNABORTED, errno.EWOULDBLOCK)
        if err.errno in ignorable:
            return
        raise
- 從監聽 socket 中 `accept` 返回一個與客戶端連接的 socket;
- 將該 socket 作為 `self.handle()` 方法的參數啟動線程;
- 注冊線程運行完成后的回調函數。
`self.handle()` 主要的部分在于其調用的 `self.handle_request()`,因此直接看 `self.handle_request()` 做了哪些事情:
def handle_request(self, req, conn):
    """Run the WSGI application for one request and write the response.

    Returns ``False`` when ``resp.should_close()`` is true after the
    response was written, ``True`` otherwise. ``EnvironmentError`` is
    re-raised for the caller to handle. Any other exception raised after
    the headers were sent closes the socket and is converted to
    ``StopIteration``; before the headers were sent it propagates as is.
    The ``post_request`` hook always runs; its own errors are only logged.
    """
    environ = {}
    resp = None
    try:
        self.cfg.pre_request(self, req)
        request_start = datetime.now()
        resp, environ = wsgi.create(req, conn.sock, conn.client,
                conn.server, self.cfg)
        environ["wsgi.multithread"] = True
        self.nr += 1
        # Request limit reached: close after this response and stop the
        # worker's main loop.
        if self.alive and self.nr >= self.max_requests:
            self.log.info("Autorestarting worker after current request.")
            resp.force_close()
            self.alive = False

        # Keep-alive disabled, or already at the keep-alive limit: close
        # the connection after this response.
        if not self.cfg.keepalive:
            resp.force_close()
        elif len(self._keep) >= self.max_keepalived:
            resp.force_close()

        # Call the WSGI application.
        respiter = self.wsgi(environ, resp.start_response)
        try:
            if isinstance(respiter, environ['wsgi.file_wrapper']):
                resp.write_file(respiter)
            else:
                for item in respiter:
                    resp.write(item)

            resp.close()
            request_time = datetime.now() - request_start
            self.log.access(resp, req, environ, request_time)
        finally:
            # Close the response iterable if it provides close().
            if hasattr(respiter, "close"):
                respiter.close()

        if resp.should_close():
            self.log.debug("Closing connection.")
            return False
    except EnvironmentError:
        # pass to next try-except level
        util.reraise(*sys.exc_info())
    except Exception:
        if resp and resp.headers_sent:
            # If the requests have already been sent, we should close the
            # connection to indicate the error.
            self.log.exception("Error handling request")
            try:
                conn.sock.shutdown(socket.SHUT_RDWR)
                conn.sock.close()
            except EnvironmentError:
                pass
            raise StopIteration()
        raise
    finally:
        # A failing post_request hook is logged, never propagated.
        try:
            self.cfg.post_request(self, req, environ, resp)
        except Exception:
            self.log.exception("Exception in post_request hook")

    return True
除了一些配置和環境相關的處理,關鍵的在于 `respiter = self.wsgi(environ, resp.start_response)` 這行代碼,這行代碼獲取了實現 wsgi 協議的 app 并運行,將獲取后的結果返回給客戶端。
這里就是整個請求處理的關鍵,只要符合 wsgi 協議的框架,都可以這樣接入 Gunicorn。
整個 `ThreadWorker` 的處理流程,如下圖: