創建日志對象,默認是新建一個MemoryJournal對象
self.__raftLog = createJournal(self.__conf.journalFile)
#日志對象含有的屬性:
self.__journal = [] #[]存放的是 'command' 'idx' 'term'
self.__bytesSize = 0
創建序列化對象
self.__serializer = Serializer(self.__conf.fullDumpFile,
self.__conf.logCompactionBatchSize,
self.__conf.useFork,
self.__conf.serializer,
self.__conf.deserializer,
self.__conf.serializeChecker)
創建Poller
self._poller = createPoller(self.__conf.pollerType)
首先判斷select模塊有沒有poll屬性:
- 有:返回PollPoller對象
- 沒有:返回SelectPoller()對象
通過poller來實現IO多路復用
創建TCP的服務端 | tcp_server.py 中的TcpServer的對象
self.__server = TcpServer(self._poller, host, port, onNewConnection=self.__onNewConnection,
sendBufferSize=self.__conf.sendBufferSize,
recvBufferSize=self.__conf.recvBufferSize,
connectionTimeout=self.__conf.connectionTimeout)
傳入onNewConnection=self.__onNewConnection作為新連接的處理函數
此時只是創建了一個服務端的對象,初始化了一些屬性,沒怎么處理,甚至還沒有創建套接字
獲得本SyncObj對象的需要復制的方法
methods = [m for m in dir(self) if callable(getattr(self, m)) and \
getattr(getattr(self, m), 'replicated', False) and \
m != getattr(getattr(self, m), 'origName')]
把所有需要復制的“方法”和“版本號”存放到字典(funcVersions)中
funcVersions[origFuncName].add(ver)
添加{"方法1":"版本號1","方法2":"版本號2",...}
此時獲取的是所有帶有“復制”功能的方法,而字典中存儲的值是set
可能同一個方法名有多個版本
#存放了方法集合,元素是("版本號",0,方法,對象)
for method in methods:
ver = getattr(getattr(self, method), 'ver') #返回方法的'ver'
methodsToEnumerate.append((ver, 0, method, self))
從復制方法集中找到最高版本的方法的版本號——self.__selfCodeVersion
for ver, _, method, obj in sorted(methodsToEnumerate):
self.__selfCodeVersion = max(self.__selfCodeVersion, ver) #存放最新版本的方法?保證從各個復制方法中得到最大的版本號碼放入
if obj is self:
self._methodToID[method] = currMethodID #存放方法ID
else:
self._methodToID[(id(obj), method)] = currMethodID
self._idToMethod[currMethodID] = getattr(obj, method) #存放{'currMethodId':方法}
currMethodID += 1
存放兩個字典:
_methodToID:里面存著方法和對應的方法號 #存放{方法:currMethodID}
_idToMethod:里面存著方法號和對應的方法 #存放{currMethodId :方法}
__onSetCodeVersion(0)
#此處和之前一樣,取出需要復制的方法
methods = [m for m in dir(self) if callable(getattr(self, m)) and \
getattr(getattr(self, m), 'replicated', False) and \
m != getattr(getattr(self, m), 'origName')]
# 當前版本的方法名
self.__currentVersionFuncNames = {}
funcVersions = collections.defaultdict(set) #funcVersion字典,如果key值不存在時,返回一個set集合
for method in methods:
ver = getattr(getattr(self, method), 'ver')
origFuncName = getattr(getattr(self, method), 'origName')
funcVersions[origFuncName].add(ver) #添加{"方法名":{ver1,ver2...}} 可能存在多個版本的方法
#暫且不看,因為consumer默認為none
for consumer in self.__consumers:
consumerID = id(consumer)
consumerMethods = [m for m in dir(consumer) if callable(getattr(consumer, m)) and \
getattr(getattr(consumer, m), 'replicated', False)]
for method in consumerMethods:
ver = getattr(getattr(consumer, method), 'ver')
origFuncName = getattr(getattr(consumer, method), 'origName')
funcVersions[(consumerID, origFuncName)].add(ver)
for funcName, versions in iteritems(funcVersions):
versions = sorted(list(versions))
for v in versions:
if v > newVersion:
break
realFuncName = funcName[1] if isinstance(funcName, tuple) else funcName
#得到一個方法和版本的集合,都是版本比setcodeVersion版本低的復制方法
self.__currentVersionFuncNames[funcName] = realFuncName + '_v' + str(v)
相當于對每個方法,只選用版本號不高于newVersion(即setCodeVersion指定的版本)的最高版本,以此指定了可用方法!
__bindedEvent = threading.Event()
Python threading模塊提供了Event對象用于線程間通信,它提供了設置、清除、等待等方法用于實現線程間的通信。event是最簡單的線程間通信方式之一:一個線程產生一個信號,另一個線程則等待該信號。Python 通過threading.Event()產生一個event對象,event對象維護一個內部標志(標志初始值為False),通過set()將其置為True,wait(timeout)則用于阻塞線程直至標志被set(或者超時,timeout可選),isSet()用于查詢標志位是否為True,clear()則用于清除標志位(使之為False)。
PipeNotifier(self._poller)
#appendEntriesUseBatch默認為TRUE
if not self.__conf.appendEntriesUseBatch and PIPE_NOTIFIER_ENABLED:
self.__pipeNotifier = PipeNotifier(self._poller)
主要的函數處理
self.__mainThread = threading.current_thread()
self.__initialised = threading.Event()
#創建一個線程,傳入的參數是本實例對象的"弱引用"
self.__thread = threading.Thread(target=SyncObj._autoTickThread, args=(weakref.proxy(self),))
self.__thread.start() #啟動線程活動
self.__initialised.wait() #阻塞當前線程,直到Event.set()被調用;由此可以看到,線程SyncObj._autoTickThread會通過set()解除這里的阻塞
創建的線程執行的函數 _autoTickThread(self)
def _autoTickThread(self):
try:
self.__initInTickThread()
except SyncObjException as e:
if e.errorCode == 'BindError':
return
raise
finally:
self.__initialised.set() #set(), 可以繼續執行線程了
time.sleep(0.1)
try:
while True:
if not self.__mainThread.is_alive():
break
if self.__destroying:
self._doDestroy()
break
self._onTick(self.__conf.autoTickPeriod)
except ReferenceError:
pass
self.__initialised.set()目的:放在finally中,保證__initInTickThread執行完(無論成功與否)之后,在構造函數里wait()的線程才能繼續執行;之后本線程再循環調用self._onTick
__initInTickThread
def __initInTickThread(self):
try:
self.__lastInitTryTime = time.time() #上次初始化的時間
if self.__selfNodeAddr is not None:
self.__server.bind() #開始對TCP服務端端口綁定
shouldConnect = None
else:
shouldConnect = True
self.__nodes = []
for nodeAddr in self.__otherNodesAddrs:
#剛開始shouldConnect是None
self.__nodes.append(Node(self, nodeAddr, shouldConnect))
self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1
self.__raftMatchIndex[nodeAddr] = 0 #剛開始的時候已經復制日志的最高索引值為0
self.__needLoadDumpFile = True
self.__isInitialized = True
self.__bindedEvent.set()
except:
self.__bindRetries += 1
if self.__conf.maxBindRetries and self.__bindRetries >= self.__conf.maxBindRetries:
self.__bindedEvent.set()
raise SyncObjException('BindError')
logging.exception('failed to perform initialization')
開始對TCP服務端端口綁定
bind | tcp_server.py
def bind(self):
self.__socket = socket.socket(self.__hostAddrType, socket.SOCK_STREAM)
self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, self.__sendBufferSize)
self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, self.__recvBufferSize)
self.__socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) #不使用Nagle算法
self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) #允許重用本地地址和端口
self.__socket.setblocking(0) #設置為非阻塞模式
self.__socket.bind((self.__host, self.__port))
self.__socket.listen(5)
self.__fileno = self.__socket.fileno() #返回監聽Socket的文件描述符
logging.warning(self.__fileno)
self.__poller.subscribe(self.__fileno,
self.__onNewConnection,
POLL_EVENT_TYPE.READ | POLL_EVENT_TYPE.ERROR)
self.__state = SERVER_STATE.BINDED
主要:
- 創建TCP Socket套接字
- 修改套接字的選項
- 設置setblocking為0非阻塞模式,此時accept()不會阻塞等待,可以實現多任務
- 向poll注冊事件,用于生成為客戶端服務的套接字
def __onNewConnection | tcp_server.py
用于接收客戶端的connect或者ERROR的時候解綁
conn = TcpConnection(poller=self.__poller,
socket=sock,
timeout=self.__connectionTimeout,
sendBufferSize=self.__sendBufferSize,
recvBufferSize=self.__recvBufferSize)
self.__onNewConnectionCallback(conn)
TcpConnection屬于tcp_connection.py
self.__onNewConnectionCallback的函數是SyncObj里的__onNewConnection函數,用于處理新的連接。
此時并沒有給出__onMessageReceived函數和__onConnected函數
__onNewConnection做了什么?
為客戶端的這個服務,將其放到self.__unknownConnections[descr] = conn
同時綁定方法:SyncObj的__onMessageReceived和__onDisconnected
創建TcpConnection對象中
設置:self.__state = CONNECTION_STATE.CONNECTED
向poll注冊事件,一旦有接收消息或者發送消息事件的時候,調用__processConnection
__processConnection
if time.time() - self.__lastReadTime > self.__timeout:
self.disconnect()
return
判斷:如果當前時間距離上次讀取的時間已經超過了self.__timeout(連接超時時間),則超時的數據不再處理,直接斷開連接。
#初次接收消息的時候,self.__state == CONNECTION_STATE.CONNECTING
if self.__state == CONNECTION_STATE.CONNECTING:
#初始的時候,還是等于None的
if self.__onConnected is not None:
self.__onConnected()
self.__state = CONNECTION_STATE.CONNECTED
self.__lastReadTime = time.time()
return
第一次收到消息,表示連接已經建立好
接收數據:
if eventType & POLL_EVENT_TYPE.READ:
self.__tryReadBuffer()
if self.__state == CONNECTION_STATE.DISCONNECTED:
return
while True:
message = self.__processParseMessage()
if message is None:
break
if self.__onMessageReceived is not None:
#把消息傳到syncobj的消息接受函數中處理
self.__onMessageReceived(message)
if self.__state == CONNECTION_STATE.DISCONNECTED:
return
__tryReadBuffer():主要是等待__processRead返回TRUE時
def __processRead(self):
try:
incoming = self.__socket.recv(self.__recvBufferSize)
except socket.error as e:
if e.errno not in (socket.errno.EAGAIN, socket.errno.EWOULDBLOCK):
self.disconnect()
return False
if self.__socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR):
self.disconnect()
return False
if not incoming:
self.disconnect()
return False
self.__readBuffer += incoming
return True
message = self.__processParseMessage()開始解析數據:
def __processParseMessage(self):
if len(self.__readBuffer) < 4:
return None
#l是指定的數據長度?
l = struct.unpack('i', self.__readBuffer[:4])[0]
if len(self.__readBuffer) - 4 < l:
return None
data = self.__readBuffer[4:4 + l]
try:
if self.encryptor:
data = self.encryptor.decrypt(data)
message = pickle.loads(zlib.decompress(data))
if self.recvRandKey:
randKey, message = message
assert randKey == self.recvRandKey
except:
self.disconnect()
return None
self.__readBuffer = self.__readBuffer[4 + l:]
return message
提取數據部分并返回!
調用SyncObj傳來的接收消息處理函數__onMessageReceived處理message
__onMessageReceived
# message不是應該是字符串嗎?怎么是list?
if isinstance(message, list) and self.__onUtilityMessage(conn, message):
self.__unknownConnections.pop(descr, None)
return
# 消息只包含節點地址
partnerNode = None
for node in self.__nodes:
if node.getAddress() == message:
partnerNode = node
break
if partnerNode is None and message != 'readonly':
conn.disconnect()
self.__unknownConnections.pop(descr, None)
return
# 處理傳來集群其他節點IP地址的消息
if partnerNode is not None:
partnerNode.onPartnerConnected(conn)
else:
nodeAddr = str(self.__readonlyNodesCounter)
node = Node(self, nodeAddr, shouldConnect=False)
node.onPartnerConnected(conn)
self.__readonlyNodes.append(node)
self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1
self.__raftMatchIndex[nodeAddr] = 0
self.__readonlyNodesCounter += 1
self.__unknownConnections.pop(descr, None)
__onUtilityMessage
判斷消息是否有效,無效時返回FALSE
#消息類型
if message[0] == 'status':
conn.send(self.getStatus())
return True
elif message[0] == 'add':
# 默認沒開啟動態添加節點到集群的功能
self.addNodeToCluster(message[1],
callback=functools.partial(self.__utilityCallback, conn=conn, cmd='ADD',
node=message[1]))
return True
elif message[0] == 'remove':
if message[1] == self.__selfNodeAddr:
conn.send('FAIL REMOVE ' + message[1])
else:
# 默認沒開啟動態添加節點到集群的功能
self.removeNodeFromCluster(message[1], callback=functools.partial(self.__utilityCallback, conn=conn,
cmd='REMOVE', node=message[1]))
return True
elif message[0] == 'set_version':
# 切換到所有集群節點上的新代碼版本
self.setCodeVersion(message[1],
callback=functools.partial(self.__utilityCallback, conn=conn, cmd='SET_VERSION',
node=str(message[1])))
return True
也就是說,如果消息是'status'、'add'、'remove'或'set_version'這幾種工具消息之一(例如請求本節點的狀態或者切換新代碼的版本),此時就返回True
切換到集群節點的新版本
def setCodeVersion(self, newVersion, callback=None):
"""切換到所有群集節點上的新代碼版本。
您應該確保群集節點已更新,否則將無法應用命令。
:param newVersion:新的代碼版本
:type int
:param callback:將在成功或失敗時被調用
:type callback:function(`FAIL_REASON <#pysyncobj.FAIL_REASON > `_,None)
"""
assert isinstance(newVersion, int)
if newVersion > self.__selfCodeVersion:
raise Exception(
'wrong version, current version is %d, requested version is %d' % (self.__selfCodeVersion, newVersion))
if newVersion < self.__enabledCodeVersion:
raise Exception('wrong version, enabled version is %d, requested version is %d' % (
self.__enabledCodeVersion, newVersion))
self._applyCommand(pickle.dumps(newVersion), callback, _COMMAND_TYPE.VERSION)
_applyCommand
def _applyCommand(self, command, callback, commandType=None):
try:
if commandType is None:
self.__commandsQueue.put_nowait((command, callback))
else:
# 放到隊列中
self.__commandsQueue.put_nowait((_bchr(commandType) + command, callback))
# 默認appendEntriesUseBatch是TRUE,Send multiple entries in a single command. 提高整體性能
# 所以第一個條件就不滿足了
if not self.__conf.appendEntriesUseBatch and PIPE_NOTIFIER_ENABLED:
self.__pipeNotifier.notify()
except Queue.Full:
self.__callErrCallback(FAIL_REASON.QUEUE_FULL, callback)
node 的 onPartnerConnected方法
添加方法:
conn.setOnMessageReceivedCallback(self.__onMessageReceived)
conn.setOnDisconnectedCallback(self.__onDisconnected)
self.__status = NODE_STATUS.CONNECTED
回到初始化__initInTickThread方法中
以上已經把TCP服務器建設好,以及新的連接過來怎么處理的函數都分析完了
接下來是節點對象的創建
for nodeAddr in self.__otherNodesAddrs:
self.__nodes.append(Node(self, nodeAddr, shouldConnect))
self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1
self.__raftMatchIndex[nodeAddr] = 0
Node初始化里對shouldConnect的判斷有點奇怪!!
IP地址大的向小的發送TCP請求
if self.__shouldConnect:
self.__ip = globalDnsResolver().resolve(nodeAddr.rsplit(':', 1)[0])
self.__port = int(nodeAddr.rsplit(':', 1)[1])
# 新建TCP連接的對象,注意此處傳入的是self的__onMessageReceived,onDisconnected
self.__conn = TcpConnection(poller=syncObj._poller,
onConnected=self.__onConnected,
onMessageReceived=self.__onMessageReceived,
onDisconnected=self.__onDisconnected,
timeout=syncObj._getConf().connectionTimeout,
sendBufferSize=syncObj._getConf().sendBufferSize,
recvBufferSize=syncObj._getConf().recvBufferSize)
self.__conn.encryptor = self.__encryptor
此時創建的TCPConnection的對象,沒有socket,
此時跳轉到onTick函數里
for node in self.__nodes:
node.connectIfRequired() #與其他節點均建立連接
再次看到node.py中的connectIfRequired:
def connectIfRequired(self):
if not self.__shouldConnect:
return
# 由于第一次創建node對象的時候,還未Connect,此時不進入這個if語句內
if self.__status != NODE_STATUS.DISCONNECTED:
return
if time.time() - self.__lastConnectAttemptTime < self.__syncObj()._getConf().connectionRetryTime:
return
# 嘗試開始建立連接
self.__status = NODE_STATUS.CONNECTING
self.__lastConnectAttemptTime = time.time()
#此時調用tcp_connection.py中的connect方法
if not self.__conn.connect(self.__ip, self.__port):
self.__status = NODE_STATUS.DISCONNECTED
return
connect方法中,連接到比本節點IP地址更小的IP的節點
try:
self.__socket.connect((host, port))
logging.debug('__socket.connect')
連接完后,更改self.__fileno,以及狀態改為CONNECTING,
注冊收發處理函數__processConnection
__processConnection和之前的那個是差不多的,不過此處的__onMessageReceived已經不一樣了,此處用的是Syncobj里的_onMessageReceived
_onMessageReceived | Syncobj.py
消息類型是request_vote
如果消息的任期大于本節點的任期:
- 更改任期為接收到的任期
- 設置__votedFor=None
- 設置本節點狀態為FOLLOWER
- 收到這個消息,說明是沒有Leader,設置self.__raftLeader = None
本節點如果是FOLLOWER或者是候選者:
獲取發送消息的候選人的最后日志的任期和最后日志的索引
遇到以下情況直接返回:
- 候選人比本節點的任期小
- 候選人與本節點的任期相同,但是索引比本節點小
- 任期如果和本節點相同,本節點已經為其他節點投票,
否則,為該節點投票,發送回復消息:response_vote
消息類型是append_entries,同時消息的任期是大于等于本節點的任期的
因為只有Leader節點會發這個消息,首先判斷發送這個消息的節點是不是Leader,如果不是,設置它為Leader節點
再對消息的任期判斷:
if message['term'] > self.__raftCurrentTerm:
# 更新最新的任期
self.__raftCurrentTerm = message['term']
# 收到附加消息,表示已經有Leader了,不需要投票了,清空
self.__votedFor = None
# 設置本節點為FOLLOWER,主要是在候選者身份競爭Leader失敗時,變回FOLLOWER
self.__setState(_RAFT_STATE.FOLLOWER)
提取message里的條目,
# 提取消息內容
newEntries = message.get('entries', [])
serialized = message.get('serialized', None)
# Leader已經提交的日志的索引值,更新本節點記錄的信息
self.__leaderCommitIndex = leaderCommitIndex = message['commit_index']
常規的附加消息的處理:
if transmission is not None:
if transmission == 'start':
self.__recvTransmission = message['data']
self.__sendNextNodeIdx(nodeAddr, success=False, reset=False)
return
elif transmission == 'process':
self.__recvTransmission += message['data']
self.__sendNextNodeIdx(nodeAddr, success=False, reset=False)
return
elif transmission == 'finish':
self.__recvTransmission += message['data']
#本次數據接收完了,字節轉換回對象
newEntries = [pickle.loads(self.__recvTransmission)]
self.__recvTransmission = ''
else:
raise Exception('Wrong transmission type')
prevLogIdx = message['prevLogIdx']
prevLogTerm = message['prevLogTerm']
# 拿出新的日志條目
prevEntries = self.__getEntries(prevLogIdx)
# 如果本節點沒有Leader發送的附加消息的日志條目,請求對方發送
if not prevEntries:
self.__sendNextNodeIdx(nodeAddr, success=False, reset=True)
return
# 日志的條目對應的任期不一致,請求再發?
if prevEntries[0][2] != prevLogTerm:
self.__sendNextNodeIdx(nodeAddr, nextNodeIdx=prevLogIdx, success=False, reset=True)
return
# 有多個日志條目
if len(prevEntries) > 1:
# rollback cluster changes
# 先不看,默認沒開啟
if self.__conf.dynamicMembershipChange:
for entry in reversed(prevEntries[1:]):
clusterChangeRequest = self.__parseChangeClusterRequest(entry[0])
if clusterChangeRequest is not None:
self.__doChangeCluster(clusterChangeRequest, reverse=True)
# 有多個說明與當前Leader節點有沖突,這些日志條目要刪除
self.__deleteEntriesFrom(prevLogIdx + 1)
# 添加新的條目到節點日志里,newEntries是從Leader傳來的message中提取的
for entry in newEntries:
self.__raftLog.add(*entry)
# apply cluster changes
# 暫時不管
if self.__conf.dynamicMembershipChange:
for entry in newEntries:
clusterChangeRequest = self.__parseChangeClusterRequest(entry[0])
if clusterChangeRequest is not None:
self.__doChangeCluster(clusterChangeRequest)
nextNodeIdx = prevLogIdx + 1
if newEntries:
# 主要是針對多個條目的情況,取最新的索引,為什么不加1呢?
nextNodeIdx = newEntries[-1][1]
# 附加消息添加成功,返回下次節點接收的索引
self.__sendNextNodeIdx(nodeAddr, nextNodeIdx=nextNodeIdx, success=True)
快照部分先不管
self.__raftCommitIndex = min(leaderCommitIndex, self.__getCurrentLogIndex())
通過最新的日志條目的索引和Leader的索引來比較,得到最近已經提交的條目索引
消息類型為:apply_command
if message['type'] == 'apply_command':
if 'request_id' in message:
self._applyCommand(message['command'], (nodeAddr, message['request_id']))
else:
self._applyCommand(message['command'], None)
調用了_applyCommand
def _applyCommand(self, command, callback, commandType=None):
try:
if commandType is None:
self.__commandsQueue.put_nowait((command, callback))
else:
self.__commandsQueue.put_nowait((_bchr(commandType) + command, callback))
if not self.__conf.appendEntriesUseBatch and PIPE_NOTIFIER_ENABLED:
self.__pipeNotifier.notify()
except Queue.Full:
self.__callErrCallback(FAIL_REASON.QUEUE_FULL, callback)
把命令放到隊列之中
消息類型:apply_command_response
if message['type'] == 'apply_command_response':
requestID = message['request_id']
error = message.get('error', None)
callback = self.__commandsWaitingReply.pop(requestID, None)
if callback is not None:
if error is not None:
callback(None, error)
else:
idx = message['log_idx']
term = message['log_term']
assert idx > self.__raftLastApplied
self.__commandsWaitingCommit[idx].append((term, callback))
消息類型:next_node_idx
if self.__raftState == _RAFT_STATE.LEADER:
if message['type'] == 'next_node_idx':
reset = message['reset']
nextNodeIdx = message['next_node_idx']
success = message['success']
currentNodeIdx = nextNodeIdx - 1
if reset:
# 對于每一個服務器,需要發送給他的下一個日志條目的索引值
self.__raftNextIndex[nodeAddr] = nextNodeIdx
if success:
# 對于每一個服務器,已經復制給他的日志的最高索引值
self.__raftMatchIndex[nodeAddr] = currentNodeIdx
self.__lastResponseTime[nodeAddr] = time.time()
_onTick 最為復雜的函數
- 首先先檢查是否初始化過
- 查看是否需要加載文件,默認是不需要的
判斷是否需要重新選舉
#初始的時候狀態時FOLLOWER
if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE)
and self.__selfNodeAddr is not None:
#第一個條件表示:超時沒收到Leader的心跳包,超時——需要重新選舉;
#第二個條件判斷是否和其他節點有連接,第一次運行的時候是沒有的
if self.__raftElectionDeadline < time.time() and self.__connectedToAnyone():
#設置新的選舉死亡線,準備重新選舉 ,__generateRaftTimeout()
self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
#表明現在集群中沒有Leader
self.__raftLeader = None
#設置本節點的新的身份(候選者)
#此時self.__raftState=_RAFT_STATE.CANDIDATE
self.__setState(_RAFT_STATE.CANDIDATE)
#進入到下一個term ,CurrentTerm 表示的是本節點的任期
self.__raftCurrentTerm += 1
#先為自己投票
self.__votedFor = self._getSelfNodeAddr()
self.__votesCount = 1
#依次給其他的節點發送投票請求 ,剛開始和其他節點還沒有連接的時候發不出去
for node in self.__nodes:
node.send({
'type': 'request_vote',
'term': self.__raftCurrentTerm,
'last_log_index': self.__getCurrentLogIndex(),
'last_log_term': self.__getCurrentLogTerm(),
})
self.__onLeaderChanged()
if self.__votesCount > (len(self.__nodes) + 1) / 2:
#票數為所有節點的一半以上,成為Leader
self.__onBecomeLeader()
符合服務器設計遵守的原則:如果在超過選舉超時時間的情況下都沒有接收到領導人的心跳,或者候選人的投票請求,自己就變為候選人
self.__onLeaderChanged()
def __onLeaderChanged(self):
for id in sorted(self.__commandsWaitingReply):
self.__commandsWaitingReply[id](None, FAIL_REASON.LEADER_CHANGED)
self.__commandsWaitingReply = {}
如果本節點是Leader的處理
if self.__raftState == _RAFT_STATE.LEADER:
# 表明現在還有部分的日志條目未提交
while self.__raftCommitIndex < self.__getCurrentLogIndex():
nextCommitIndex = self.__raftCommitIndex + 1
count = 1
for node in self.__nodes:
if self.__raftMatchIndex[node.getAddress()] >= nextCommitIndex:
count += 1
# 表示超過半數的節點已經復制了該條目,更新raftCommitIndex的值
if count > (len(self.__nodes) + 1) / 2:
self.__raftCommitIndex = nextCommitIndex
else:
break
# 表示所有的日志條目已經被提交
self.__leaderCommitIndex = self.__raftCommitIndex
deadline = time.time() - self.__conf.leaderFallbackTimeout
count = 1
for node in self.__nodes:
if self.__lastResponseTime[node.getAddress()] > deadline:
count += 1
#判斷Leader是否與一半以上的節點均超時,如果是則變為Follower,重新選舉
if count <= (len(self.__nodes) + 1) / 2:
self.__setState(_RAFT_STATE.FOLLOWER)
self.__raftLeader = None
已經復制的條目中沒有應用到狀態機的條目的處理
if self.__raftCommitIndex > self.__raftLastApplied:
count = self.__raftCommitIndex - self.__raftLastApplied
#返回已經復制的條目中沒有應用到狀態機的條目
entries = self.__getEntries(self.__raftLastApplied + 1, count)
#entry中含有 (command, idx, term):entry[0]是命令,entry[1]是索引,entry[2]是任期
for entry in entries:
try:
# 取出條目的任期
currentTermID = entry[2]
# __commandsWaitingCommit取出對應索引的元素
subscribers = self.__commandsWaitingCommit.pop(entry[1], [])
# 應用命令,跳轉到下一個命令可以看__doApplyCommand分析
# 取出條目中對應的需要復制的方法
res = self.__doApplyCommand(entry[0])
for subscribeTermID, callback in subscribers:
if subscribeTermID == currentTermID:
# 調用對應的方法
callback(res, FAIL_REASON.SUCCESS)
else:
callback(None, FAIL_REASON.DISCARDED)
#每次應用到狀態機的日志都會+1
self.__raftLastApplied += 1
except SyncObjExceptionWrongVer as e:
logging.error(
'request to switch to unsupported code version (self version: %d, requested version: %d)' %
(self.__selfCodeVersion, e.ver))
#appendEntriesUseBatch默認為TRUE
if not self.__conf.appendEntriesUseBatch:
needSendAppendEntries = True
從__commandsWaitingCommit中取出事件,
將命令應用狀態機
__doApplyCommand()
def __doApplyCommand(self, command):
commandType = ord(command[:1])
# Skip no-op and membership change commands
if commandType == _COMMAND_TYPE.VERSION:
#把字節轉換為數據,版本的情況
ver = pickle.loads(command[1:])
#此時的命令版本過高,本節點沒有能處理該command的方法
if self.__selfCodeVersion < ver:
raise SyncObjExceptionWrongVer(ver)
oldVer = self.__enabledCodeVersion
# 獲得新的版本號
self.__enabledCodeVersion = ver
# onCodeVersionChanged默認為None
# This callback will be called
# when cluster is switched to new version.
callback = self.__conf.onCodeVersionChanged
# 設置新的可用方法版本的集合!
# 相當于給了一個上限,節點中高于這個版本的方法統統不可用
self.__onSetCodeVersion(ver)
# 默認為None,暫時不看
if callback is not None:
callback(oldVer, ver)
# 關于可用版本的設置命令“消息”處理完畢
return
if commandType != _COMMAND_TYPE.REGULAR:
return
# 處理REGULAR 的Command,字節轉換為對象
command = pickle.loads(command[1:])
args = []
kwargs = {
'_doApply': True,
}
# 非元組的Command-即不帶參數的Command?
if not isinstance(command, tuple):
funcID = command
# 帶參數的Command
elif len(command) == 2:
funcID, args = command
else:
# 不僅是元組,且帶了關鍵字參數
funcID, args, newKwArgs = command
kwargs.update(newKwArgs)
#返回含有復制裝飾器的方法,同時傳入參數
return self._idToMethod[funcID](*args, **kwargs)
主要是通過entry[0]取出對應的需要復制的方法,
定期發送心跳包
if self.__raftState == _RAFT_STATE.LEADER:
#needSendAppendEntries表示需要發送心跳包了,
if time.time() > self.__newAppendEntriesTime
or needSendAppendEntries:
self.__sendAppendEntries()
選舉成功或者周期到了,需要發送心跳包
onReady處理
# callback when hock SyncObj
# 初始化Syncobj時,__onReadyCalled為False
# self.__raftLastApplied == self.__leaderCommitIndex此時表示所有日志條目已經被復制到集群節點
if not self.__onReadyCalled and self.__raftLastApplied == self.__leaderCommitIndex:
#只要SyncObj同步來自leader的所有數據,就會調用此回調。
# onReady,默認為None
if self.__conf.onReady:
self.__conf.onReady()
# 此時變化為True
self.__onReadyCalled = True
暫時不管,因為沒有onReady()
self._checkCommandsToApply()
與其他節點建立好連接
for node in self.__nodes:
node.connectIfRequired()
此部分,上面內容已經分析過
每秒執行的任務
if time.time() > self.__lastReadonlyCheck + 1.0:
self.__lastReadonlyCheck = time.time()
newReadonlyNodes = []
for node in self.__readonlyNodes:
if node.isConnected():
newReadonlyNodes.append(node)
else:
self.__raftNextIndex.pop(node, None)
self.__raftMatchIndex.pop(node, None)
node._destroy()
先不管了,此部分是對只讀節點的處理