Spark-Core Source Code Deep Dive (7): The "Driver" Service Startup Flow

This article walks through how the Driver service is started in Spark. Without further ado, let's go straight to the source code.

The Driver service is started as part of creating the Driver's runtime environment, as shown below.

In SparkContext:

// Create the Spark execution environment (cache, map output tracker, etc)
_env = createSparkEnv(_conf, isLocal, listenerBus)
SparkEnv.set(_env)

The createSparkEnv called here delegates to SparkEnv.createDriverEnv:

private[spark] def createSparkEnv(
    conf: SparkConf,
    isLocal: Boolean,
    listenerBus: LiveListenerBus): SparkEnv = {
  // Create the Driver's runtime environment. Note that numDriverCores is the number of cores used for execution in local mode; in any other mode it is 0
  SparkEnv.createDriverEnv(conf, isLocal, listenerBus, SparkContext.numDriverCores(master))
}

How numDriverCores is computed:

/**
 * The number of driver cores to use for execution in local mode, 0 otherwise.
 */
private[spark] def numDriverCores(master: String): Int = {
  def convertToInt(threads: String): Int = {
    if (threads == "*") Runtime.getRuntime.availableProcessors() else threads.toInt
  }
  master match {
    case "local" => 1
    case SparkMasterRegex.LOCAL_N_REGEX(threads) => convertToInt(threads)
    case SparkMasterRegex.LOCAL_N_FAILURES_REGEX(threads, _) => convertToInt(threads)
    case _ => 0 // driver is not used for execution
  }
}
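
To make the mapping from master string to core count concrete, here is a minimal, self-contained sketch of the same logic; the regexes are simplified approximations of SparkMasterRegex and the object name is made up for this example:

object NumDriverCoresDemo {
  // Simplified approximations of SparkMasterRegex.LOCAL_N_REGEX / LOCAL_N_FAILURES_REGEX
  private val LocalN = """local\[([0-9]+|\*)\]""".r
  private val LocalNFailures = """local\[([0-9]+|\*)\s*,\s*([0-9]+)\]""".r

  def numDriverCores(master: String): Int = {
    def toInt(threads: String): Int =
      if (threads == "*") Runtime.getRuntime.availableProcessors() else threads.toInt
    master match {
      case "local"                    => 1
      case LocalN(threads)            => toInt(threads)
      case LocalNFailures(threads, _) => toInt(threads)
      case _                          => 0 // cluster masters: the driver runs no tasks
    }
  }

  def main(args: Array[String]): Unit = {
    Seq("local", "local[4]", "local[*]", "local[2,3]", "spark://host:7077", "yarn-client")
      .foreach(m => println(s"$m -> ${numDriverCores(m)}"))
  }
}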

The code in SparkEnv that creates the Driver's runtime environment:

/**
 * Create a SparkEnv for the driver.
 */
private[spark] def createDriverEnv(
    conf: SparkConf,
    isLocal: Boolean,
    listenerBus: LiveListenerBus,
    numCores: Int,
    mockOutputCommitCoordinator: Option[OutputCommitCoordinator] = None): SparkEnv = {
  assert(conf.contains("spark.driver.host"), "spark.driver.host is not set on the driver!")
  assert(conf.contains("spark.driver.port"), "spark.driver.port is not set on the driver!")
  val hostname = conf.get("spark.driver.host")
  val port = conf.get("spark.driver.port").toInt
  create(
    conf,
    SparkContext.DRIVER_IDENTIFIER,  // "driver"
    hostname,
    port,
    isDriver = true,
    isLocal = isLocal,
    numUsableCores = numCores,
    listenerBus = listenerBus,
    mockOutputCommitCoordinator = mockOutputCommitCoordinator
  )
}
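
The two asserts pass because SparkContext fills in defaults before this point; in Spark 1.6 the relevant lines look roughly like the following (paraphrased from SparkContext's initialization, not quoted verbatim):

// In SparkContext initialization (Spark 1.6.x, paraphrased):
_conf.setIfMissing("spark.driver.host", Utils.localHostName())
// "0" means "pick a random free port" -- this is the startPort that startServiceOnPort sees below
_conf.setIfMissing("spark.driver.port", "0")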

我們?cè)谇懊娴奈恼轮写笾碌臑g覽過,現(xiàn)在聚焦Driver服務(wù)啟動(dòng)相關(guān)的部分:

// Since we are on the Driver, actorSystemName here is "sparkDriver"
// Note: Spark 2.x has removed the Akka dependency, so there these are named driverSystemName and executorSystemName
// Create the ActorSystem for Akka and get the port it binds to.
val actorSystemName = if (isDriver) driverActorSystemName else executorActorSystemName
// Create the RpcEnv; since isDriver is true, clientMode here is false
val rpcEnv = RpcEnv.create(actorSystemName, hostname, port, conf, securityManager,
  clientMode = !isDriver)

Next, RpcEnv's create method:

def create(
    name: String,
    host: String,
    port: Int,
    conf: SparkConf,
    securityManager: SecurityManager,
    clientMode: Boolean = false): RpcEnv = {
  // Using Reflection to create the RpcEnv to avoid to depend on Akka directly
  // Wrap everything into an RpcEnvConfig; here name is "sparkDriver", host is the driver's
  // hostname (spark.driver.host), and clientMode is false
  val config = RpcEnvConfig(conf, name, host, port, securityManager, clientMode)
  // getRpcEnvFactory obtains the factory via reflection (NettyRpcEnvFactory by default),
  // so this ends up calling NettyRpcEnvFactory's create() method
  getRpcEnvFactory(conf).create(config)
}
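
The factory lookup is driven by configuration plus reflection, which is what keeps RpcEnv free of a hard compile-time dependency on any single implementation. A self-contained toy version of that pattern (the names below are made up; in Spark 1.6 the real lookup is keyed on the spark.rpc setting, which defaults to the Netty implementation):

trait Factory { def create(name: String): String }
class NettyLikeFactory extends Factory {
  override def create(name: String): String = s"netty-style env for '$name'"
}

object FactoryLoader {
  def loadFactory(conf: Map[String, String]): Factory = {
    // short alias -> class name (everything here lives in the default package)
    val shortNames = Map("netty" -> "NettyLikeFactory")
    val key = conf.getOrElse("spark.rpc", "netty")
    val className = shortNames.getOrElse(key, key)
    // reflection means the caller never names a concrete factory class at compile time
    Class.forName(className).newInstance().asInstanceOf[Factory]
  }

  def main(args: Array[String]): Unit =
    println(loadFactory(Map.empty).create("sparkDriver"))
}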

The underlying implementation is NettyRpcEnvFactory's create method:

def create(config: RpcEnvConfig): RpcEnv = {
  val sparkConf = config.conf
  // Use JavaSerializerInstance in multiple threads is safe. However, if we plan to support
  // KryoSerializer in future, we have to use ThreadLocal to store SerializerInstance
  val javaSerializerInstance =
    new JavaSerializer(sparkConf).newInstance().asInstanceOf[JavaSerializerInstance]
  // Instantiate the NettyRpcEnv, bound to config.host (the driver's hostname)
  val nettyEnv =
    new NettyRpcEnv(sparkConf, javaSerializerInstance, config.host, config.securityManager)
  // clientMode was passed in as false, so this condition is true
  if (!config.clientMode) {
    // define a function startNettyRpcEnv that starts the server on a given port
    val startNettyRpcEnv: Int => (NettyRpcEnv, Int) = { actualPort =>
      nettyEnv.startServer(actualPort)
      // return the NettyRpcEnv together with the port it actually bound to
      (nettyEnv, nettyEnv.address.port)
    }
    try {
      // Start the "sparkDriver" service. Note that the function defined above is passed in,
      // config.name here is "sparkDriver", and ._1 extracts the NettyRpcEnv from the result
      Utils.startServiceOnPort(config.port, startNettyRpcEnv, sparkConf, config.name)._1
    } catch {
      case NonFatal(e) =>
        nettyEnv.shutdown()
        throw e
    }
  }
  // return the NettyRpcEnv
  nettyEnv
}

The startServiceOnPort method in Utils:

def startServiceOnPort[T](
    startPort: Int,
    startService: Int => (T, Int),
    conf: SparkConf,
    serviceName: String = ""): (T, Int) = {
  // In our case startPort is 0 (spark.driver.port defaults to "0"), so a random free port will be chosen
  require(startPort == 0 || (1024 <= startPort && startPort < 65536),
    "startPort should be between 1024 and 65535 (inclusive), or 0 for a random free port.")
  // " 'sparkDriver'"
  val serviceString = if (serviceName.isEmpty) "" else s" '$serviceName'"
  // Read from "spark.port.maxRetries"; if it is not set, the maximum number of retries
  // is 100 when "spark.testing" is set, and 16 otherwise
  val maxRetries = portMaxRetries(conf)
  for (offset <- 0 to maxRetries) {
    // compute the port to try on this attempt
    // Do not increment port if startPort is 0, which is treated as a special port
    val tryPort = if (startPort == 0) {
      startPort
    } else {
      // If the new port wraps around, do not try a privilege port
      ((startPort + offset - 1024) % (65536 - 1024)) + 1024
    }
    try {
      // Start the service and return it together with its port. startService here is the
      // function passed in above (startNettyRpcEnv), so this call effectively executes
      // nettyEnv.startServer(tryPort)
      val (service, port) = startService(tryPort)
      // log on success; for the driver, serviceString is " 'sparkDriver'"
      logInfo(s"Successfully started service$serviceString on port $port.")
      // return the service and the port it bound to
      return (service, port)
    } catch {
      case e: Exception if isBindCollision(e) =>
        if (offset >= maxRetries) {
          val exceptionMessage = s"${e.getMessage}: Service$serviceString failed after " +
            s"$maxRetries retries! Consider explicitly setting the appropriate port for the " +
            s"service$serviceString (for example spark.ui.port for SparkUI) to an available " +
            "port or increasing spark.port.maxRetries."
          val exception = new BindException(exceptionMessage)
          // restore original stack trace
          exception.setStackTrace(e.getStackTrace)
          throw exception
        }
        logWarning(s"Service$serviceString could not bind on port $tryPort. " +
          s"Attempting port ${tryPort + 1}.")
    }
  }
  // Should never happen
  throw new SparkException(s"Failed to start service$serviceString on port $startPort")
}
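
Stripped of Spark specifics, this is a generic "try to bind, on collision move to the next port" loop. A self-contained sketch of the same idea using a plain ServerSocket (names are made up for this example, and the special handling of startPort == 0 is omitted):

import java.net.{BindException, ServerSocket}

object PortRetryDemo {
  /** Try startPort, startPort + 1, ... until one binds or maxRetries is exhausted. */
  def startOnPort[T](startPort: Int, maxRetries: Int)(startService: Int => T): (T, Int) = {
    var offset = 0
    while (offset <= maxRetries) {
      val tryPort = startPort + offset
      try {
        return (startService(tryPort), tryPort)
      } catch {
        case _: BindException =>
          println(s"Port $tryPort already in use, trying ${tryPort + 1}")
          offset += 1
      }
    }
    throw new BindException(s"Failed to bind after $maxRetries retries starting at $startPort")
  }

  def main(args: Array[String]): Unit = {
    val blocker = new ServerSocket(4040)   // occupy a port on purpose
    val (socket, port) = startOnPort(4040, maxRetries = 10)(p => new ServerSocket(p))
    println(s"bound on port $port")        // expected: 4041
    socket.close(); blocker.close()
  }
}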

Now let's take a closer look at NettyRpcEnv's startServer method. We won't trace the startup any deeper here; ultimately it creates a TransportServer.

def startServer(port: Int): Unit = {
  // first build the bootstraps (a SASL bootstrap if authentication is enabled)
  val bootstraps: java.util.List[TransportServerBootstrap] =
    if (securityManager.isAuthenticationEnabled()) {
      java.util.Arrays.asList(new SaslServerBootstrap(transportConf, securityManager))
    } else {
      java.util.Collections.emptyList()
    }
  // create the underlying TransportServer
  server = transportContext.createServer(host, port, bootstraps)
  // register the RpcEndpointVerifier endpoint with the dispatcher
  dispatcher.registerRpcEndpoint(
    RpcEndpointVerifier.NAME, new RpcEndpointVerifier(this, dispatcher))
}
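
registerRpcEndpoint simply puts the endpoint into the Dispatcher's name-to-endpoint table, and RpcEndpointVerifier is itself an ordinary endpoint whose only job is answering "is an endpoint with this name registered here?" for remote clients before they build a reference to it. A toy illustration of that idea (not Spark's API, just the concept):

import scala.collection.mutable

object VerifierDemo {
  // A toy "dispatcher": a registry of named endpoints, modeled as message handlers
  private val endpoints = mutable.Map[String, String => String]()

  def register(name: String, handler: String => String): Unit = endpoints(name) = handler

  def main(args: Array[String]): Unit = {
    // The verifier is registered like any other endpoint and answers existence checks
    register("endpoint-verifier", name => endpoints.contains(name).toString)
    register("MapOutputTracker", msg => s"tracker received: $msg")

    val verify = endpoints("endpoint-verifier")
    println(verify("MapOutputTracker"))    // true  -> safe to set up a remote reference
    println(verify("SomeMissingEndpoint")) // false -> no such endpoint on this RpcEnv
  }
}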

Back in SparkEnv: after the "sparkDriver" service has been started, an Akka ActorSystem is also created for legacy code. We won't analyze its creation in detail.

// Start the sparkDriverActorSystem service; Spark 2.x has removed the Akka dependency entirely
val actorSystem: ActorSystem =
  if (rpcEnv.isInstanceOf[AkkaRpcEnv]) {
    rpcEnv.asInstanceOf[AkkaRpcEnv].actorSystem
  } else {
    val actorSystemPort =
      if (port == 0 || rpcEnv.address == null) {
        port
      } else {
        rpcEnv.address.port + 1
      }
    // Create a ActorSystem for legacy codes
    AkkaUtils.createActorSystem(
      actorSystemName + "ActorSystem",
      hostname,
      actorSystemPort,
      conf,
      securityManager
    )._1
  }
  
// Finally, overwrite the original port in the configuration with the port the service actually bound to
if (isDriver) {
  conf.set("spark.driver.port", rpcEnv.address.port.toString)
} else if (rpcEnv.address != null) {
  conf.set("spark.executor.port", rpcEnv.address.port.toString)
}
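
Because the port that was actually bound is written back into the configuration, you can confirm it from a running application; for example, in spark-shell (where sc already exists) the value matches the "Successfully started service" line in the log below:

// spark-shell: sc is the already-created SparkContext
// prints the port the "sparkDriver" service actually bound to, e.g. 33861 in the log below
println(sc.getConf.get("spark.driver.port"))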

When submitting an application with spark-submit in client mode, you can see the corresponding log output:

17/03/02 09:38:28 INFO Utils: Successfully started service 'sparkDriver' on port 33861.
17/03/02 09:38:29 INFO Slf4jLogger: Slf4jLogger started
17/03/02 09:38:29 INFO Remoting: Starting remoting
17/03/02 09:38:29 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriverActorSystem@172.17.0.2:34803]
17/03/02 09:38:29 INFO Utils: Successfully started service 'sparkDriverActorSystem' on port 34803.

Note: this article is based on the Spark 1.6.3 source code, with notes on the corresponding changes in Spark 2.x. Links to the source are given here for reference:

Spark 1.6.3 source code

Spark 2.1.0 source code

This is an original article. You are welcome to repost it; please credit the source and the author. Thank you!
