需求: 將數據表格中的單列拆分成多行
解決方法: 在 DataFrame 上使用 explode。explode 可將 array 類型的列拆分成多行;udf(自定義函數)可用來定制數據的處理邏輯,最後生成 array 類型的列供 explode 使用。
代碼示例:
import org.apache.spark.sql.functions.{udf, array, explode, col}
/**
 * One per-day usage record produced by the split.
 *
 * @param date  the day this share belongs to, as a formatted date string
 * @param usage the amount of usage attributed to that day
 */
case class Result(
  date: String,
  usage: Double
)
/**
 * Spark UDF that turns one usage record into an array of per-day [[Result]]
 * values, ready to be expanded into rows with `explode`.
 *
 * When `datediff` equals 32, the usage is spread evenly over `datediff`
 * consecutive days starting at `startdate` (one element per day). Any other
 * record is passed through as a single element carrying the full usage on
 * `startdate`.
 *
 * Relies on two names that are not defined in this block: `format` (used via
 * `parse` and `format`) and `DateTime` (used via `plusDays` and `toDate`).
 * NOTE(review): these look like a `java.text.SimpleDateFormat` and Joda-Time's
 * `DateTime` -- confirm against the rest of the file.
 *
 * NOTE(review): the split only triggers for `datediff == 32`; this hard-coded
 * value is kept from the original -- confirm whether a more general condition
 * (e.g. `datediff > 1`) was intended.
 */
def splitUsage = udf { (datediff: Integer, startdate: String, usage: Integer) =>
  if (datediff == 32) {
    val start = new DateTime(format.parse(startdate))
    // Every day gets the same share; compute it once outside the loop.
    val dailyUsage = usage.toDouble / datediff.toDouble
    // `until` (not `to`) yields exactly `datediff` elements, so the daily shares
    // add up to the original usage; `plusDays(i)` advances the date per element
    // instead of repeating a single fixed day.
    (0 until datediff)
      .map(i => Result(format.format(start.plusDays(i).toDate()), dailyUsage))
      .toArray
  } else {
    Array(Result(startdate, usage.toDouble))
  }
}
// Attach the per-day breakdown produced by splitUsage as an array column.
val df2 = df.withColumn(
  "dayusage",
  splitUsage(col("datediff"), col("startdate"), col("usage"))
)

// Expand the array: one output row per array element. The exploded struct is
// read back below under the column name "col".
val df3 = df2.select(col("*"), explode(col("dayusage")))

// Keep the original identifying columns and flatten the exploded struct into
// plain "date" and "usage" columns.
val result = df3.select(
  col("Id"),
  col("startdate"),
  col("enddate"),
  col("datediff"),
  col("did"),
  col("col").getField("date").alias("date"),
  col("col").getField("usage").alias("usage")
)