从 RDD 队列中创建 Spark Streaming 的 DStream（queueStream）

179 阅读1分钟
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.collection.mutable

/**
 * Spark Streaming demo that builds a DStream from an in-memory queue of RDDs
 * and prints, for every batch, how many times each integer occurred.
 *
 * The job runs locally (`local[*]`) with a 3-second batch interval. After the
 * streaming context is started, the driver thread pushes five RDDs
 * (each holding the integers 1 to 300 in 10 partitions) into the queue,
 * pausing 2 seconds between pushes, and then blocks until the context terminates.
 */
object RddWordCount {
  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Silence everything below WARN for loggers under "org" so the batch output stays readable.
    Logger.getLogger("org").setLevel(Level.WARN)
    // 1. Create the SparkConf
    val conf: SparkConf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[*]")
    // 2. Create the StreamingContext with a 3-second batch interval
    val ssc = new StreamingContext(conf, Seconds(3))
    // 3. Create the queue that feeds RDDs into the stream
    val queue = new mutable.Queue[RDD[Int]]()
    // 4. Build a DStream from the queue; oneAtATime = false means a batch is not
    //    limited to a single RDD from the queue
    val rddDs: InputDStream[Int] = ssc.queueStream(queue, oneAtATime = false)
    // 5. Count the occurrences of each value within a batch
    val result: DStream[(Int, Int)] = rddDs.map((_, 1)).reduceByKey(_ + _)
    result.print()
    ssc.start()
    // Feed the queue after start() and before awaitTermination():
    // awaitTermination() blocks, so nothing placed after it would run while streaming.
    for (_ <- 1 to 5) {
      // The streaming scheduler reads this queue from another thread, so guard the
      // mutation with the queue's monitor (same pattern as Spark's QueueStream example).
      queue.synchronized {
        queue += ssc.sparkContext.makeRDD(1 to 300, 10)
      }
      Thread.sleep(2000)
    }
    ssc.awaitTermination()
  }

}