Spark Streaming: Socket WordCount

import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}


object SocketWordCount {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)
    //    1. Create the SparkConf object (local[*] gives enough threads; a receiver-based stream needs at least 2)
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName(this.getClass.getSimpleName)
    //    2. Create the StreamingContext with a 3-second batch interval
    val ssc: StreamingContext = new StreamingContext(conf,Seconds(3))
    //    3. Read data from the socket port to create a DStream
    val lineDstream: ReceiverInputDStream[String] = ssc.socketTextStream("localhost",9999)
    //    4. Flatten each line into individual words
    val wordDstream: DStream[String] = lineDstream.flatMap(_.split(" "))
    //    5. Map each word to a tuple (word, 1)
    val wordToOneDstream: DStream[(String, Int)] = wordDstream.map((_,1))
    //    6. Count the occurrences of each word with reduceByKey
    val wordToCountDstream: DStream[(String, Int)] = wordToOneDstream.reduceByKey(_+_)
    //    7. Print the results of each batch
    wordToCountDstream.print()
    //    8. Start the streaming job
    ssc.start()
    //    9. Block the main thread so the driver does not exit
    ssc.awaitTermination()
  }
  
}
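Run the program only after a netcat listener is already serving port 9999 (see the note on nc below); otherwise the socket receiver cannot connect. Each 3-second batch then prints the word counts it received. For example, typing "hello spark hello" into the netcat window should produce output roughly like (hello,2) and (spark,1) for that batch.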

Install the nc command on Windows: blog.csdn.net/sinat_36710…
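Once nc is available, a listener for the example above can be started with something like the following (the exact flags differ between nc builds; -lk works on many Linux/macOS versions, while some Windows builds expect -l -p instead):

nc -lk 9999

The Spark job connects to this listener on localhost:9999 and counts whatever lines are typed into it.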