Spark — Scala Word Count Program

      Comments Off on Spark- Scala Wordcount Program
Spread the love

Practice only — no theory.

 import org.apache.spark.SparkContext

 import org.apache.log4j.Level
 import org.apache.log4j.Logger

 object wordCount {

  /**
   * Word-count driver: reads a text file, upper-cases each word,
   * counts occurrences, and prints (word, count) pairs to stdout.
   *
   * The input path may be supplied as the first program argument;
   * it defaults to the original hard-coded path `c:\data.txt`
   * for backward compatibility.
   */
  def main(args: Array[String]): Unit = {
    // Silence Spark's verbose INFO logging; keep errors visible.
    Logger.getLogger("org").setLevel(Level.ERROR)

    // "local[*]" runs Spark locally using all available cores.
    val sc = new SparkContext("local[*]", "wordcount")
    try {
      val inputPath = if (args.nonEmpty) args(0) else "c:\\data.txt"

      val input = sc.textFile(inputPath)
      val words = input.flatMap(_.split(" "))
      val upperWords = words.map(_.toUpperCase())
      val pairs = upperWords.map(w => (w, 1))
      val counts = pairs.reduceByKey(_ + _)
      counts.collect().foreach(println)

      // Block until the user presses Enter so the Spark UI at
      // http://localhost:4040/ stays available for inspection.
      scala.io.StdIn.readLine()
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}

View the running application at:

http://localhost:4040/

Using the above URL you can track things like the Spark job and task status,

as well as the DAG flow diagram.