...
Code Block | ||
---|---|---|
| ||
// In build.sbt
// Spark itself is supplied by the cluster at runtime, hence the "provided" scope:
// it is available for compilation but left out of the assembled jar.
val sparkVersion = "2.1.0"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
  "org.apache.spark" %% "spark-sql"  % sparkVersion % "provided",
  // NOTE(review): Spark 2.x ships a built-in CSV data source, so this external
  // package is presumably only needed when building against Spark 1.x — confirm.
  "com.databricks"   %% "spark-csv"  % "1.3.0"
)
assembly Dependency
Code Block | ||
---|---|---|
| ||
// In build.sbt
import sbt.Keys._

// Leave the Scala standard library out of the assembled (fat) jar.
assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
...
Code Block | ||
---|---|---|
| ||
import org.apache.spark.sql.SparkSession

// Creation of the SparkSession.
// With Spark 2.x a single SparkSession is the entry point to the DataFrame API;
// it replaces the SparkConf / SparkContext / SQLContext setup used with Spark 1.x.
// getOrCreate() returns the already-running session if there is one.
val sparkSession = SparkSession.builder()
  .appName("example-spark-scala-read-and-write-from-hdfs")
  .getOrCreate()
How to write a file to HDFS with Spark Scala?
...
Code Block | ||
---|---|---|
| ||
import org.apache.spark.sql.SaveMode

// Defining a HelloWorld class: one instance becomes one row with a single
// "message" column.
case class HelloWorld(message: String)

// ====== Creating a dataframe with 1 partition
// NOTE(review): toDF() on a Seq needs the SparkSession's implicits in scope
// (e.g. `import sparkSession.implicits._`) — confirm they are imported earlier.
val df = Seq(HelloWorld("helloworld")).toDF().coalesce(1)

// ======= Writing files
// SaveMode.Overwrite replaces whatever already exists at the target path.

// Writing Dataframe as parquet file
df.write
  .mode(SaveMode.Overwrite)
  .parquet(hdfs_master + "user/hdfs/wiki/testwiki")

// Writing Dataframe as csv file
df.write
  .mode(SaveMode.Overwrite)
  .csv(hdfs_master + "user/hdfs/wiki/testwiki.csv")
...
Code Block | ||
---|---|---|
| ||
// ======= Reading files

// Reading parquet files into a Spark Dataframe
val df_parquet = sparkSession.read
  .parquet(hdfs_master + "user/hdfs/wiki/testwiki")

// Reading csv files into a Spark Dataframe
// inferSchema makes Spark derive column types from the data instead of
// reading every column as a string.
val df_csv = sparkSession.read
  .option("inferSchema", "true")
  .csv(hdfs_master + "user/hdfs/wiki/testwiki.csv")
...