GitHub project: example-java-read-and-write-from-hdfs
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
</dependency>
HDFS URIs have the following form: hdfs://namenodedns:port/user/hdfs/folder/file.csv
The default NameNode port is 8020.
// ====== Init HDFS File System Object Configuration conf = new Configuration(); // Set FileSystem URI conf.set("fs.defaultFS", hdfsuri); // Because of Maven conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); // Set HADOOP user System.setProperty("HADOOP_USER_NAME", "hdfs"); System.setProperty("hadoop.home.dir", "/"); //Get the filesystem - HDFS FileSystem fs = FileSystem.get(URI.create(hdfsuri), conf); |
//==== Create folder if not exists Path workingDir=fs.getWorkingDirectory(); Path newFolderPath= new Path(path); if(!fs.exists(newFolderPath)) { // Create new Directory fs.mkdirs(newFolderPath); logger.info("Path "+path+" created."); } |
//==== Write file logger.info("Begin Write file into hdfs"); //Create a path Path hdfswritepath = new Path(newFolderPath + "/" + fileName); //Init output stream FSDataOutputStream outputStream=fs.create(hdfswritepath); //Cassical output stream usage outputStream.writeBytes(fileContent); outputStream.close(); logger.info("End Write file into hdfs"); |
//==== Read file logger.info("Read file from hdfs"); //Create a path Path hdfsreadpath = new Path(newFolderPath + "/" + fileName); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //Classical input stream usage String out= IOUtils.toString(inputStream, "UTF-8"); logger.info(out); inputStream.close(); fs.close(); |