把本地文件夹下的所有文件上传到 HDFS 并合并成一个文件
uttq1257
9年前
来自: http://my.oschina.net/u/914897/blog/616682
需要自己写代码来实现:
package com.jason.hadoop.example;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Uploads every regular file found directly inside a local directory to a
 * single file on HDFS, concatenating their contents in the order in which
 * {@code FileSystem.listStatus} returns them.
 *
 * <p>Usage: {@code PutMerge <local input dir> <hdfs uri> <hdfs output file>}
 *
 * @author jason
 */
public class PutMerge {

    /** Size in bytes of the copy buffer shared by all input files. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} local input directory, {@code args[1]} URI of
     *             the target file system (e.g. {@code hdfs://localhost:9000}),
     *             {@code args[2]} path of the merged output file on that file
     *             system
     * @throws IOException if listing, reading or writing fails; the exception
     *                     is propagated so the process exits with a non-zero
     *                     status instead of silently reporting success
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 3) {
            System.err.println(
                    "Usage: PutMerge <local input dir> <hdfs uri> <hdfs output file>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // NOTE(review): Configuration.addResource(String) expects a plain classpath
        // resource name; the "classpath:/" prefix looks like a Spring idiom and may
        // cause these files to be silently skipped. Confirm the intended location.
        conf.addResource("classpath:/hadoop/core-site.xml");
        conf.addResource("classpath:/hadoop/hdfs-site.xml");
        conf.addResource("classpath:/hadoop/mapred-site.xml");

        FileSystem hdfs = FileSystem.get(URI.create(args[1]), conf);
        FileSystem local = FileSystem.getLocal(conf);

        Path inputDir = new Path(args[0]);
        Path hdfsFile = new Path(args[2]);

        FileStatus[] inputFiles = local.listStatus(inputDir);
        if (inputFiles == null) {
            // Fail before creating the output so no empty file is left behind.
            throw new IOException("Cannot list local input directory: " + inputDir);
        }

        byte[] buffer = new byte[BUFFER_SIZE];
        FSDataOutputStream out = hdfs.create(hdfsFile);
        try {
            for (int i = 0; i < inputFiles.length; i++) {
                if (inputFiles[i].isDir()) {
                    // Only regular files can be opened for reading; skip sub-directories.
                    continue;
                }
                Path source = inputFiles[i].getPath();
                System.out.println(source.getName());
                appendFile(local, source, out, buffer);
            }
        } finally {
            // Always release the HDFS stream, even when a copy fails part-way.
            out.close();
        }
    }

    /**
     * Appends the full contents of {@code source} to {@code out}, closing the
     * input stream whether or not the copy succeeds.
     */
    private static void appendFile(FileSystem fs, Path source,
            FSDataOutputStream out, byte[] buffer) throws IOException {
        FSDataInputStream in = fs.open(source);
        try {
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
        } finally {
            in.close();
        }
    }
}
把工程打成jar包后,执行以下命令即可:
hadoop jar hadoopExample-1.0-SNAPSHOT.jar com.jason.hadoop.example.PutMerge /home/jason/hadoop-1.0.1/put_merge hdfs://localhost:9000 /example/put_merge/in/merge_222.txt