Java Code for Hadoop HDFS File Operations
Published: 2019-06-26


1. Create a directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx");
		fs.mkdirs(path);  // fs.create() would create a file; mkdirs() creates a directory
		fs.close();
	}
}

2. Delete a directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx");
		fs.delete(path, true);  // true: delete the directory recursively
		fs.close();
	}
}

3. Write a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
		FSDataOutputStream out = fs.create(path);
		out.writeUTF("da jia hao,cai shi zhen de hao!");
		out.close();  // close the stream so the data is flushed to HDFS
		fs.close();
	}
}

4. Read a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");

		if (fs.exists(path)) {
			FSDataInputStream is = fs.open(path);
			FileStatus status = fs.getFileStatus(path);
			byte[] buffer = new byte[(int) status.getLen()];
			is.readFully(0, buffer);
			is.close();
			fs.close();
			// use new String(buffer): buffer.toString() would only print the array reference
			System.out.println(new String(buffer));
		}
	}
}
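Note that section 3 wrote this file with writeUTF, which prefixes the string with a 2-byte length, so the raw bytes read above include that prefix. A matching way to read such a file back is readUTF; a minimal sketch, reusing the fs and path variables from the example above:

FSDataInputStream is = fs.open(path);
String text = is.readUTF();  // consumes the 2-byte length prefix written by writeUTF
is.close();
System.out.println(text);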

5. Upload a local file to HDFS

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path src = new Path("/home/hadoop/xxxx.txt");
		Path dst = new Path("/user/hadoop/hdfs/");
		fs.copyFromLocalFile(src, dst);
		fs.close();
	}
}

6. Delete a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
		fs.delete(path, false);  // false: non-recursive, the target is a single file
		fs.close();
	}
}

7. List all sub-directories and files under a given directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {
	static Configuration conf = new Configuration();

	public static void main(String[] args) throws IOException {
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop");
		getFile(path, fs);
		//fs.close();
	}

	public static void getFile(Path path, FileSystem fs) throws IOException {
		FileStatus[] fileStatus = fs.listStatus(path);
		for (int i = 0; i < fileStatus.length; i++) {
			if (fileStatus[i].isDir()) {
				// recurse into sub-directories
				getFile(fileStatus[i].getPath(), fs);
			} else {
				// print the full path of each file
				System.out.println(fileStatus[i].getPath().toString());
			}
		}
	}
}

8. Find where a file is located in the HDFS cluster

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FindFile {

	public static void main(String[] args) throws IOException {
		getFileLocal();
	}

	/**
	 * Find where a file's blocks are stored in the HDFS cluster.
	 */
	public static void getFileLocal() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");

		FileStatus status = fs.getFileStatus(path);
		BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());

		int length = locations.length;
		for (int i = 0; i < length; i++) {
			String[] hosts = locations[i].getHosts();  // datanodes holding this block
			System.out.println("block_" + i + "_location: " + hosts[0]);
		}
		fs.close();
	}
}

9. Get the names of all nodes in the HDFS cluster

package com.hadoop.file;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {

	public static void main(String[] args) throws IOException {
		getHDFSNode();
	}

	/**
	 * List the names of all datanodes in the HDFS cluster.
	 */
	public static void getHDFSNode() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		DistributedFileSystem dfs = (DistributedFileSystem) fs;
		DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();

		for (int i = 0; i < dataNodeStats.length; i++) {
			System.out.println("DataNode_" + i + "_Name: " + dataNodeStats[i].getHostName());
		}
		fs.close();
	}
}

 

When operating on FileSystem in a pseudo-distributed environment, the following exception may occur:

The Java code is as follows:

FileSystem fs = FileSystem.get(conf);
in = fs.open(new Path("hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in"));

 

The exception thrown is:

    Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in, expected: file:/// 
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310) 
        at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47) 
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357) 
        at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245) 
        at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125) 
        at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283) 
        at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356) 
        at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23) 
Solution:
Copy Hadoop's core-site.xml and hdfs-site.xml into the current project (under the bin folder of the Eclipse workspace).

Summary:

Because a remote HDFS is being accessed, the FileSystem must be obtained via a URI; otherwise the default local file system (file:///) is used, which is exactly what the "Wrong FS" error above complains about.
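For reference, a minimal sketch of obtaining the FileSystem through a URI, assuming the hdfs://localhost:9000 namenode address from the snippet above (the class name OpenWithUri is only for illustration):

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenWithUri {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		// Bind the FileSystem to the namenode explicitly instead of relying
		// on core-site.xml being on the classpath.
		FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
		FSDataInputStream in = fs.open(new Path("/wzqSearchEngine/crawl/inject_in"));
		in.close();
		fs.close();
	}
}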


Reposted from: https://www.cnblogs.com/wuzhenquan/p/3617751.html
