Java Code for Hadoop HDFS File Operations
Published: 2019-06-26


1. Create a directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx");
		fs.mkdirs(path);  // fs.create() would create a file; mkdirs() creates a directory
		fs.close();
	}
}

2. Delete a directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx");
		fs.delete(path, true);  // true: delete the directory recursively
		fs.close();
	}
}

3. Write a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
		FSDataOutputStream out = fs.create(path);
		out.writeUTF("da jia hao,cai shi zhen de hao!");
		out.close();  // close the stream so the data is flushed to HDFS
		fs.close();
	}
}

4. Read a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");

		if (fs.exists(path)) {
			FSDataInputStream is = fs.open(path);
			FileStatus status = fs.getFileStatus(path);
			byte[] buffer = new byte[(int) status.getLen()];
			is.readFully(0, buffer);
			is.close();
			fs.close();
			// use new String(buffer): buffer.toString() would only print the array reference
			System.out.println(new String(buffer));
		}
	}
}
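Note that section 3 wrote this file with writeUTF, which prefixes the string with a 2-byte length, so the raw bytes read above include that prefix. A matching way to read such a file back is readUTF; a minimal sketch, reusing the fs and path variables from the example above:

FSDataInputStream is = fs.open(path);
String text = is.readUTF();  // consumes the 2-byte length prefix written by writeUTF
is.close();
System.out.println(text);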

5. Upload a local file to HDFS

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path src = new Path("/home/hadoop/xxxx.txt");
		Path dst = new Path("/user/hadoop/hdfs/");
		fs.copyFromLocalFile(src, dst);
		fs.close();
	}
}

6. Delete a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
		fs.delete(path, false);  // false: non-recursive, the target is a single file
		fs.close();
	}
}

7. List all sub-directories and files under a given directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {
	static Configuration conf = new Configuration();

	public static void main(String[] args) throws IOException {
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop");
		getFile(path, fs);
		//fs.close();
	}

	public static void getFile(Path path, FileSystem fs) throws IOException {
		FileStatus[] fileStatus = fs.listStatus(path);
		for (int i = 0; i < fileStatus.length; i++) {
			if (fileStatus[i].isDir()) {
				// recurse into sub-directories
				getFile(fileStatus[i].getPath(), fs);
			} else {
				// print the full path of each file
				System.out.println(fileStatus[i].getPath().toString());
			}
		}
	}
}

8. Find where a file is located in the HDFS cluster

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FindFile {

	public static void main(String[] args) throws IOException {
		getFileLocal();
	}

	/**
	 * Find where a file's blocks are stored in the HDFS cluster.
	 */
	public static void getFileLocal() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");

		FileStatus status = fs.getFileStatus(path);
		BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());

		int length = locations.length;
		for (int i = 0; i < length; i++) {
			String[] hosts = locations[i].getHosts();  // datanodes holding this block
			System.out.println("block_" + i + "_location: " + hosts[0]);
		}
		fs.close();
	}
}

9. Get the names of all nodes in the HDFS cluster

package com.hadoop.file;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {

	public static void main(String[] args) throws IOException {
		getHDFSNode();
	}

	/**
	 * List the names of all datanodes in the HDFS cluster.
	 */
	public static void getHDFSNode() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		DistributedFileSystem dfs = (DistributedFileSystem) fs;
		DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();

		for (int i = 0; i < dataNodeStats.length; i++) {
			System.out.println("DataNode_" + i + "_Name: " + dataNodeStats[i].getHostName());
		}
		fs.close();
	}
}

 

When operating on FileSystem in a pseudo-distributed environment, the following exception may occur:

The Java code is as follows:

FileSystem fs = FileSystem.get(conf);
in = fs.open(new Path("hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in"));

 

The exception thrown is:

    Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in, expected: file:/// 
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310) 
        at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47) 
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357) 
        at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245) 
        at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125) 
        at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283) 
        at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356) 
        at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23) 
Solution:
Copy Hadoop's core-site.xml and hdfs-site.xml into the current project (under the bin folder of the Eclipse workspace).

Summary:

Because a remote HDFS is being accessed, the FileSystem must be obtained via a URI; otherwise the default local file system (file:///) is used, which is exactly what the "Wrong FS" error above complains about.
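For reference, a minimal sketch of obtaining the FileSystem through a URI, assuming the hdfs://localhost:9000 namenode address from the snippet above (the class name OpenWithUri is only for illustration):

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenWithUri {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		// Bind the FileSystem to the namenode explicitly instead of relying
		// on core-site.xml being on the classpath.
		FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
		FSDataInputStream in = fs.open(new Path("/wzqSearchEngine/crawl/inject_in"));
		in.close();
		fs.close();
	}
}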


Reposted from: https://www.cnblogs.com/wuzhenquan/p/3617751.html
