Hadoop HDFS (Part 2)

HDFS Shell Commands

hadoop fs # print help for the filesystem shell
hadoop fs -df -h /
hadoop fs -du -s -h hdfs://cluster1:9000/*
hadoop fs -rm -f -R hdfs://cluster1:9000/*
# upload a file; its blocks are stored under /root/hadoop-2.8.1/tmp/dfs on the datanodes
hadoop fs -put install_hadoop.sh hdfs://cluster1:9000/
# download; only the file's permissions change
hadoop fs -get hdfs://cluster1:9000/install_hadoop.sh ./
# create a directory
hadoop fs -mkdir -p /wordcount/input # same as the next command
hadoop fs -mkdir -p hdfs://cluster1:9000/wordcount/input
# view files in a browser
http://192.168.1.222:50070
# in the web UI above, Utilities -> Browse the file system shows the uploaded files
# list a directory
hadoop fs -ls /wordcount/output
# print a file's contents
hadoop fs -cat /wordcount/output/part-r-00000
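
These shell commands are thin wrappers over the Java filesystem API. As a minimal sketch (assuming hadoop-common is on the classpath; the class name is illustrative), the same `-ls` call can be driven from Java through FsShell and ToolRunner:

    package cn.itcast.hadoop.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FsShell;
    import org.apache.hadoop.util.ToolRunner;

    // Illustrative class name; runs the equivalent of `hadoop fs -ls /`.
    public class ShellFromJava {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://cluster1:9000/");
            // FsShell implements Tool, so ToolRunner wires in the configuration
            int exitCode = ToolRunner.run(conf, new FsShell(), new String[] { "-ls", "/" });
            System.exit(exitCode);
        }
    }
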
HDFS Implementation

/root/hadoop-2.8.1/tmp/dfs/data/current/BP-XX/current/finalized: the directory where a datanode host stores its block files
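
A client never reads these block files directly; it asks the namenode where each block lives and then streams from the datanodes. A small sketch (assuming the cluster and file from the examples above) that prints which datanode hosts hold each block:

    package cn.itcast.hadoop.hdfs;

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Illustrative class name; prints the block layout of one file.
    public class BlockLocations {
        public static void main(String[] args) throws Exception {
            FileSystem fs = FileSystem.get(
                    new URI("hdfs://cluster1:9000/"), new Configuration(), "root");
            FileStatus status = fs.getFileStatus(new Path("/install_hadoop.sh"));
            // the namenode answers from its metadata; no block data is transferred
            BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation b : blocks) {
                System.out.println("offset=" + b.getOffset()
                        + " length=" + b.getLength()
                        + " hosts=" + String.join(",", b.getHosts()));
            }
            fs.close();
        }
    }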

Calling the HDFS API from a Java Client

Install Eclipse and add the required jars and configuration files:
  1. The jars under /root/hadoop-2.8.1/share/hadoop/common/, plus all of their dependency jars under common/lib
  2. The jars under /root/hadoop-2.8.1/share/hadoop/hdfs/, plus all of their dependency jars under hdfs/lib, as well as the jars under the yarn and mapreduce directories
  3. Copy core-site.xml and hdfs-site.xml from /root/hadoop-2.8.1/etc/hadoop into the project's src directory

    Code implementation

    package cn.itcast.hadoop.hdfs;

    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.net.URI;
    import java.net.URISyntaxException;

    import org.apache.commons.compress.utils.IOUtils;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocatedFileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.RemoteIterator;
    import org.junit.Before;
    import org.junit.Test;

    public class HdfsUtil {

        FileSystem fs = null;

        @Before
        public void init() throws IOException, InterruptedException, URISyntaxException {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://cluster1:9000/");
            // pass the user name explicitly to avoid permission errors
            fs = FileSystem.get(new URI("hdfs://cluster1:9000/"), conf, "root");
            // on an HA cluster it is enough to use the dfs.nameservices value: hdfs://ns1/
            // what fs needs internally to read/write HDFS files:
            // fs --> RPCProxy --> NameNode.open(src)
        }

        @Test
        public void download() throws IOException {
            // open an input stream on the HDFS file
            Path src = new Path("hdfs://cluster1:9000/install_hadoop.sh");
            FSDataInputStream in = fs.open(src);
            // output to a local file
            FileOutputStream os = new FileOutputStream("/root/Downloads/install2.sh");
            IOUtils.copy(in, os);
            in.close();
            os.close();
        }

        @Test
        public void download2() throws IOException {
            // same as download(), but let the API do the copy
            fs.copyToLocalFile(
                    new Path("hdfs://cluster1:9000/upload2.txt"),
                    new Path("/root/Downloads/install2.sh"));
        }

        @Test
        public void upload() throws IOException {
            // upload a local file to HDFS through a pair of streams
            Path dst = new Path("hdfs://cluster1:9000/upload.txt");
            FSDataOutputStream os = fs.create(dst);
            FileInputStream in = new FileInputStream("/root/Downloads/install2.sh");
            IOUtils.copy(in, os);
            in.close();
            os.close();
        }

        @Test
        public void upload2() throws IOException {
            fs.copyFromLocalFile(
                    new Path("/root/Downloads/install2.sh"),
                    new Path("hdfs://cluster1:9000/a/b/upload2.txt"));
        }

        @Test
        public void mkdir() throws IllegalArgumentException, IOException {
            fs.mkdirs(new Path("/a/b"));
        }

        @Test
        public void rm() throws IllegalArgumentException, IOException {
            // true = delete recursively
            fs.delete(new Path("/a"), true);
        }

        @Test
        public void listFiles() throws FileNotFoundException, IllegalArgumentException, IOException {
            // listFiles returns files only, recursing into subdirectories
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
            while (files.hasNext()) {
                LocatedFileStatus file = files.next();
                // LocatedFileStatus objects come back over the same RPC channel as fs
                Path filepath = file.getPath();
                String fileName = filepath.getName();
                System.out.println(fileName);
            }
            System.out.println("--------------");
            // listStatus returns files and directories, one level only
            FileStatus[] listStatus = fs.listStatus(new Path("/"));
            for (FileStatus status : listStatus) {
                String name = status.getPath().getName();
                System.out.println(name);
            }
        }
    }

Permissions

If you are not testing inside the virtual machine (i.e., the client runs as a local user other than the HDFS user), you will hit permission errors. In Eclipse, add the following under Run Configuration -> Arguments -> VM arguments:
-DHADOOP_USER_NAME=root
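
The same effect can be achieved outside Eclipse by exporting the HADOOP_USER_NAME environment variable, or in code. A minimal sketch (illustrative class name; the property must be set before the first FileSystem.get call in the JVM):

    // Illustrative: same effect as the -DHADOOP_USER_NAME=root VM argument.
    public class RunAsRoot {
        public static void main(String[] args) throws Exception {
            // must run before the first FileSystem.get call in this JVM
            System.setProperty("HADOOP_USER_NAME", "root");
            // ... create the FileSystem and do HDFS work here ...
        }
    }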
