Hadoop HDFS (Part 2)

HDFS Shell Commands

hadoop fs # print help for the filesystem shell
hadoop fs -df -h /
hadoop fs -du -s -h hdfs://cluster1:9000/*
hadoop fs -rm -f -R hdfs://cluster1:9000/*
# upload a file; its blocks are stored under /root/hadoop-2.8.1/tmp/dfs on the datanodes
hadoop fs -put install_hadoop.sh hdfs://cluster1:9000/
# download; only the file's permissions change
hadoop fs -get hdfs://cluster1:9000/install_hadoop.sh ./
# create a directory
hadoop fs -mkdir -p /wordcount/input # same as the next command
hadoop fs -mkdir -p hdfs://cluster1:9000/wordcount/input
# view files in a browser
http://192.168.1.222:50070
# in the web UI above, Utilities -> Browse the file system shows the uploaded files
# list a directory
hadoop fs -ls /wordcount/output
# print a file's contents
hadoop fs -cat /wordcount/output/part-r-00000
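
These shell commands are thin wrappers over the Java filesystem API. As a minimal sketch (assuming hadoop-common is on the classpath; the class name is illustrative), the same `-ls` call can be driven from Java through FsShell and ToolRunner:

    package cn.itcast.hadoop.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FsShell;
    import org.apache.hadoop.util.ToolRunner;

    // Illustrative class name; runs the equivalent of `hadoop fs -ls /`.
    public class ShellFromJava {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://cluster1:9000/");
            // FsShell implements Tool, so ToolRunner wires in the configuration
            int exitCode = ToolRunner.run(conf, new FsShell(), new String[] { "-ls", "/" });
            System.exit(exitCode);
        }
    }
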
HDFS Implementation

/root/hadoop-2.8.1/tmp/dfs/data/current/BP-XX/current/finalized: the directory where a datanode host stores its block files
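
A client never reads these block files directly; it asks the namenode where each block lives and then streams from the datanodes. A small sketch (assuming the cluster and file from the examples above) that prints which datanode hosts hold each block:

    package cn.itcast.hadoop.hdfs;

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Illustrative class name; prints the block layout of one file.
    public class BlockLocations {
        public static void main(String[] args) throws Exception {
            FileSystem fs = FileSystem.get(
                    new URI("hdfs://cluster1:9000/"), new Configuration(), "root");
            FileStatus status = fs.getFileStatus(new Path("/install_hadoop.sh"));
            // the namenode answers from its metadata; no block data is transferred
            BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation b : blocks) {
                System.out.println("offset=" + b.getOffset()
                        + " length=" + b.getLength()
                        + " hosts=" + String.join(",", b.getHosts()));
            }
            fs.close();
        }
    }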

Calling the HDFS API from a Java Client

Install Eclipse and add the required jars and configuration files:
  1. The jars under /root/hadoop-2.8.1/share/hadoop/common/, plus all of their dependency jars under common/lib
  2. The jars under /root/hadoop-2.8.1/share/hadoop/hdfs/, plus all of their dependency jars under hdfs/lib, as well as the jars under the yarn and mapreduce directories
  3. Copy core-site.xml and hdfs-site.xml from /root/hadoop-2.8.1/etc/hadoop into the project's src directory

    Code implementation

    package cn.itcast.hadoop.hdfs;

    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.net.URI;
    import java.net.URISyntaxException;

    import org.apache.commons.compress.utils.IOUtils;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocatedFileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.RemoteIterator;
    import org.junit.Before;
    import org.junit.Test;

    public class HdfsUtil {

        FileSystem fs = null;

        @Before
        public void init() throws IOException, InterruptedException, URISyntaxException {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://cluster1:9000/");
            // pass the user name explicitly to avoid permission errors
            fs = FileSystem.get(new URI("hdfs://cluster1:9000/"), conf, "root");
            // on an HA cluster it is enough to use the dfs.nameservices value: hdfs://ns1/
            // what fs needs internally to read/write HDFS files:
            // fs --> RPCProxy --> NameNode.open(src)
        }

        @Test
        public void download() throws IOException {
            // open an input stream on the HDFS file
            Path src = new Path("hdfs://cluster1:9000/install_hadoop.sh");
            FSDataInputStream in = fs.open(src);
            // output to a local file
            FileOutputStream os = new FileOutputStream("/root/Downloads/install2.sh");
            IOUtils.copy(in, os);
            in.close();
            os.close();
        }

        @Test
        public void download2() throws IOException {
            // same as download(), but let the API do the copy
            fs.copyToLocalFile(
                    new Path("hdfs://cluster1:9000/upload2.txt"),
                    new Path("/root/Downloads/install2.sh"));
        }

        @Test
        public void upload() throws IOException {
            // upload a local file to HDFS through a pair of streams
            Path dst = new Path("hdfs://cluster1:9000/upload.txt");
            FSDataOutputStream os = fs.create(dst);
            FileInputStream in = new FileInputStream("/root/Downloads/install2.sh");
            IOUtils.copy(in, os);
            in.close();
            os.close();
        }

        @Test
        public void upload2() throws IOException {
            fs.copyFromLocalFile(
                    new Path("/root/Downloads/install2.sh"),
                    new Path("hdfs://cluster1:9000/a/b/upload2.txt"));
        }

        @Test
        public void mkdir() throws IllegalArgumentException, IOException {
            fs.mkdirs(new Path("/a/b"));
        }

        @Test
        public void rm() throws IllegalArgumentException, IOException {
            // true = delete recursively
            fs.delete(new Path("/a"), true);
        }

        @Test
        public void listFiles() throws FileNotFoundException, IllegalArgumentException, IOException {
            // listFiles returns files only, recursing into subdirectories
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
            while (files.hasNext()) {
                LocatedFileStatus file = files.next();
                // LocatedFileStatus objects come back over the same RPC channel as fs
                Path filepath = file.getPath();
                String fileName = filepath.getName();
                System.out.println(fileName);
            }
            System.out.println("--------------");
            // listStatus returns files and directories, one level only
            FileStatus[] listStatus = fs.listStatus(new Path("/"));
            for (FileStatus status : listStatus) {
                String name = status.getPath().getName();
                System.out.println(name);
            }
        }
    }

Permissions

If you are not testing inside the virtual machine (i.e., the client runs as a local user other than the HDFS user), you will hit permission errors. In Eclipse, add the following under Run Configuration -> Arguments -> VM arguments:
-DHADOOP_USER_NAME=root
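
The same effect can be achieved outside Eclipse by exporting the HADOOP_USER_NAME environment variable, or in code. A minimal sketch (illustrative class name; the property must be set before the first FileSystem.get call in the JVM):

    // Illustrative: same effect as the -DHADOOP_USER_NAME=root VM argument.
    public class RunAsRoot {
        public static void main(String[] args) throws Exception {
            // must run before the first FileSystem.get call in this JVM
            System.setProperty("HADOOP_USER_NAME", "root");
            // ... create the FileSystem and do HDFS work here ...
        }
    }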
