0%

HDFS基本命令

HDFS基本命令

HDFS操作的命令与shell的命令很多都是一致的,只是加了一个hadoop fs或者hdfs dfs前缀而已

dfs(分布式文件系统)是fs(文件系统)的一种实现,因此操作HDFS时,hdfs dfs 与 hadoop fs 两种写法是等价的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
hdfs dfs [参数]


-------------------
[-appendToFile <localsrc> ... <dst>]
[-cat [-ignoreCrc] <src> ...]
[-checksum [-v] <src> ...]
[-chgrp [-R] GROUP PATH...]
[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
[-chown [-R] [OWNER][:[GROUP]] PATH...]
[-copyFromLocal [-f] [-p] [-l] [-d] [-t <thread count>] <localsrc> ... <dst>]
[-copyToLocal [-f] [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-count [-q] [-h] [-v] [-t [<storage type>]] [-u] [-x] [-e] <path> ...]
[-cp [-f] [-p | -p[topax]] [-d] <src> ... <dst>]
[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-df [-h] [<path> ...]]
[-du [-s] [-h] [-v] [-x] <path> ...]
[-expunge [-immediate] [-fs <path>]]
[-find <path> ... <expression> ...]
[-get [-f] [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-getfacl [-R] <path>]
[-getfattr [-R] {-n name | -d} [-e en] <path>]
[-getmerge [-nl] [-skip-empty-file] <src> <localdst>]
[-head <file>]
[-help [cmd ...]] 帮助命令
[-ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] [-e] [<path> ...]]
[-mkdir [-p] <path> ...]
[-moveFromLocal <localsrc> ... <dst>]
[-moveToLocal <src> <localdst>]
[-mv <src> ... <dst>]
[-put [-f] [-p] [-l] [-d] <localsrc> ... <dst>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]
[-rm [-f] [-r|-R] [-skipTrash] [-safely] <src> ...]
[-rmdir [--ignore-fail-on-non-empty] <dir> ...]
[-setfacl [-R] [{-b|-k} {-m|-x <acl_spec>} <path>]|[--set <acl_spec> <path>]]
[-setfattr {-n name [-v value] | -x name} <path>]
[-setrep [-R] [-w] <rep> <path> ...]
[-stat [format] <path> ...]
[-tail [-f] [-s <sleep interval>] <file>]
[-test -[defswrz] <path>]
[-text [-ignoreCrc] <src> ...]
[-touch [-a] [-m] [-t TIMESTAMP ] [-c] <path> ...]
[-touchz <path> ...]
[-truncate [-w] <length> <path> ...]
[-usage [cmd ...]]

Generic options supported are:
-conf <configuration file> specify an application configuration file
-D <property=value> define a value for a given property
-fs <file:///|hdfs://namenode:port> specify default filesystem URL to use, overrides 'fs.defaultFS' property from configurations.
-jt <local|resourcemanager:port> specify a ResourceManager
-files <file1,...> specify a comma-separated list of files to be copied to the map reduce cluster
-libjars <jar1,...> specify a comma-separated list of jar files to be included in the classpath
-archives <archive1,...> specify a comma-separated list of archives to be unarchived on the compute machines

help帮助命令

1
2
3
4
5
6
7
8
9
# 可以查看任意命令的帮助信息
# 语法: hdfs dfs -help <命令名>
hdfs dfs -help cat

----------------
# 会列出来该命令的参数以及作用
-cat [-ignoreCrc] <src> ... :
Fetch all files that match the file pattern <src> and display their content on
stdout.

查询命令

ls命令—显示目录信息

1
2
3
4
5
6
7

#语法1 hdfs dfs -ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] [-e] <args>
#语法2 hadoop fs -ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] [-e] <args>
hdfs dfs -ls /
Found 2 items
drwxr-xr-x - zhanghe supergroup 0 2021-03-23 17:41 /test
drwx------ - zhanghe supergroup 0 2021-03-23 20:36 /tmp

cat命令—查看文件内容

1
2
3
#语法1 hdfs dfs -cat [-ignoreCrc] URI [URI ...]
#语法2 hadoop fs -cat [-ignoreCrc] URI [URI ...]
hdfs dfs -cat /testHdfs/tmp/test.xml

tail命令—显示文件末尾1KB的内容

1
2
3
#语法1 hdfs dfs -tail [-f] URI
#语法2 hadoop fs -tail [-f] URI
hdfs dfs -tail /testHdfs/tmp/test.xml

du命令—统计文件夹大小信息

1
2
3
#语法1 hdfs dfs -du [-s] [-h] [-v] [-x] URI [URI ...]
#语法2 hadoop fs -du [-s] [-h] [-v] [-x] URI [URI ...]
hdfs dfs -du /testHdfs/tmp

目录操作命令

mkdir命令—创建目录

1
2
3
4
5
6
#语法1 hdfs dfs -mkdir [-p] <paths>
#语法2 hadoop fs -mkdir [-p] <paths>
hdfs dfs -mkdir /testHdfs

# -p 会一并创建路径中缺失的父目录;目录已存在时也不会报错
hdfs dfs -mkdir -p /testHdfs/tmp

rmdir命令—删除空文件夹

1
2
3
#语法1 hdfs dfs -rmdir [--ignore-fail-on-non-empty] URI [URI ...]
#语法2 hadoop fs -rmdir [--ignore-fail-on-non-empty] URI [URI ...]
hdfs dfs -rmdir /testHdfs/tmp

文件操作命令

moveFromLocal命令—将本地文件剪切到HDFS

1
2
3
#语法1 hdfs dfs -moveFromLocal <localsrc> <dst>
#语法2 hadoop fs -moveFromLocal <localsrc> <dst>
hdfs dfs -moveFromLocal ./test.xml /testHdfs/tmp

这里注意一下,这个操作是剪切操作,操作之后本地的文件就不存在了

copyFromLocal命令—将本地文件复制到HDFS

1
2
3
#语法1 hdfs dfs -copyFromLocal <localsrc> URI
#语法2 hadoop fs -copyFromLocal <localsrc> URI
hdfs dfs -copyFromLocal ./test.xml /testHdfs/tmp

不同于moveFromLocal,copyFromLocal是复制操作,执行之后本地的文件仍然保留

appendToFile命令—追加一个文件到已存在文件的末尾

1
2
3
#语法1 hdfs dfs -appendToFile <localsrc> ... <dst>
#语法2 hadoop fs -appendToFile <localsrc> ... <dst>
hdfs dfs -appendToFile ./test1.xml /testHdfs/tmp/test.xml

copyToLocal命令—从HDFS复制到本地

1
2
3
#语法1 hdfs dfs -copyToLocal [-f] [-p] [-ignoreCrc] [-crc] URI <localdst>
#语法2 hadoop fs -copyToLocal [-f] [-p] [-ignoreCrc] [-crc] URI <localdst>
hdfs dfs -copyToLocal /testHdfs/tmp/test.xml ./test.xml

cp命令—文件复制

1
2
3
#语法1 hdfs dfs -cp [-f] [-p | -p[topax]] URI [URI ...] <dest>
#语法2 hadoop fs -cp [-f] [-p | -p[topax]] URI [URI ...] <dest>
hdfs dfs -cp /testHdfs/tmp/test.xml /testHdfs

mv命令—文件剪切

1
2
3
#语法1 hdfs dfs -mv URI [URI ...] <dest>
#语法2 hadoop fs -mv URI [URI ...] <dest>
hdfs dfs -mv /testHdfs/tmp/test.xml /testHdfs

get命令—相当于copyToLocal从HDFS复制到本地

1
2
3
#语法1 hdfs dfs -get [-f] [-p] [-ignoreCrc] [-crc] <src> <localdst>
#语法2 hadoop fs -get [-f] [-p] [-ignoreCrc] [-crc] <src> <localdst>
hdfs dfs -get /testHdfs/tmp/test.xml ./test.xml

getmerge命令—合并下载多个文件

1
2
3
#语法1 hdfs dfs -getmerge [-nl] <src> <localdst>
#语法2 hadoop fs -getmerge [-nl] <src> <localdst>
hdfs dfs -getmerge /testHdfs/tmp/test.xml /testHdfs/tmp/test1.xml ./merge.xml

put命令—相当于copyFromLocal将本地文件复制到HDFS

1
2
3
#语法1 hdfs dfs -put [-f] [-p] [-l] [-d] [ - | <localsrc> ... ] <dst>
#语法2 hadoop fs -put [-f] [-p] [-l] [-d] [ - | <localsrc> ... ] <dst>
hdfs dfs -put ./merge.xml /testHdfs/tmp

rm命令—删除文件或文件夹

1
2
3
4
5
6
#语法1 hdfs dfs -rm [-f] [-r |-R] [-skipTrash] [-safely] URI [URI ...]
#语法2 hadoop fs -rm [-f] [-r |-R] [-skipTrash] [-safely] URI [URI ...]
hdfs dfs -rm /testHdfs/tmp/merge.xml

# -r 递归删除目录以及目录下的所有文件和子目录
hdfs dfs -rm -r /testHdfs/tmp