hive (default)> create database if not exists hive_test > comment 'hive database for test' > with dbproperties ('cretae'='heibanbai'); OK Time taken: 0.485 seconds
查看数据库列表
1 2 3 4 5 6
hive (default)> show databases; OK database_name default hive_test Time taken: 0.16 seconds, Fetched: 2 row(s)
使用数据库
1 2 3
hive (default)> use hive_test; OK Time taken: 0.026 seconds
hive (default)> alter database hive_test set dbproperties('createtime'='20220517'); OK Time taken: 8.537 seconds hive (default)> alter database hive_test set dbproperties('edited-by'='Eric'); OK Time taken: 0.058 seconds
查看数据库信息
1 2 3 4 5 6 7 8 9 10 11
hive (default)> desc database hive_test; OK db_name comment location owner_name owner_type parameters hive_test hive database for test hdfs://node1:9000/user/hive/warehouse/hive_test.db hadoop USER Time taken: 0.342 seconds, Fetched: 1 row(s) # extended显示额外属性信息 hive (default)> desc database extended hive_test; OK db_name comment location owner_name owner_type parameters hive_test hive database for test hdfs://node1:9000/user/hive/warehouse/hive_test.db hadoop USER {edited-by=Eric, createtime=20220517, cretae=heibanbai} Time taken: 0.035 seconds, Fetched: 1 row(s)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later) [(col_name data_type [column_constraint_specification] [COMMENT col_comment], ... [constraint_specification])] [COMMENT table_comment] [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] [SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)] ON ((col_value, col_value, ...), (col_value, col_value, ...), ...) [STORED AS DIRECTORIES] [ [ROW FORMAT row_format] [STORED AS file_format] | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later) ] [LOCATION hdfs_path] [TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later) [AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
data_type : primitive_type | array_type | map_type | struct_type | union_type -- (Note: Available in Hive 0.7.0 and later)
primitive_type : TINYINT | SMALLINT | INT | BIGINT | BOOLEAN | FLOAT | DOUBLE | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later) | STRING | BINARY -- (Note: Available in Hive 0.8.0 and later) | TIMESTAMP -- (Note: Available in Hive 0.8.0 and later) | DECIMAL -- (Note: Available in Hive 0.11.0 and later) | DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later) | DATE -- (Note: Available in Hive 0.12.0 and later) | VARCHAR -- (Note: Available in Hive 0.12.0 and later) | CHAR -- (Note: Available in Hive 0.13.0 and later)
union_type : UNIONTYPE < data_type, data_type, ... > -- (Note: Available in Hive 0.7.0 and later)
row_format : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char] [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char] [NULL DEFINED AS char] -- (Note: Available in Hive 0.13 and later) | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
file_format: : SEQUENCEFILE | TEXTFILE -- (Default, depending on hive.default.fileformat configuration) | RCFILE -- (Note: Available in Hive 0.6.0 and later) | ORC -- (Note: Available in Hive 0.11.0 and later) | PARQUET -- (Note: Available in Hive 0.13.0 and later) | AVRO -- (Note: Available in Hive 0.14.0 and later) | JSONFILE -- (Note: Available in Hive 4.0.0 and later) | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
-- Create a managed table stored as plain text, tab-separated columns,
-- at an explicit warehouse location.
-- Fixed paste-garbled tokens: "createtable" -> "CREATE TABLE",
-- "notexists" -> "NOT EXISTS", "by'\t'" -> "BY '\t'".
CREATE TABLE IF NOT EXISTS student2 (
    id   INT,
    name STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/hive/warehouse/student2';
根据查询结果创建表(查询的结果会添加到新创建的表中)-实际常用
1
-- CTAS: create student3 and populate it from the query result
-- (schema is derived from the SELECT list).
-- Fixed paste-garbled tokens: "createtable", "notexists", "asselect".
CREATE TABLE IF NOT EXISTS student3 AS
SELECT
    id,
    name
FROM student;
# 创建外部表 hive (hive_test)> create external table test_part_ch ( > id int, > name STRING, > sex STRING, > age INT, > birthday DATE > ) > ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t" > LOCATION '/tmp/test_cn.txt'; OK Time taken: 0.259 seconds
# 查看表数据 hive (hive_test)> select * from test_part_ch; OK test_part_ch.id test_part_ch.name test_part_ch.sex test_part_ch.age test_part_ch.birthday 1 a 男 22 NULL 2 b 男 23 NULL 3 c 男 24 NULL Time taken: 0.373 seconds, Fetched: 3 row(s)
# 创建表 hive (hive_test)> CREATE EXTERNAL TABLE test_part ( > id INT, > name STRING, > sex STRING, > age INT, > birthday DATE > ) > PARTITIONED BY (country STRING) -- 按照国家进行分区 > ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t" > LOCATION '/user/hive/warehouse/test_part'; OK Time taken: 0.975 seconds # 查看表定义 hive (hive_test)> show tables; OK tab_name test_part Time taken: 0.038 seconds, Fetched: 1 row(s) hive (hive_test)> desc test_part; OK col_name data_type comment id int name string sex string age int birthday date country string
# Partition Information # col_name data_type comment
country string Time taken: 0.895 seconds, Fetched: 11 row(s)
创建后查看HDFS信息,如下:
加载数据入表
提前准备两份数据在本地目录:
1 2 3 4 5 6 7 8 9 10
[hadoop@node1 tmp]$ pwd /app/tmp [hadoop@node1 tmp]$ cat test_cn.txt 1 a 男 22 1989/8/15 2 b 男 23 1989/8/15 3 c 男 24 1989/8/15 [hadoop@node1 tmp]$ [hadoop@node1 tmp]$ cat test_jp.txt 4 d 女 25 1989/8/15 5 e 女 26 1989/8/15
hive (hive_test)> load data inpath '/tmp/test_cn.txt' into table hive_test.test_part partition(country='cn'); Loading data to table hive_test.test_part partition (country=cn) OK Time taken: 2.083 seconds
hive (hive_test)> load data inpath '/tmp/test_jp.txt' into table hive_test.test_part partition(country='jp'); Loading data to table hive_test.test_part partition (country=jp) OK Time taken: 1.205 seconds
如果表是分区表则必须指定PARTITION从句,否则会报如下错误: FAILED: SemanticException [Error 10062]: Need to specify partition columns because the destination table is partitioned
-- User table partitioned by country and bucketed into 32 buckets on
-- state, rows sorted by city within each bucket; SequenceFile storage.
-- Fixed paste-garbled tokens: "CREATETABLE" -> "CREATE TABLE",
-- "INTO32" -> "INTO 32".
CREATE TABLE bucketed_user (
    firstname VARCHAR(64),
    lastname  VARCHAR(64),
    address   STRING,
    city      VARCHAR(64),
    state     VARCHAR(64),
    post      STRING,
    phone1    VARCHAR(64),
    phone2    STRING,
    email     STRING,
    web       STRING
)
COMMENT 'A bucketed sorted user table'
PARTITIONED BY (country VARCHAR(64))
CLUSTERED BY (state) SORTED BY (city) INTO 32 BUCKETS
STORED AS SEQUENCEFILE;
-- Dynamic-partition load: the trailing SELECT column (country) supplies
-- the partition value for each row.
INSERT OVERWRITE TABLE bucketed_user
PARTITION (country)
SELECT
    firstname,
    lastname,
    address,
    city,
    state,
    post,
    phone1,
    phone2,
    email,
    web,
    country
FROM temp_user;
-- Session settings required for the dynamic-partition bucketed insert.
set hive.exec.dynamic.partition=true;                 -- allow dynamic partitions
set hive.exec.dynamic.partition.mode=nonstrict;       -- no static partition column required
set hive.exec.max.dynamic.partitions.pernode=1000;    -- raise per-node partition cap
set hive.enforce.bucketing=true;                      -- honor bucket count on insert
-- Idempotent cleanup before recreating the table.
-- Fixed paste-garbled token: "DROPTABLE" -> "DROP TABLE".
DROP TABLE IF EXISTS bucketed_user;
-- Session-scoped staging table for the raw CSV user data; dropped
-- automatically when the session ends.
-- Fixed paste-garbled tokens: "BY','" -> "BY ','", "BY'\n'" -> "BY '\n'".
CREATE TEMPORARY TABLE temp_user (
    firstname VARCHAR(64),
    lastname  VARCHAR(64),
    address   STRING,
    country   VARCHAR(64),
    city      VARCHAR(64),
    state     VARCHAR(64),
    post      STRING,
    phone1    VARCHAR(64),
    phone2    STRING,
    email     STRING,
    web       STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- Load the local CSV file into the staging table (LOCAL reads from the
-- client filesystem, not HDFS).
-- Fixed paste-garbled token: "'…'INTOTABLE" -> "' INTO TABLE".
LOAD DATA LOCAL INPATH '/home/user/user_table.txt' INTO TABLE temp_user;
-- User table partitioned by country and bucketed into 32 buckets on
-- state, rows sorted by city within each bucket; SequenceFile storage.
-- Fixed paste-garbled tokens: "CREATETABLE" -> "CREATE TABLE",
-- "INTO32" -> "INTO 32".
CREATE TABLE bucketed_user (
    firstname VARCHAR(64),
    lastname  VARCHAR(64),
    address   STRING,
    city      VARCHAR(64),
    state     VARCHAR(64),
    post      STRING,
    phone1    VARCHAR(64),
    phone2    STRING,
    email     STRING,
    web       STRING
)
COMMENT 'A bucketed sorted user table'
PARTITIONED BY (country VARCHAR(64))
CLUSTERED BY (state) SORTED BY (city) INTO 32 BUCKETS
STORED AS SEQUENCEFILE;
-- Ensure bucketing is enforced, then populate the bucketed table via a
-- dynamic-partition insert (country column drives the partition value).
set hive.enforce.bucketing=true;

INSERT OVERWRITE TABLE bucketed_user
PARTITION (country)
SELECT
    firstname,
    lastname,
    address,
    city,
    state,
    post,
    phone1,
    phone2,
    email,
    web,
    country
FROM temp_user;