Hive笔记


Hive

hive -e "script"
#eg:
./bin/hive -e "create table src(s string); select * from src;"

Hive 建表

create database if not exists tablename
location '/my/database/directory'
create database databasename
with dbproperties('creator'='bunshinn', 'date'='2018-02-10');
show databases like 'dim.*';

HIVE默认不允许删除包含有数据表的数据库, 可以在删除命令的最后加上关键字CASCADE, 实现直接删除.

drop database if exists databasename cascade;

HIVE的元数据信息是无法修改的, 但可以修改dbproperties

alter database dbname set dbproperies('edited-by'='bunshinn');

Hive 建表

create table employees (
    name string comment 'employee name',
    salary float,
    subordinates array<string>,
    deductions map<sstring, float>,
    address struct<street:string, city:string, state:string, zip:int>
    )
row format delimited
fields terminated by '\001'
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
stored as textfile;
create table if not exists mydb.employees (
    name string comment 'Employee name',
    salary float,
    subordinates array<string>,
    deductions map<sstring, float>,
    address struct<street:string, city:string, state:string, zip:int>
    )
comment 'description of the table'
tblproperties ('creator'='me', 'created_at'='2018-02-10')
location '/user/hive/warehouse/mydb.db/employees';

copy表

create table if not exists mydb.employees2
like mydb.employees;

列举数据库表名

show tables in mydb;

外部表

create external table if not exists stocks (
    exchange string,
    symbol string,
    ymd string,
    price_open float,
    price_high float,
    price_low float,
    price_close float,
    volume int,
    price_adj_close float)
row format delimited fields terminated by ','
location '/data/stocks';

分区表/管理表

create external table if not exists log_messages (
    hms int,
    serverity string,
    server string,
    process_id int,
    message string)
partitioned by (year int, month int, day int)
row format delimited fields terminated by '\t';

增加一个新分区

alter table log_message add partition(year=2018, month=2, day=10)
location 'hdfs://master_server/data/log_message/2018/02/10';

修改表

alter table log_message rename to logmsgs;

修改列信息

alter table log_message
change column hms hours_minutes_seconds int
comment 'The hours, minutes, and seconds part of the timestamp'
after severity;

增加列

alter table log_message add columns (
    app_name string comment 'application name',
    session_id long comment 'the current session id');

修改表

alter table ... touch

数据操作

装载数据

load data local inpath '${env:HOME}/ce'
overwrite into table employees
partition (county='US', state='CA');

导出数据

insert overwrite local directory '/tmp/ca_employees'
select name, salary, address
from employees
where se.state='CA';

查询

select ... from语句
使用正则指定列

#函数
-- 数学函数
-- 聚合函数: count(), sum, avg, min, max, variance, corr
-- 表生成函数: explode, inline, json_tuple, jsonStr, parse_url_tuple, stack
-- 其他函数: cast, concat, concat_ws, context_ngrams, from_unixtime, unix_timestamp

#LIKE与RLIKE

group by语句
having语句
join语句
order by / sort by

#抽样查询
select * from numbers tablesample(bucket 3 out of 10 on rand()) s;

评论: