day15Hive_随笔_内存溢出

day15Hive 一.Hive

cp /export/server/hive-2.1.0/jdbc/hive-jdbc-2.1.0-standalone.jar /export/server/hive-2.1.0/lib/

配置hive环境变量：

# Root directory of the Hive 2.1.0 installation.
export HIVE_HOME=/export/server/hive-2.1.0
# Prepend Hive's bin directory to PATH. There is deliberately no leading ':' —
# an empty PATH entry makes the shell search the current directory for commands.
export PATH=$HIVE_HOME/bin:$PATH

初始化元数据：

cd /export/server/hive-2.1.0/

bin/schematool -dbType mysql -initSchema


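在hadoop的core-site.xml中添加：

<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>

<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>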

分发：

scp core-site.xml node2:$PWD

启动metastore和hiveserver2：

# Start the Hive metastore service as a background job; nohup keeps it running after the shell exits.
nohup /export/server/hive-2.1.0/bin/hive --service metastore &
# Start HiveServer2 the same way (the beeline connection in these notes targets port 10000).
nohup /export/server/hive-2.1.0/bin/hive --service hiveserver2 &

beeline
 !connect jdbc:hive2://node3:10000

#!/bin/expect
# Log in to HiveServer2 through beeline without typing the connection details.
# Every send ends with \r (carriage return) so the line is actually submitted;
# without the backslash a literal "r" would be appended and nothing is entered.
spawn beeline
# Give each prompt up to 5 seconds to appear.
set timeout 5
expect "beeline>"
send "!connect jdbc:hive2://node3:10000\r"
expect "Enter username for jdbc:hive2://node3:10000:"
send "root\r"
expect "Enter password for jdbc:hive2://node3:10000:"
send "123456\r"
# Hand the session over to the user once logged in.
interact

mysql一键启动

# Make both expect scripts executable for their owner only
# (777 would also let every user modify scripts that contain passwords).
chmod u+x beeline.exp mysql.exp
# Run the MySQL login script through expect.
expect mysql.exp

-- Managed table whose text files hold one row per line, columns separated by a tab.
create table if not exists stu3(id int,name string) row format delimited fields terminated by '\t';

将查询到的表作为新表：

-- Create stux from a query result: both the schema and the rows come from stu3.
create table stux as
select *
from stu3;

复制其他表的表结构：

-- Create stux with the same table structure as stu3.
create table stux
like stu3;

查询表的类型：

-- Print the detailed description of stu2; used in these notes to check the table's type.
desc formatted  stu2;

已有数据和hive中表关联

hadoop fs -put data_flow.dat  /user/hive/warehouse/myflow.db/flow

14.3 外部表操作：
分别创建老师与学生外部表，并向表中加载数据

-- Work inside the myhive database.
use myhive;

-- External teacher table.
-- NOTE(review): the field delimiter is a single space as written here; confirm it matches the data files.
create external table teach (
    tid   int,
    tname string
)
row format delimited
    fields terminated by ' ';

-- External student table, same row format as teach.
create external table student (
    sid    int,
    sname  string,
    sbirth string,
    ssex   string
)
row format delimited
    fields terminated by ' ';

从本地向表中加载数据：

-- Load the student and teacher data files from the local filesystem into their tables.
load data local inpath '/export/data/hivedata/student.data'
    into table student;
load data local inpath '/export/data/hivedata/teacher.data'
    into table teach;

覆盖表的内容：

从hdfs向表中加载数据
将数据上传到hdfs（从hdfs加载本质是原文件剪切到表目录文件）

-- Template: replace the quoted path with the file's HDFS location and 表名 with the target table.
-- Loading from HDFS moves (cuts) the source file into the table's directory.
load data inpath 'hdfs中文件的位置' into table 表名;

多表共享数据：

-- External table bound to an existing data directory; several tables can point at
-- the same location to share one copy of the data. Replace '数据目录' with the real path.
create external table  student(sid int,sname string,sbirth string,ssex string) row format delimited
    fields terminated by ' ' location '数据目录';

-- Table with an array column: fields are tab-separated and the elements of
-- city are comma-separated (e.g. "zhangsan<TAB>beijing,shanghai").
create external  table  hive_array(
    name string,
    city array<string>
)row format delimited fields terminated by '\t'
collection items terminated by ',';
-- Load the sample rows from the local filesystem.
load data  local inpath '/export/data/hivedata/arr.data' into table hive_array ;

数据

zhangsan	  beijing,shanghai,tianjin,hangzhou
wangwu   	changchun,chengdu,wuhan,beijin

数组的长度：size

-- For every row, show the name together with the number of elements in city.
select
    name,
    size(city) as v
from hive_array;

是否包含：array_contains

-- Names of the rows whose city array contains "tianjin".
select name
from hive_array
where array_contains(city, "tianjin");

15.2 map类型

-- Table with a map column. Fields are separated by ',', map entries by '#',
-- and each key is separated from its value by ':'.
create  external  table  hive_map(
    id int ,
    name string,
    membbers map<string,string>,
    age int
)row format delimited fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';
-- Load the sample rows from the local filesystem, then inspect them.
load data  local inpath '/export/data/hivedata/map.data' into table hive_map;
select * from hive_map;

可以根据键查询

-- Look up a map value by key: rows whose 'father' entry equals "xiaoming".
select
    name,
    age,
    membbers
from hive_map
where membbers['father'] = "xiaoming";

获取所有的键：map_keys(membbers)
15.3 struct类似于JavaBean

-- Table with a struct column. Fields are separated by '#' and the members of
-- info by ':'.
-- NOTE(review): info.name is the only member the queries here use; the
-- age:int member is assumed — confirm against struct.data.
create external  table  hive_struct(
    ip string,
    info struct<name:string,age:int>
)row format delimited fields terminated by '#'
collection items terminated by ':';
-- Load the sample rows from the local filesystem, then inspect them.
load data local inpath '/export/data/hivedata/struct.data' into table hive_struct;
select  * from hive_struct;
-- Struct members are addressed with dot notation.
select ip from hive_struct where info.name="zhangsan";

分区

分区表

分区

-- Table partitioned by month; rows are tab-separated text.
create table score(
    sid int,
    cid int,
    score int
)partitioned by (month string) row format delimited fields terminated by '\t';
-- When loading into a partitioned table, the target partition must be specified.
load data  local inpath '/export/data/hivedata/score.data' into table score
    partition (month='202202');
-- Filter on the partition column; month is a string, so compare it with a string literal.
select  * from score where month='202201';

注：hive中的分区是分文件夹，MapReduce的分区是分文件
16.4多级分区：

-- Table with three partition levels (year / month / day); rows are tab-separated text.
create table score2(
    sid int,
    cid int,
    score int
)partitioned by (year string,month string,day string) row
    format delimited fields terminated by '\t';
-- When loading into a partitioned table, every partition level must be specified.
load data  local inpath '/export/data/hivedata/score.data' into table score2
    partition (year='2022',month='01',day='1');
-- Partition values are stored per level, so filter on year and month separately
-- (month holds '01', never '202201').
select  * from score2 where year='2022' and month='01';
-- Show the columns, including the partition columns.
desc score2;

查看表的所有分区：

-- List every partition of score2.
show partitions score2;

添加分区

添加一个
-- Add a single partition; "if not exists" makes the statement safe to re-run.
alter  table  score add if not exists partition (month='20201');

添加多个分区

-- Add several partitions in one statement; "if not exists" skips any that are already present.
alter  table  score add if not exists partition (month='20201') partition (month='202012');

删除分区：

-- Remove a partition; "if exists" avoids an error when it is already gone.
alter  table  score drop if exists partition (month='20201');

-- Insert one row into a specific partition of the partitioned table.
insert into table score
    partition (month='202201')
values (1, 1, 1);

欢迎分享，转载请注明来源：内存溢出

原文地址: https://www.outofmemory.cn/zaji/5716997.html

day15Hive

发表评论

评论列表（0条）