OLAP引擎:clickhouse, presto

OLAP引擎:clickhouse, presto,第1张

OLAP引擎:clickhouse, presto

文章目录
  • 1, clickhouse
    • a, 安装服务
    • b, 测试使用
    • c, 配置集群
    • d, 通过mysql协议连接clickhouse
  • 2, presto
    • a, 连接hive

1, clickhouse a, 安装服务
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64

sudo yum install clickhouse-server clickhouse-client
/etc/init.d/clickhouse-server start

#客户端测试连接:
[root@c7 ssh]# clickhouse-client -h c7 <<< 'SELECt 1'
1
[root@c7 ssh]# echo 'SELECT 1' | clickhouse-client -h c7
1
[root@c7 ssh]# clickhouse-client  -h c73 -mn -q ' create database test; use default; show tables;'
test
b, 测试使用

示例建表语句和测试数据:https://clickhouse.com/docs/zh/getting-started/tutorial/

#1,创建数据库,表
clickhouse-client -h c73 --query "CREATE DATAbase IF NOT EXISTS tutorial"
clickhouse-client -h c73  -mn 
c73 :) 
CREATE TABLE tutorial.hits_v1
   (
    `WatchID` UInt64,
    `JavaEnable` UInt8,
    `Title` String,
    `GoodEvent` Int16,
    `EventTime` DateTime,
    `EventDate` Date,
    `CounterID` UInt32,
    `ClientIP` UInt32,
    `ClientIP6` FixedString(16),
    `RegionID` UInt32,
    `UserID` UInt64,
    `CounterClass` Int8,
    `OS` UInt8,
    `UserAgent` UInt8,
    `URL` String,
    `Referer` String,
    `URLDomain` String,
    `RefererDomain` String,
    `Refresh` UInt8,
    `IsRobot` UInt8,
    `RefererCategories` Array(UInt16),
	 ... 
    `ParsedParams` Nested(
        Key1 String,
        ...
        ValueDouble Float64),
    `IslandID` FixedString(16),
    `RequestNum` UInt32,
    `RequestTry` UInt8
  )
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

c73 :)
CREATE TABLE tutorial.visits_v1
  (
    `CounterID` UInt32,
    `StartDate` Date,
    `Sign` Int8,
    `IsNew` UInt8,
    `VisitID` UInt64,
    `UserID` UInt64,
    `StartTime` DateTime,
    `Duration` UInt32,
    `UTCStartTime` DateTime,
    `PageViews` Int32,
    `Hits` Int32,
     ...
    `IslandID` FixedString(16)
  )
ENGINE = CollapsingMergeTree(Sign)
PARTITION BY toYYYYMM(StartDate)
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID);

#2,下载测试数据:
curl https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
curl https://datasets.clickhouse.com/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv

[root@c73 tt]# ls -lh
-rw-r--r-- 1 root root  7.3G Dec 24 17:59 hits_v1.tsv
-rw-r--r-- 1 root root  2.5G Dec 28 10:47 visits_v1.tsv

clickhouse-client  -h c73 --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv
clickhouse-client  -h c73 --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv

#3,测试大数据量表的查询速度
[root@c73 tt]# clickhouse-client  -h c73 -mn
c73 :) select count(1) from tutorial.hits_v1;
Query id: e326e9cb-ca05-45ec-a0e7-409552f4417a
┌─count()─┐
│ 8873898 │
└─────────┘
1 rows in set. Elapsed: 0.002 sec.

c73 :) select count(1) from tutorial.visits_v1;
Query id: 1e29469b-9f75-4e63-b315-3f52c73fa4ec9
┌─count()─┐
│ 1680531 │
└─────────┘
1 rows in set. Elapsed: 0.003 sec.

c73 :) SELECt StartURL AS URL, AVG(Duration) AS AvgDuration
		FROM tutorial.visits_v1
		WHERe (StartDate >= '2014-03-23') AND (StartDate <= '2014-03-30')
		GROUP BY URL
		ORDER BY AvgDuration DESC
		LIMIT 2;
Query id: 7970fff2-9ccf-4853-90d5-7c6e90bf2a14
┌─URL──────────────────────────────────────────────┬─AvgDuration─┐
│ http://itpalanija-pri-patrivative=0&ads_app_user │       60127 │
│ http://renaul-myd-ukraine                        │       58938 │
└──────────────────────────────────────────────────┴─────────────┘
2 rows in set. Elapsed: 0.139 sec. Processed 1.47 million rows, 113.13 MB (10.59 million rows/s., 815.39 MB/s.)

c73 :) SELECt
			sum(Sign) AS visits,
			sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
			(100. * goal_visits) / visits AS goal_percent
		FROM tutorial.visits_v1
		WHERe (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')
Query id: 62569d93-532e-4e8e-a87f-5216f420647e
┌─visits─┬─goal_visits─┬──────goal_percent─┐
│  10543 │        8553 │ 81.12491700654462 │
└────────┴─────────────┴───────────────────┘
1 rows in set. Elapsed: 0.410 sec. Processed 47.52 thousand rows, 6.00 MB (115.90 thousand rows/s., 14.64 MB/s.)
c, 配置集群
  • 集群名称: test
  • 集群节点:192.168.56.73:9000, 192.168.56.7:9000

集群配置(所有节点需要同步配置):vim /etc/clickhouse-server/config.xml

    
     0.0.0.0

    
    
         
             
                 
                     192.168.56.73
                     9000
                 
             
             
                 
                     192.168.56.7
                     9000
                 
             
        
    
	
	
    
        
            192.168.56.73
            2181
        
    

重启所有节点,查看集群信息:

d, 通过mysql协议连接clickhouse
[root@c7 ~]# grep mysql /etc/clickhouse-server/config.xml
    9004
         
[root@c7 ~]# mysql -udefault -p -P9004 -hc7
Enter password:
Welcome to the MySQL monitor.  Commands end with ; or g.
Your MySQL connection id is 6
Server version: 20.8.3.18-ClickHouse 0
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or 'h' for help. Type 'c' to clear the current input statement.

mysql> show databases;
+--------------------------------+
| name                           |
+--------------------------------+
| _temporary_and_external_tables |
| default                        |
| system                         |
| test1                          |
+--------------------------------+
3 rows in set (0.00 sec)

mysql> create database test2;
Query OK, 0 rows affected (0.01 sec)

mysql> use test2;
Database changed

mysql> create table t1(id int);
ERROR 62 (00000): Code: 62, e.displayText() = DB::Exception: Syntax error: failed at position 24 (end of query): . Expected one of: ENGINE, storage definition (version 20.8.3.18)
mysql> create table t1(id int) engine Memory;
Query OK, 0 rows affected (0.00 sec)

mysql> insert into t1 values(1);
Query OK, 1 row affected (0.00 sec)
ead 1 rows, 4.00 B in 0.000141798 sec., 7052 rows/sec., 27.55 KiB/sec.

mysql> select * from t1;
+------+
| id   |
+------+
|    1 |
+------+
1 row in set (0.00 sec)

mysql> update t1 set id=11;
ERROR 62 (00000): Code: 62, e.displayText() = DB::Exception: Syntax error: failed at position 1 ('update'): update t1 set id=11. Expected one of: Query, SHOW, SELECT, CREATE, DROP, TRUNCATE, OPTIMIZE TABLE, CREATE POLICY, ALTER POLICY, CREATE PROFILE, ALTER PROFILE, SYSTEM, DESCRIBE, DETACH, ATTACH, ALTER QUOTA, CREATE QUOTA, ALTER ROLE, CREATE ROLE, ALTER ROW POLICY, CREATE ROW POLICY, ALTER SETTINGS PROFILE, CREATE SETTINGS PROFILE, ALTER USER, CREATE USER, GRANT, REVOKE, SET ROLE, SET ROLE DEFAULT, SET DEFAULT ROLE, SHOW
mysql> delete from t1 where id=1;
ERROR 62 (00000): Code: 62, e.displayText() = DB::Exception: Syntax error: failed at position 1 ('delete'): delete from t1 where id=1. Expected one of: Query, SHOW, SELECt, CREATE, DROP, TRUNCATE, OPTIMIZE TABLE, CREATE POLICY, ALTER POLICY, CREATE PROFILE, ALTER PROFILE, SYSTEM, DESCRIBE, DETACH, ATTACH, ALTER QUOTA, CREATE QUOTA, ALTER ROLE, CREATE ROLE, ALTER ROW POLICY, CREATE ROW POLICY, ALTER SETTINGS PROFILE, CREATE SETTINGS PROFILE, ALTER USER, CREATE USER, GRANT, REVOKE, SET ROLE, SET ROLE DEFAULT, SET DEFAULT ROLE
2, presto

presto 客户端下载:https://download.csdn.net/download/eyeofeagle/71994558
presto 数据源连接器:https://prestodb.io/docs/current/connector.html
presto hive kerberos :https://cloud.tencent.com/developer/article/1158362

a, 连接hive
#配置hive连接( 文件名就是catalog名称 )
[root@daas9 ~]# cat /export/presto-server/etc/catalog/hive.properties
connector.name=hive-hadoop2
hive.metastore.uri=thrift://10.1.0.11:9083
hive.config.resources=/export/common/hadoop/conf/hdfs-site.xml,/export/common/hadoop/conf/core-site.xml,/export/common/hive/conf/hive-site.xml
hive.hdfs.impersonation.enabled=true
hive.metastore.authentication.type=NONE

#登录presto客户端,测试sql语句
[root@daas9 ~]# su hdfs
[root@daas9 ~]# chmod +x /tmp/presto-cli-0.266.1-executable.jar 
[hdfs@daas9 root]$ /tmp/presto-cli-0.266.1-executable.jar --server localhost:8081
presto> show catalogs;
 Catalog 
---------
 hive    
 system  
(3 rows)
Query 20211228_085013_00028_5hy6d, FINISHED, 1 node
Splits: 19 total, 19 done (100.00%)
0:01 [0 rows, 0B] [0 rows/s, 0B/s]

presto> show schemas from hive;
       Schema       
--------------------
 default            
 information_schema 
 t1                 
(3 rows)
Query 20211228_085035_00029_5hy6d, FINISHED, 2 nodes
Splits: 19 total, 19 done (100.00%)
400ms [3 rows, 42B] [7 rows/s, 105B/s]

presto> show tables from hive.t1;
 Table 
-------
 per   
(1 row)
Query 20211228_085049_00030_5hy6d, FINISHED, 2 nodes
Splits: 19 total, 19 done (100.00%)
423ms [1 rows, 15B] [2 rows/s, 35B/s]

presto> select * from hive.t1.per;
 id 
----
  1 
  2 
(2 rows)
Query 20211228_085059_00031_5hy6d, FINISHED, 1 node
Splits: 18 total, 18 done (100.00%)
0:02 [2 rows, 24B] [1 rows/s, 14B/s]

#插入数据,并查询
presto> insert into hive.t1.per values(3);
INSERT: 1 row
Query 20211228_085208_00032_5hy6d, FINISHED, 2 nodes
Splits: 35 total, 35 done (100.00%)
0:01 [0 rows, 0B] [0 rows/s, 0B/s]

presto> select * from hive.t1.per;
 id 
----
  1 
  3 
  2 
(3 rows)
Query 20211228_085212_00033_5hy6d, FINISHED, 1 node
Splits: 19 total, 19 done (100.00%)
185ms [3 rows, 46B] [16 rows/s, 248B/s]

欢迎分享,转载请注明来源:内存溢出

原文地址: https://www.outofmemory.cn/zaji/5680967.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-12-17
下一篇 2022-12-17

发表评论

登录后才能评论

评论列表(0条)

保存