八 Clickhouse 应用案例
1 用户和权限
在user.xml中添加用户配置
<?xml version="1.0"?> <yandex> <profiles> • <default> • <max_memory_usage>10000000000</max_memory_usage> • <use_uncompressed_cache>0</use_uncompressed_cache> • <load_balancing>random</load_balancing> • </default> • <readonly> • <readonly>1</readonly> • </readonly> </profiles> <users> • <default> • <password></password> • <networks incl="networks" replace="replace"> • <ip>::/0</ip> • </networks> • <profile>default</profile> • <quota>default</quota> • </default> <!-- 定义一个用户 用户名为hangge 密码使用明文 root --> <hangge> <password_sha256_hex>f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd</password_sha256_hex> • <networks incl="networks" replace="replace"> • <ip>::/0</ip> • </networks> • <profile>default</profile> • <quota>default</quota> • </hangge> </users> <quotas> • <default> • <interval> • <duration>3600</duration> • <queries>0</queries> • <errors>0</errors> • <result_rows>0</result_rows> • <read_rows>0</read_rows> • <execution_time>0</execution_time> • </interval> • </default> </quotas> </yandex>
明文密码
<password></password> 里面没有配置说明是没有密码
SHA256加密:
在使用SHA256加密算法的时候,需要通过password_sha256_hex标签定义密码
[root@ck1 ~]# echo -n hangge | openssl dgst -sha256 (stdin)= f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd
<hangge>
<password_sha256_hex>f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</hangge>
double_sha1加密:
在使用double_sha1加密算法的时候,则需要通过 password_double_sha1_hex标签定义密码,
<password_double_sha1_hex>23ae809ddacaf96af0fd78ed04b6a265e05aa257</password_double_sha1_hex> \# echo -n 123 | openssl dgst -sha1 -binary | openssl dgst -sha1 (stdin)= 23ae809ddacaf96af0fd78ed04b6a265e05aa257
用户权限控制'
<hangge>
<password_sha256_hex>60cd41aedc4e47e8883682b416109e7b7e345e15decc63c2c98ecdab5e8e053a</password_sha256_hex>
<networks incl="networks" />
<profile>readonly</profile>
<quota>default</quota>
<allow_databases>
<database>default</database>
</allow_databases>
</hangge>
## 3 JDBC和客户端工具
### 3.1 JDBC
•```xml
<!-- https://mvnrepository.com/artifact/ru.yandex.clickhouse/clickhouse-jdbc -->
<dependency>
<groupId>ru.yandex.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<version>0.2.4</version>
</dependency>
public class Demo1 {
public static void main(String[] args) throws Exception {
Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
String url = "jdbc:clickhouse://linux01:8123/default";
String username = "default";
String password = "";
Connection con = DriverManager.getConnection(url, username, password);
Statement stmt = con.createStatement();
ResultSet resultSet = stmt.executeQuery("select * from tb_demo1");
while (resultSet.next()) {
int id = resultSet.getInt("id");
String name = resultSet.getString("name");
System.out.println(id + ":" + name);
}
con.close();
stmt.close();
resultSet.close();
}
}
高可用模式允许设置多个host地址,每次会从可用的地址中随机选择一个进行连接,在高可用模式下,需要通过BalancedClickhouseDataSource对象获取连接
public static void main(String[] args) throws Exception {
// 初始化驱动
Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
// url
String url = "jdbc:clickhouse://linux01:8123,linux02:8123,linux03:8123/default";
//设置JDBC参数
ClickHouseProperties clickHouseProperties = new ClickHouseProperties();
clickHouseProperties.setUser("default");
//声明数据源
BalancedClickhouseDataSource balanced = new BalancedClickhouseDataSource(url, clickHouseProperties);
//对每个host进行ping操作, 排除不可用的dead连接
balanced.actualize();
//获得JDBC连接
Connection con = balanced.getConnection();
Statement stmt = con.createStatement();
ResultSet resultSet = stmt.executeQuery("select * from demo3_all");
while (resultSet.next()) {
int id = resultSet.getInt("id");
String name = resultSet.getString("name");
System.out.println(id + ":" + name);
}
con.close();
stmt.close();
resultSet.close();
}
3.2 客户端工具DBeaver




4.1 windowFunnel函数
(参数一) 时间的单位 窗口的大小 时间的单位 (时间 , 事件链条)
uid1 event1 1551398404
uid1 event2 1551398406
uid1 event3 1551398408
uid2 event2 1551398412
uid2 event3 1551398415
uid3 event3 1551398410
uid3 event4 1551398413
————————————————
-- 建表
drop table if exists test_funnel ;
CREATE TABLE test_funnel(
uid String,
eventid String,
eventTime UInt64)
ENGINE = MergeTree
ORDER BY (uid, eventTime) ;
-- 导入数据
insert into test_funnel values
('uid1','event1',1551398404),
('uid1','event2',1551398406),
('uid1','event3',1551398408),
('uid2','event2',1551398412),
('uid2','event3',1551398415),
('uid3','event3',1551398410),
('uid3','event4',1551398413);
-- 查看数据
┌─uid──┬─eventid─┬──eventTime─┐
│ uid1 │ event1 │ 1551398404 │
│ uid1 │ event2 │ 1551398406 │
│ uid1 │ event3 │ 1551398408 │
│ uid2 │ event2 │ 1551398412 │
│ uid2 │ event3 │ 1551398415 │
│ uid3 │ event3 │ 1551398410 │
│ uid3 │ event4 │ 1551398413 │
└──────┴─────────┴────────────┘
select
uid ,
windowFunnel(4)(
toDateTime(eventTime),
eventid='event1' ,
eventid='event2' ,
eventid='event3'
) as funnel
from
test_funnel
group by uid ;
┌─uid──┬─funnel─┐
│ uid3 │ 0 │
│ uid1 │ 3 │
│ uid2 │ 0 │
└──────┴────────┘
select
uid ,
windowFunnel(4)(
toDateTime(eventTime),
eventid='event2' ,
eventid='event3'
) as funnel
from
test_funnel
group by uid ;
┌─uid──┬─funnel─┐
│ uid3 │ 0 │
│ uid1 │ 2 │
│ uid2 │ 2 │
└──────┴────────┘
案例
建表 导入数据
clickhouse-client -q 'insert into test1.ods_log format JSONAsString' < event.log drop table if exists test_log ; create table test_log engine=MergeTree() order by (id,ts) as with visitParamExtractUInt(line,'timeStamp') as ts , visitParamExtractString(line ,'account')as account, visitParamExtractString(line ,'deviceId')as deviceId, visitParamExtractString(line ,'sessionId')as sessionId, visitParamExtractString(line ,'ip')as ip, visitParamExtractString(line ,'eventId')as eventId, visitParamExtractRaw(line ,'properties')as properties select if(account='' , deviceId , account) id , account , deviceId, sessionId, ip, eventId, properties, ts from tb_ods_log ;
select
id ,
windowFunnel(100000)(
toDateTime(ts),
eventId='productView' ,
eventId='adClick' ,
eventId='productView' ,
eventId='collect'
) as funnel
from
test_log
group by id ;
4.2 sequenceCount
sequenceCount满足要求的次数
SELECT id,
sequenceCount('(?1)')
(
FROM_UNIXTIME(ts) ,
eventId='adShow' ,
eventId='productView' ,
eventId='collect' ,
eventId='addCart' )AS cnt
FROM test_log
GROUP BY id having id='0T7136zA3BZI';
4.3 sequenceMatch
这个函数都需要指定模式串、时间列和期望的事件序列(最多可指定32个事件)。模式串的语法有以下三种: (?N):表示时间序列中的第N个事件,从1开始。例如上述SQL中,(?2)即表示event_type = 'shtKkclick' AND column_type = 'homePage'。 (?t op secs):插入两个事件之间,表示它们发生时需要满足的时间条件(单位为秒)。例如上述SQL中,(?1)(?t<=15)(?2)即表示事件1和2发生的时间间隔在15秒以内。 .*:表示任意的非指定事件。
SELECT
id,
sequenceMatch('(?1)(?t<=10)(?2)(?3).*(?4)')(
FROM_UNIXTIME(ts) ,
eventId='adClick' ,
eventId='productView' ,
eventId='collect' ,
eventId='addCart'
) AS is_match
FROM log
GROUP BY id
having id='dGHDHV7WOrpJ';
SELECT
id,
sequenceCount('(?1)(?t<=10)(?2)(?3).*(?4)')(
FROM_UNIXTIME(ts) ,
eventId='adClick' ,
eventId='productView' ,
eventId='collect' ,
eventId='addCart'
) AS cnt
FROM test_log
GROUP BY id ;
having id='dGHDHV7WOrpJ 0T7136zA3BZI';
select id ,eventId from test_log
where id = 'dGHDHV7WOrpJ'

浙公网安备 33010602011771号