
栾昊/Flink_1.13

This repository does not declare an open-source license file (LICENSE); before using it, check the project description and the upstream dependencies of its code.
WindowAggregateTest_PVUV.java 2.92 KB
dahuanhuan committed on 2022-04-17 17:21 +08:00 · Function example: computing the PV/UV ratio
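PV (page views) counts every click event in the window, while UV (unique visitors) counts distinct users; dividing PV by UV gives the average number of clicks per user within the window, which is what the AvgPv aggregate function below computes.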
package com.hao.chapter06;
import com.hao.chapter05.ClickSource;
import com.hao.chapter05.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import java.time.Duration;
import java.util.HashSet;
public class WindowAggregateTest_PVUV {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read the data, extract timestamps, and generate watermarks
        DataStream<Event> stream = env.addSource(new ClickSource())
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ZERO)
                        .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                            @Override
                            public long extractTimestamp(Event element, long recordTimestamp) {
                                return element.timestamp;
                            }
                        }));

        stream.print("data");

        // Assign the same key to all records so they go to a single partition; count PV and UV there, then divide
        stream.keyBy(data -> true)
                .window(SlidingEventTimeWindows.of(Time.seconds(10), Time.seconds(2)))
                .aggregate(new AvgPv())
                .print();

        env.execute();
    }
    // Custom AggregateFunction: a Long holds the PV count, and a HashSet deduplicates users for UV
    public static class AvgPv implements AggregateFunction<Event, Tuple2<Long, HashSet<String>>, Double> {
        @Override
        public Tuple2<Long, HashSet<String>> createAccumulator() {
            // Create the accumulator
            return Tuple2.of(0L, new HashSet<>());
        }

        @Override
        public Tuple2<Long, HashSet<String>> add(Event value, Tuple2<Long, HashSet<String>> accumulator) {
            // For each record belonging to this window, accumulate once and return the accumulator
            accumulator.f1.add(value.user);
            return Tuple2.of(accumulator.f0 + 1L, accumulator.f1);
        }

        @Override
        public Double getResult(Tuple2<Long, HashSet<String>> accumulator) {
            // When the window closes, incremental aggregation ends and the result is emitted downstream
            return (double) accumulator.f0 / accumulator.f1.size();
        }

        @Override
        public Tuple2<Long, HashSet<String>> merge(Tuple2<Long, HashSet<String>> a, Tuple2<Long, HashSet<String>> b) {
            // merge() is only called for merging windows (e.g. session windows); combine both accumulators
            a.f1.addAll(b.f1);
            return Tuple2.of(a.f0 + b.f0, a.f1);
        }
    }
}
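The example depends on the Event POJO and the ClickSource source function from com.hao.chapter05, which are not shown on this page. Below is a minimal sketch of what they might look like, reconstructed from how they are used above (element.timestamp and value.user); the exact fields and the random data generation are assumptions, not necessarily the repository's actual code.

// A minimal sketch, assuming each class lives in its own file under com.hao.chapter05
package com.hao.chapter05;

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.Calendar;
import java.util.Random;

public class Event {
    public String user;       // used by AvgPv.add() for UV deduplication
    public String url;
    public Long timestamp;    // used as the event-time timestamp

    public Event() {}

    public Event(String user, String url, Long timestamp) {
        this.user = user;
        this.url = url;
        this.timestamp = timestamp;
    }
}

// ClickSource (separate file): emits one random click event per second until cancelled
public class ClickSource implements SourceFunction<Event> {
    private boolean running = true;

    @Override
    public void run(SourceContext<Event> ctx) throws Exception {
        Random random = new Random();
        String[] users = {"Mary", "Alice", "Bob", "Cary"};
        String[] urls = {"./home", "./cart", "./fav", "./prod?id=1"};
        while (running) {
            ctx.collect(new Event(
                    users[random.nextInt(users.length)],
                    urls[random.nextInt(urls.length)],
                    Calendar.getInstance().getTimeInMillis()));
            Thread.sleep(1000L);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

With a source like this, the 10-second window sliding every 2 seconds emits a Double roughly every 2 seconds: the average number of clicks per distinct user over the last 10 seconds.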