1 Star 10 Fork 16

栾昊/Flink_1.13

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
AverageTimestampExample.java 4.97 KB
一键复制 编辑 原始数据 按行查看 历史
dahuanhuan 提交于 2022-04-26 22:49 +08:00 . 聚合状态(AggregatingState)案例
package com.hao.chapter09;
import com.hao.chapter05.ClickSource;
import com.hao.chapter05.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.AggregatingState;
import org.apache.flink.api.common.state.AggregatingStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import java.sql.Timestamp;
import java.time.Duration;
public class AverageTimestampExample {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
SingleOutputStreamOperator<Event> stream = env.addSource(new ClickSource())
.assignTimestampsAndWatermarks(
WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ZERO)
.withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
@Override
public long extractTimestamp(Event element, long recordTimestamp) {
return element.timestamp;
}
})
);
stream.print("input");
//自定义实现平均时间戳的统计
stream.keyBy(data -> data.user)
.flatMap(new AvgTsResult(5L))
.print();
env.execute();
}
//自定义实现RichFlatMapFunction
public static class AvgTsResult extends RichFlatMapFunction<Event, String> {
//自定义的窗口
private Long count;
public AvgTsResult(Long count) {
this.count = count;
}
//定义一个聚合状态,用来保存平均时间戳
private AggregatingState<Event, Long> avgTsAggState;
//定义一个值状态,保存用户访问的次数
private ValueState<Long> countState;
@Override
public void open(Configuration parameters) throws Exception {
//获取运行上下文
//ACC : Tuple2<Long,Long> : 第一个所有时间戳的和,第二个是当前的个数
//Out : 平均时间戳
avgTsAggState = getRuntimeContext().getAggregatingState(new AggregatingStateDescriptor<Event, Tuple2<Long, Long>, Long>(
"avg-ts",
new AggregateFunction<Event, Tuple2<Long, Long>, Long>() {
@Override
public Tuple2<Long, Long> createAccumulator() {
return Tuple2.of(0L, 0L);
}
@Override
public Tuple2<Long, Long> add(Event value, Tuple2<Long, Long> accumulator) {
return Tuple2.of(accumulator.f0 + value.timestamp, accumulator.f1 + 1);
}
@Override
public Long getResult(Tuple2<Long, Long> accumulator) {
return accumulator.f0 / accumulator.f1;
}
@Override
public Tuple2<Long, Long> merge(Tuple2<Long, Long> a, Tuple2<Long, Long> b) {
return null;
}
},
Types.TUPLE(Types.LONG, Types.LONG)
));
countState = getRuntimeContext().getState(new ValueStateDescriptor<Long>("count", Long.class));
}
@Override
public void flatMap(Event value, Collector<String> out) throws Exception {
//每来一条数据 count 就 +1
Long currCount = countState.value();
//判断,如果状态里面没有值的话,就+1,如果有值的话,就++
if (currCount == null) {
currCount = 1L;
} else {
currCount++;
}
//更新状态
countState.update(currCount);
avgTsAggState.add(value);
//根据count,判断是否达到了累积的技术窗口的长度
if (currCount.equals(count)){
out.collect(value.user + "过去"+count+"次访问平均时间戳为:"+ new Timestamp(avgTsAggState.get()));
//清空状态
avgTsAggState.clear();
countState.clear();
}
}
}
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/luan_hao/Flink_1.13.git
git@gitee.com:luan_hao/Flink_1.13.git
luan_hao
Flink_1.13
Flink_1.13
master

搜索帮助