Flink去重计数统计用户数

1.数据

订单表,每行的字段依次是店铺id、用户id和支付金额

"店铺id,用户id,支付金额",
"shop-1,user-1,1",
"shop-1,user-2,1",
"shop-1,user-2,1",
"shop-1,user-3,1",
"shop-1,user-3,1",
"shop-1,user-1,1",
"shop-1,user-2,1",
"shop-1,user-4,1",
"shop-2,user-4,1",
"shop-2,user-4,1",
"shop-2,user-2,1"

2.可运行案例

import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;


/**
 * Demo job that computes, per shop, the number of distinct users and the total
 * payment amount using Flink SQL, and prints the result as a retract stream.
 */
public class Test03 {

    /**
     * Three-level query over the raw CSV lines held in column {@code f0} of {@code tableTmp}:
     * the innermost select splits each line into shop_id / user_id / gmv, the middle select
     * collapses rows to one per (shop_id, user_id) with {@code num = 1}, and the outer select
     * sums {@code num} and {@code gmv} per shop, so {@code num} is the distinct user count.
     */
    private static final String SHOP_USER_COUNT_SQL =
            "select shop_id,sum(num) as num,sum(gmv) as gmv from ("
                    + "select shop_id,user_id, 1 as num,sum(gmv) as gmv from ("
                    + "select SPLIT_INDEX(f0,',',0) as shop_id,SPLIT_INDEX(f0,',',1) as user_id,cast(SPLIT_INDEX(f0,',',2) as bigint) as gmv from tableTmp"
                    + ") t1 group by shop_id,user_id"
                    + ") t2 group by shop_id";

    /**
     * Builds and runs the job on an in-memory list of order lines.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Streaming environment plus the table environment layered on top of it.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Source: one CSV line per order, formatted as "shopId,userId,amount".
        SingleOutputStreamOperator<String> orderLines = env.fromElements(
                "shop-1,user-1,1",
                "shop-1,user-2,1",
                "shop-1,user-2,1",
                "shop-1,user-3,1",
                "shop-1,user-3,1",
                "shop-1,user-1,1",
                "shop-1,user-2,1",
                "shop-1,user-4,1",
                "shop-2,user-4,1",
                "shop-2,user-4,1",
                "shop-2,user-2,1");

        // Expose the stream to SQL under the view name the query reads from.
        tableEnv.createTemporaryView("tableTmp", tableEnv.fromDataStream(orderLines));

        // Distinct users and total amount per shop.
        Table perShopStats = tableEnv.sqlQuery(SHOP_USER_COUNT_SQL);

        // Print every change; each record carries a boolean flag next to the row.
        tableEnv.toRetractStream(perShopStats, Row.class).print(">>>>>>");

        // Nothing runs until the job is submitted.
        env.execute("test");
    }
}

sql:

-- Per shop: number of distinct users (num) and total payment amount (gmv).
-- Outer level: add up the per-user rows of each shop.
select
  shop_id,
  sum(num) as num,
  sum(gmv) as gmv
from
  (
    -- Middle level: one row per (shop_id, user_id); the constant 1 makes each
    -- user count exactly once when summed by the outer level.
    select
      shop_id,
      user_id,
      1 as num,
      sum(gmv) as gmv
    from
      (
        -- Inner level: split the raw comma-separated line in column f0 into
        -- shop_id, user_id and the amount (cast to bigint).
        select
          SPLIT_INDEX(f0, ',', 0) as shop_id,
          SPLIT_INDEX(f0, ',', 1) as user_id,
          cast(SPLIT_INDEX(f0, ',', 2) as bigint) as gmv
        from
          tableTmp
      ) t1
    group by
      shop_id,
      user_id
  ) t2
group by
  shop_id

3.运行结果

>>>>>>:7> (true,+U[shop-2, 2, 3])

>>>>>>:1> (true,+U[shop-1, 4, 8])  

>>>>>>:7> (true,+I[shop-2, 1, 1])
>>>>>>:1> (true,+I[shop-1, 1, 1])
>>>>>>:1> (false,-U[shop-1, 1, 1])
>>>>>>:7> (false,-U[shop-2, 1, 1])
>>>>>>:1> (true,+U[shop-1, 2, 2])
>>>>>>:7> (true,+U[shop-2, 2, 2])
>>>>>>:1> (false,-U[shop-1, 2, 2])
>>>>>>:7> (false,-U[shop-2, 2, 2])
>>>>>>:1> (true,+U[shop-1, 1, 1])
>>>>>>:7> (true,+U[shop-2, 1, 1])
>>>>>>:1> (false,-U[shop-1, 1, 1])
>>>>>>:7> (false,-U[shop-2, 1, 1])
>>>>>>:7> (true,+U[shop-2, 2, 3])
>>>>>>:1> (true,+U[shop-1, 2, 3])
>>>>>>:1> (false,-U[shop-1, 2, 3])
>>>>>>:1> (true,+U[shop-1, 3, 4])
>>>>>>:1> (false,-U[shop-1, 3, 4])
>>>>>>:1> (true,+U[shop-1, 2, 3])
>>>>>>:1> (false,-U[shop-1, 2, 3])
>>>>>>:1> (true,+U[shop-1, 3, 5])
>>>>>>:1> (false,-U[shop-1, 3, 5])
>>>>>>:1> (true,+U[shop-1, 2, 3])
>>>>>>:1> (false,-U[shop-1, 2, 3])
>>>>>>:1> (true,+U[shop-1, 3, 6])
>>>>>>:1> (false,-U[shop-1, 3, 6])
>>>>>>:1> (true,+U[shop-1, 4, 7])
>>>>>>:1> (false,-U[shop-1, 4, 7])
>>>>>>:1> (true,+U[shop-1, 3, 6])
>>>>>>:1> (false,-U[shop-1, 3, 6])
>>>>>>:1> (true,+U[shop-1, 4, 8])

4.原理

Flink回撤流原理

相关推荐

  1. Flink计数统计用户数

    2023-12-26 13:10:05       70 阅读
  2. MySQL:统计总条数时

    2023-12-26 13:10:05       35 阅读
  3. flink分别使用FilterMap和ProcessFunction实现逻辑

    2023-12-26 13:10:05       55 阅读
  4. oracle

    2023-12-26 13:10:05       59 阅读
  5. js 数组

    2023-12-26 13:10:05       58 阅读
  6. ArrayList数组

    2023-12-26 13:10:05       25 阅读

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2023-12-26 13:10:05       94 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2023-12-26 13:10:05       101 阅读
  3. 在Django里面运行非项目文件

    2023-12-26 13:10:05       82 阅读
  4. Python语言-面向对象

    2023-12-26 13:10:05       91 阅读

热门阅读

  1. @NotNull,@NotEmpty,@NotBlank区别

    2023-12-26 13:10:05       57 阅读
  2. QString转LPCWSTR

    2023-12-26 13:10:05       72 阅读
  3. Mac 修改报错Permission denied、chmod命令详细用法

    2023-12-26 13:10:05       60 阅读
  4. Google模拟面试【面试】

    2023-12-26 13:10:05       58 阅读
  5. C++ opencv-3.4.1 提取不规则物体的轮廓

    2023-12-26 13:10:05       56 阅读
  6. 【ISP】ISP调试岗位分类

    2023-12-26 13:10:05       57 阅读
  7. Flutter——环境搭建(MAC版)

    2023-12-26 13:10:05       74 阅读