业务指标计算整理

汇总下常见的指标计算逻辑,不区分行业

流失回归

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# 指定了流失日期,在流失日期之后没有过活跃,则为流失用户
# DT为数仓日期(昨日)
SELECT login.user_id,DATEDIFF('DT',login.dt) churn
FROM (
# 用户不同流失日期的登陆信息
SELECT user_id,dt
FROM USER_LOGIN
WHERE dt IN ('-1日','-3日','-7日','-30日')
) login
LEFT JOIN (
# 用户最早流失前到现今的活跃信息
SELECT user_id,MAX(dt) dt
FROM USER_ACTIVE
WHERE dt >= '-30日' AND dt <= 'DT'
GROUP BY user_id
) active
ON login.user_id = active.user_id
AND active.dt > login.dt
WHERE active.user_id IS NULL


# 获取数仓日期的登陆信息,关联其前一日的流失信息,即可获取回归数据
SELECT login.user_id,churn.churn return
FROM (
# 用户登陆信息
SELECT user_id
FROM USER_LOGIN
WHERE dt = 'DT'
) login
JOIN (
# 用户登陆前一日的流失情况,churn其实就可以转化N日回归
SELECT user_id,churn
FROM USER_CHURN
WHERE dt = '-1日'
) churn
ON login.user_id = churn.user_id

留存

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 历史活跃在前,当前活跃在后,写入时,保证之前的留存不影响
# 只会一直基于历史活跃新增新的当日留存用户
# base_dt = 2021-10-01 / retention_dt = [2021-10-02 ... DT]
SELECT active.user_id,history.dt base_dt,active.dt retention_dt
FROM (
# 当日活跃信息
SELECT user_id,dt
FROM USER_ACTIVE
WHERE dt = 'DT'
) active
INNER JOIN (
# N日前活跃信息
SELECT user_id,dt
FROM USER_ACTIVE
WHERE dt IN ('-1日',...,'-30日')
) history
ON active.user_id = history.user_id

生命周期总价值(LTV)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# 获取LTV计算日期内所有新增用户,相较于计算日的LTV情况
SELECT dt
,SUM(pay) numerator
,COUNT(DISTINCT user_id) denominator
,'DT' static_dt
FROM (
# 新增用户的消费情况
SELECT register.user_id,pay.pay,register.dt
FROM (
# 指定LTV日期新增用户
SELECT user_id,dt
FROM USER_REGISTER
WHERE DT IN ('LTV1','',...,'LTV365')
) register
LEFT JOIN (
# 用户每日付费情况
SELECT user_id,SUM(pay_amt) pay,dt
FROM USER_PAY
WHERE dt >= 'LTV365' AND dt <= 'DT'
GROUP BY user_id,dt
) pay
ON register.user_id = pay.user_id
AND pay.dt >= u.dt
) detail
GROUP BY dt

ps:
附带一个小的知识点,建立RFM模型(R:最近一次消费,F:消费频率,M:消费金额)
基于上面LTV的中间结果,获取上一个月新增用户的付款频次及付款金额
以付款频次为横轴,付款金额为纵轴建立坐标系,将用户散落到坐标系上
根据付款频次以及付款金额阈值进行用户分群,可以分为4类