File: 206_gitlab_prep_geozone.sql

package info (click to toggle)
sqlfmt 0.29.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,580 kB
  • sloc: python: 10,007; sql: 5,626; makefile: 39
file content (120 lines) | stat: -rw-r--r-- 5,139 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# COPYRIGHT GITLAB, USED UNDER MIT LICENSE
# SEE: https://github.com/tconbeer/gitlab-analytics-sqlfmt/blob/9360d2f1986c37615926b0416e8d0fb23cae3e6e/LICENSE
WITH source AS (

    SELECT *
    FROM {{ ref('geozones_yaml_flatten_source') }}

),  grouping AS (

    SELECT
      geozone_title,
      IFF(country = 'United States', geozone_title || ', ' || country, geozone_title)             AS geozone_locality,
      'All, ' || country                                                                          AS country_locality,
      geozone_factor,
      country,
      state_or_province,
      valid_from,
      valid_to,
      is_current,
      /* Filling in NULLs with a value for the inequality check in the next step of the gaps and islands problem
      (finding groups based on when the factor changes and not just the value of the factor)
      */
      LAG(geozone_factor, 1, 0) OVER (PARTITION BY country,state_or_province ORDER BY valid_from) AS lag_factor,
      CONDITIONAL_TRUE_EVENT(geozone_factor != lag_factor)
                             OVER (PARTITION BY country,state_or_province ORDER BY valid_from)    AS locality_group,
      LEAD(valid_from,1) OVER (PARTITION BY country,state_or_province ORDER BY valid_from)        AS next_entry
    FROM source

), final AS (

    SELECT DISTINCT
      geozone_title,
      geozone_factor,
      geozone_locality,
      country_locality,
      country                                                                                                            AS geozone_country,
      state_or_province                                                                                                  AS geozone_state_or_province,
      MIN(valid_from) OVER (PARTITION BY country,state_or_province,locality_group)::DATE                                 AS first_file_date,
      MAX(valid_to) OVER (PARTITION BY country,state_or_province,locality_group)::DATE                                   AS last_file_date,
      -- Fixed date represents when location factor becan to be collected in source data.
      IFF(locality_group = 1, LEAST('2020-03-24',first_file_date),first_file_date)                                       AS valid_from,
      MAX(COALESCE(next_entry,{{ var('tomorrow') }})) OVER (PARTITION BY country,state_or_province,locality_group)::DATE AS valid_to,
      BOOLOR_AGG(is_current) OVER (PARTITION BY country,state_or_province,locality_group)                                AS is_current_file
    FROM grouping
    
)

SELECT *
FROM final
)))))__SQLFMT_OUTPUT__(((((
# COPYRIGHT GITLAB, USED UNDER MIT LICENSE
# SEE:
# https://github.com/tconbeer/gitlab-analytics-sqlfmt/blob/9360d2f1986c37615926b0416e8d0fb23cae3e6e/LICENSE
with
    source as (select * from {{ ref("geozones_yaml_flatten_source") }}),
    grouping as (

        select
            geozone_title,
            iff(
                country = 'United States',
                geozone_title || ', ' || country,
                geozone_title
            ) as geozone_locality,
            'All, ' || country as country_locality,
            geozone_factor,
            country,
            state_or_province,
            valid_from,
            valid_to,
            is_current,
            /* Filling in NULLs with a value for the inequality check in the next step of the gaps and islands problem
      (finding groups based on when the factor changes and not just the value of the factor)
      */
            lag(geozone_factor, 1, 0) over (
                partition by country, state_or_province order by valid_from
            ) as lag_factor,
            conditional_true_event(geozone_factor != lag_factor) over (
                partition by country, state_or_province order by valid_from
            ) as locality_group,
            lead(valid_from, 1) over (
                partition by country, state_or_province order by valid_from
            ) as next_entry
        from source

    ),
    final as (

        select distinct
            geozone_title,
            geozone_factor,
            geozone_locality,
            country_locality,
            country as geozone_country,
            state_or_province as geozone_state_or_province,
            min(valid_from) over (
                partition by country, state_or_province, locality_group
            )::date as first_file_date,
            max(valid_to) over (
                partition by country, state_or_province, locality_group
            )::date as last_file_date,
            -- Fixed date represents when location factor becan to be collected in
            -- source data.
            iff(
                locality_group = 1,
                least('2020-03-24', first_file_date),
                first_file_date
            ) as valid_from,
            max(coalesce(next_entry,{{ var("tomorrow") }})) over (
                partition by country, state_or_province, locality_group
            )::date as valid_to,
            boolor_agg(is_current) over (
                partition by country, state_or_province, locality_group
            ) as is_current_file
        from grouping

    )

select *
from final