`DECLARE dt_inicio_consumo DATE DEFAULT '2021-01-01';
DECLARE dt_fim_consumo DATE DEFAULT '2023-12-31'; --DATE DEFAULT date_sub(current_date, INTERVAL 1 DAY);
INSERT INTO gglobo-prodservdig-hdg-prd.dashboard_g1.mc_dash_stage_foundation_pageviews_raw
WITH
raw as (
SELECT data as dt_consumo
, IFNULL(globoId,fullVisitorId) as gid_enriquecido
, CASE
WHEN pagePath LIKE '%/google/amp/%' THEN regexp_extract(regexp_extract(REPLACE(pagePath,'/google/amp/','/'), "..ghtml"), 'g1.globo.')
WHEN pagePath LIKE '%g1.globo.com&ambient=amp&origin_url%'THEN regexp_extract(regexp_extract(REPLACE(pagePath,'g1.globo.com&ambient=amp&origin_url=https://',''), "..ghtml"), 'g1.globo.')
ELSE regexp_extract(regexp_extract(pagePath, "..ghtml"), 'g1.globo.')
END AS ds_url
, tipoConsumo
, dispositivo
, ambienteConsumo
, provider
, tempoNaPagina
, sourceMedium
, screenName
, hitNumber
, FIRST_VALUE(h.pagePath) OVER (PARTITION BY sessionId ORDER BY h.hitnumber ASC) AS landing_page
, FIRST_VALUE(h.screenName) OVER (PARTITION BY sessionId ORDER BY h.hitnumber ASC) AS landing_screen
, LAG(IF(h.tipoConsumo IN ('pageview'), h.pagePath, NULL)) OVER (PARTITION BY sessionId, h.tipoConsumo ORDER BY h.hitnumber ASC) AS previous_page
, REGEXP_EXTRACT(h.pagePath, r'([^.]+)') AS object_ajust
, PERCENTILE_CONT(tempoNaPagina, 0.5) OVER(PARTITION BY LOWER(SPLIT(SPLIT(REPLACE(pagePath,'/google/amp/','/'),'.globo.com')[SAFE_OFFSET(1)],'.')[SAFE_OFFSET(0)])) AS Mediana_TempoNaPagina_com0
, PERCENTILE_CONT(CASE WHEN tempoNaPagina >0 THEN tempoNaPagina ELSE NULL END, 0.5) OVER(PARTITION BY LOWER(SPLIT(SPLIT(REPLACE(pagePath,'/google/amp/','/'),'.globo.com')[SAFE_OFFSET(1)],'.')[SAFE_OFFSET(0)])) AS Mediana_TempoNaPagina_sem0
FROM gglobo-prodservdig-hdg-prd.foundation_g1.foundation_g1_sessions_v3 as a, a.hits as h
WHERE data between dt_inicio_consumo and dt_fim_consumo
AND tipoConsumo in ('pageview')
),
raw_n1 as (
select
*,
gglobo-prodservdig-hdg-prd.foundation_geral.udf_origem_externa_n1(sourceMedium,screenName) as origemExternaN1
from
raw
),
raw_n2 as (
select
*,
gglobo-prodservdig-hdg-prd.foundation_geral.udf_origem_externa_n2(origemExternaN1, landing_page, landing_screen, sourceMedium, screenName) as origemExternaN2
from
raw_n1
),
raw_n3 as (
SELECT
*,
gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n1(ds_url) as editoriaN1,
gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n2(gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n1(ds_url), ds_url) as editoriaN2,
gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n3(gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n2(gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n1(ds_url), ds_url), ds_url) as editoriaN3
FROM raw_n2
)
SELECT
dt_consumo
, ds_url
, CASE
WHEN editoriaN1 LIKE "Outros - Especiais - Guia de Compras" THEN "Editorias - Guia de Compras"
ELSE editoriaN1
END AS ds_editoria
, editoriaN2 as ds_sub_editoria
, CASE
WHEN ds_url like "%/pop-arte/musica/blog/mauro-ferreira/%" THEN "Editorias - Pop & Arte | Música | Blog do Mauro Ferreira"
ELSE editoriaN3
END as ds_sub_editoria_n3
, regexpextract(EditoriaN1, r'^[a-zA-Z0-9.+-]+') as ds_grupo_editoria
, COUNT(1) AS pageviews
, COUNTIF(dispositivo='mobile') AS Pageviews_mobile
, COUNTIF(dispositivo='desktop') AS Pageviews_desktop
, COUNTIF(ambienteConsumo='web') AS Pageviews_web
, COUNTIF(ambienteConsumo='app') AS Pageviews_app
, COUNTIF(ambienteConsumo='amp') AS Pageviews_amp
, COUNTIF(provider='cadun') AS Pageviews_logados
, COUNTIF(provider='anonymous') AS Pageviewss_anonimos
, COUNTIF(origemExternaN1 = 'Direto' AND origemExternaN2 = 'Home') AS Pageviews_direto
, COUNTIF(origemExternaN1 = 'Direto' AND origemExternaN2 = 'Dark Social') AS PageviewsDarkSocial
, COUNTIF(origemExternaN1 = 'Globo' AND origemExternaN2 = 'Globo.com') AS PageviewsHomeGlobo
, COUNTIF(origemExternaN1 = 'Google') AS PageviewsGoogle
, COUNTIF(origemExternaN1 = 'Push') AS Pageviews_push
, COUNTIF(origemExternaN1 = 'Redes Sociais') AS Pageviews_redes_sociais
, SUM(tempoNaPagina) AS Tempo_na_pagina
, AVG(tempoNaPagina) AS Media_Tempo_na_pagina_com0
, AVG(CASE WHEN tempoNaPagina > 0 THEN tempoNaPagina ELSE NULL END) AS Media_Tempo_na_pagina_sem0
, ANY_VALUE(Mediana_TempoNaPagina_com0) AS Mediana_Tempo_na_pagina_com0
, ANY_VALUE(Mediana_TempoNaPagina_sem0) AS Mediana_TempoNaPagina_sem0
FROM raw_n3
WHERE ds_url IS NOT NULL
GROUP BY 1,2,3,4,5,6;`
`DECLARE dt_inicio_consumo DATE DEFAULT '2021-01-01'; DECLARE dt_fim_consumo DATE DEFAULT '2023-12-31'; --DATE DEFAULT date_sub(current_date, INTERVAL 1 DAY);
INSERT INTO
gglobo-prodservdig-hdg-prd.dashboard_g1.mc_dash_stage_foundation_pageviews_raw
WITH
raw as ( SELECT data as dt_consumo , IFNULL(globoId,fullVisitorId) as gid_enriquecido , CASE WHEN pagePath LIKE '%/google/amp/%' THEN regexp_extract(regexp_extract(REPLACE(pagePath,'/google/amp/','/'), "..ghtml"), 'g1.globo.') WHEN pagePath LIKE '%g1.globo.com&ambient=amp&origin_url%'THEN regexp_extract(regexp_extract(REPLACE(pagePath,'g1.globo.com&ambient=amp&origin_url=https://',''), "..ghtml"), 'g1.globo.') ELSE regexp_extract(regexp_extract(pagePath, "..ghtml"), 'g1.globo.')
END AS ds_url , tipoConsumo , dispositivo , ambienteConsumo , provider , tempoNaPagina , sourceMedium , screenName , hitNumber , FIRST_VALUE(h.pagePath) OVER (PARTITION BY sessionId ORDER BY h.hitnumber ASC) AS landing_page , FIRST_VALUE(h.screenName) OVER (PARTITION BY sessionId ORDER BY h.hitnumber ASC) AS landing_screen , LAG(IF(h.tipoConsumo IN ('pageview'), h.pagePath, NULL)) OVER (PARTITION BY sessionId, h.tipoConsumo ORDER BY h.hitnumber ASC) AS previous_page , REGEXP_EXTRACT(h.pagePath, r'([^.]+)') AS object_ajust , PERCENTILE_CONT(tempoNaPagina, 0.5) OVER(PARTITION BY LOWER(SPLIT(SPLIT(REPLACE(pagePath,'/google/amp/','/'),'.globo.com')[SAFE_OFFSET(1)],'.')[SAFE_OFFSET(0)])) AS Mediana_TempoNaPagina_com0 , PERCENTILE_CONT(CASE WHEN tempoNaPagina >0 THEN tempoNaPagina ELSE NULL END, 0.5) OVER(PARTITION BY LOWER(SPLIT(SPLIT(REPLACE(pagePath,'/google/amp/','/'),'.globo.com')[SAFE_OFFSET(1)],'.')[SAFE_OFFSET(0)])) AS Mediana_TempoNaPagina_sem0 FROM
gglobo-prodservdig-hdg-prd.foundation_g1.foundation_g1_sessions_v3
as a, a.hits as h WHERE data between dt_inicio_consumo and dt_fim_consumo AND tipoConsumo in ('pageview') ),raw_n1 as ( select *,
gglobo-prodservdig-hdg-prd.foundation_geral.udf_origem_externa_n1
(sourceMedium,screenName) as origemExternaN1 from raw ),raw_n2 as ( select *,
gglobo-prodservdig-hdg-prd.foundation_geral.udf_origem_externa_n2
(origemExternaN1, landing_page, landing_screen, sourceMedium, screenName) as origemExternaN2 from raw_n1 ),raw_n3 as (
SELECT
*,
gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n1
(ds_url) as editoriaN1,gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n2
(gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n1
(ds_url), ds_url) as editoriaN2,gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n3
(gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n2
(gglobo-prodservdig-hdg-prd.foundation_g1.udf_editoria_n1
(ds_url), ds_url), ds_url) as editoriaN3 FROM raw_n2)
SELECT dt_consumo , ds_url , CASE WHEN editoriaN1 LIKE "Outros - Especiais - Guia de Compras" THEN "Editorias - Guia de Compras" ELSE editoriaN1 END AS ds_editoria
, editoriaN2 as ds_sub_editoria , CASE WHEN ds_url like "%/pop-arte/musica/blog/mauro-ferreira/%" THEN "Editorias - Pop & Arte | Música | Blog do Mauro Ferreira" ELSE editoriaN3 END as ds_sub_editoria_n3 , regexpextract(EditoriaN1, r'^[a-zA-Z0-9.+-]+') as ds_grupo_editoria , COUNT(1) AS pageviews , COUNTIF(dispositivo='mobile') AS Pageviews_mobile , COUNTIF(dispositivo='desktop') AS Pageviews_desktop , COUNTIF(ambienteConsumo='web') AS Pageviews_web , COUNTIF(ambienteConsumo='app') AS Pageviews_app , COUNTIF(ambienteConsumo='amp') AS Pageviews_amp , COUNTIF(provider='cadun') AS Pageviews_logados , COUNTIF(provider='anonymous') AS Pageviewss_anonimos , COUNTIF(origemExternaN1 = 'Direto' AND origemExternaN2 = 'Home') AS Pageviews_direto , COUNTIF(origemExternaN1 = 'Direto' AND origemExternaN2 = 'Dark Social') AS PageviewsDarkSocial , COUNTIF(origemExternaN1 = 'Globo' AND origemExternaN2 = 'Globo.com') AS PageviewsHomeGlobo , COUNTIF(origemExternaN1 = 'Google') AS PageviewsGoogle , COUNTIF(origemExternaN1 = 'Push') AS Pageviews_push , COUNTIF(origemExternaN1 = 'Redes Sociais') AS Pageviews_redes_sociais , SUM(tempoNaPagina) AS Tempo_na_pagina , AVG(tempoNaPagina) AS Media_Tempo_na_pagina_com0 , AVG(CASE WHEN tempoNaPagina > 0 THEN tempoNaPagina ELSE NULL END) AS Media_Tempo_na_pagina_sem0 , ANY_VALUE(Mediana_TempoNaPagina_com0) AS Mediana_Tempo_na_pagina_com0 , ANY_VALUE(Mediana_TempoNaPagina_sem0) AS Mediana_TempoNaPagina_sem0 FROM raw_n3 WHERE ds_url IS NOT NULL GROUP BY 1,2,3,4,5,6;`