Open Fionn88 opened 8 months ago
Q:上游資料表跟下游資料表,這裡的傳輸資料,通常業界是怎麼處理的呢?例如用 SQL 內部的 script 還是? A:用 Airflow 去做排程 Extract 資料
--[跟著做1] --users這張表有幾筆資料? --其中gender = 'FEMALE'有幾筆? --其中is_graduate = TRUE有幾筆? --其中graduate_time <= '2022-08-01'有幾筆?
SELECT
*
FROM
users
WHERE
gender = 'FEMALE' -- 以下根據題意自行增減條件
AND is_graduate = TRUE
AND graduate_time <= '2022-08-01'
;
--名字是'趙雅文'的同學, 編號是幾號? --誰先畢業?
SELECT
name, id, graduate_time
FROM
users
WHERE
name = '趙雅文'
ORDER BY
graduate_time
;
--名字有'雅'這個字 --其中有填寫畢業日期的 --其中有編號前五小的
SELECT
name, id, graduate_time
FROM
users
WHERE
name LIKE '%雅%' -- 以下根據題意自行增減條件
AND graduate_time IS NOT NULL
ORDER BY
id ASC
LIMIT 5
;
--[小試身手1] --在2022年註冊的使用者有幾位? --女生/男生各幾位? --畢業/在校各幾位?
SELECT
name, id, created_at, gender, is_graduate
FROM
users
WHERE
created_at BETWEEN '2022-01-01' AND '2022-12-31' -- 以下根據題意自行增減條件
-- AND gender = 'MALE'
-- AND gender = 'FEMALE'
-- AND is_graduate = TRUE
-- AND is_graduate = FALSE
;
--兩班的專案成果數分別為何? --projects表中, course_id = 1 --projects表中, course_id = 2
SELECT
id, name, course_id
FROM
projects
WHERE
course_id = 1
-- course_id = 2
;
--兩班的專案成果數分別為何? --contests表中, course_id = 1 --contests表中, course_id = 2
SELECT
id, name, course_id
FROM
contests
WHERE
course_id = 1
-- course_id = 2
;
SELECT
MIN(graduate_time) AS min_graduate_time,
MAX(graduate_time) AS max_graduate_time,
count(1) AS cnt,
count(DISTINCT gender) AS cnt_is_graduate
FROM
users
;
SELECT
MIN(graduate_time) AS min_graduate_time,
MAX(graduate_time) AS max_graduate_time,
count(*) AS count_data,
count(DISTINCT is_graduate) AS cnt_is_graduate,
count(DISTINCT gender) AS cnt_gender
FROM
users
;
SELECT
SUM(completeness) AS sum_comp,
AVG(completeness) AS avg_comp,
SUM(completeness)/count(completeness)::DECIMAL
FROM
project_scores ps
;
SELECT
DISTINCT completeness
FROM
project_scores;
--【使用者】 --學習者的年齡上下界 --資料總筆數有幾筆 --「性別」有幾種可能 --「是否畢業」有幾種可能
SELECT
MIN(graduate_time) AS min_graduate_time,
MAX(graduate_time) AS max_graduate_time,
COUNT(1) AS count_users,
COUNT(DISTINCT is_graduate) AS count_graduate_types,
COUNT(DISTINCT gender) AS count_gender_types
FROM
users
;
--【學員成果】 --完整度給分總和 --完整度給分平均
SELECT
SUM(completeness) AS sum_comp,
AVG(completeness) AS avg_comp
FROM
project_scores
;
--【進階思考】 --每個專案被評過幾次分? --每個專案被幾個人評過分? --兩者相減為重複評分
SELECT
project_id,
COUNT(1) AS count_scores,
COUNT(DISTINCT scorer_id) AS count_users,
(COUNT(1) - COUNT(DISTINCT scorer_id)) AS count_duplication
FROM
project_scores
GROUP BY
project_id
ORDER BY
(COUNT(1) - COUNT(DISTINCT scorer_id)) DESC
;
--每個維度的平均分數?
SELECT
AVG(completeness) AS avg_comp,
AVG(innovativeness) AS avg_inno,
AVG(technique) AS avg_tech,
AVG(practicality) AS avg_prac,
AVG(visualization) AS avg_visu
FROM
project_scores
;
SELECT
project_id,
COUNT(1) AS total_cnt
COUNT(DISTINCT score_id) as cnt
FROM
project_scores
GROUP BY
project_id
ORDER BY
(COUNT(1) - COUNT(DISTINCT scorer_id)) DESC