Data, Analytics & AI. Modern alternative to Snowflake. Cost-effective and simple for massive-scale analytics. https://databend.com
This document shows how to reproduce, in Databend, a precision difference observed when comparing results against Snowflake.
Note: all results are produced by bendsql and snowsql.
bendsql --query=""
snowsql --query "" -o output_format=tsv -o header=false -o timing=false -o friendly=false
-- Customer dimension: one row per customer.
DROP TABLE IF EXISTS customers;
CREATE TABLE customers (
    customer_id      INT UNSIGNED NOT NULL,  -- surrogate key
    customer_name    VARCHAR      NOT NULL,
    segment          VARCHAR      NOT NULL,  -- grouping key for Query 3
    create_timestamp DATE         NOT NULL,
    active           BOOLEAN      NOT NULL
);
-- Calendar dimension: one row per date.
DROP TABLE IF EXISTS date_dim;
CREATE TABLE date_dim (
    date_key    DATE             NOT NULL,
    day_of_week TINYINT UNSIGNED NOT NULL,
    month       TINYINT UNSIGNED NOT NULL,
    year        SMALLINT UNSIGNED NOT NULL
);
-- Product dimension: one row per product.
DROP TABLE IF EXISTS products;
CREATE TABLE products (
    product_id   INT UNSIGNED   NOT NULL,
    product_name VARCHAR        NOT NULL,
    price        DECIMAL(10, 2) NOT NULL,  -- exact decimal; AVG over this is part of the precision repro
    category     VARCHAR        NOT NULL
);
-- Sales fact table: one row per sale line.
DROP TABLE IF EXISTS sales;
CREATE TABLE sales (
    sale_id     INT UNSIGNED   NOT NULL,
    product_id  INT UNSIGNED   NOT NULL,  -- FK -> products.product_id
    customer_id INT UNSIGNED   NOT NULL,  -- FK -> customers.customer_id
    sale_date   DATE           NOT NULL,
    quantity    INT            NOT NULL,
    net_paid    DECIMAL(10, 2) NOT NULL   -- exact decimal; AVG/SUM over this is the precision repro
);
-- External stage over a public S3 bucket holding the sample parquet files;
-- anonymous access, so no credentials are required.
CREATE STAGE IF NOT EXISTS wizardbend
URL = 's3://wizardbend/'
CONNECTION = (ALLOW_ANONYMOUS = 'true');
-- Load each table from its parquet file on the stage.
COPY INTO customers
    FROM @wizardbend/selects/customers.parquet
    FILE_FORMAT = (TYPE = parquet);

COPY INTO date_dim
    FROM @wizardbend/selects/date_dim.parquet
    FILE_FORMAT = (TYPE = parquet);

COPY INTO products
    FROM @wizardbend/selects/products.parquet
    FILE_FORMAT = (TYPE = parquet);

COPY INTO sales
    FROM @wizardbend/selects/sales.parquet
    FILE_FORMAT = (TYPE = parquet);
Queries
-- Query 3: Average sale amount per customer segment.
-- AVG over DECIMAL(10, 2) is where Databend and Snowflake results can diverge.
-- Fix: terminate the statement with ';' so it parses as its own statement
-- when the queries run back-to-back in one script.
SELECT
    c.segment,
    AVG(s.net_paid) AS avg_sale_amount
FROM sales s
INNER JOIN customers c ON s.customer_id = c.customer_id
GROUP BY c.segment
ORDER BY avg_sale_amount DESC;
-- Query 7: Average product price by category.
-- Fix: added the missing ';' statement terminator.
SELECT
    p.category,
    AVG(p.price) AS avg_price
FROM products p
GROUP BY p.category
ORDER BY avg_price DESC;
-- Query 13: Total sales and average quantity sold per product (top 10 by revenue).
-- Fix: added the missing ';' statement terminator.
SELECT
    p.product_id,
    p.product_name,
    SUM(s.net_paid) AS total_sales,
    AVG(s.quantity) AS avg_quantity_sold
FROM sales s
INNER JOIN products p ON s.product_id = p.product_id
GROUP BY p.product_id, p.product_name
ORDER BY total_sales DESC
LIMIT 10;
-- Query 21: Products with sales above average in their category.
-- NOTE(review): as written, HAVING compares each product's TOTAL sales
-- against the category's average PER-SALE amount (avg_sales is AVG of
-- individual net_paid values). Kept as-is since this file reproduces the
-- exact cross-engine output; confirm the intent before "fixing" it.
-- Fix: added the missing ';' statement terminator.

-- Per-category average of individual sale amounts.
WITH CategoryAverage AS (
    SELECT
        p.category,
        AVG(s.net_paid) AS avg_sales
    FROM sales s
    INNER JOIN products p ON s.product_id = p.product_id
    GROUP BY p.category
)
SELECT
    p.product_id,
    p.product_name,
    p.category,
    SUM(s.net_paid) AS total_sales,
    ca.avg_sales
FROM sales s
INNER JOIN products p ON s.product_id = p.product_id
INNER JOIN CategoryAverage ca ON p.category = ca.category
GROUP BY p.product_id, p.product_name, p.category, ca.avg_sales
HAVING SUM(s.net_paid) > ca.avg_sales
-- product_id leads the sort to keep output order deterministic for diffing.
ORDER BY p.product_id, total_sales DESC
LIMIT 10;
Summary
The precision difference is relative to Snowflake; the steps above show how to reproduce it in Databend. Note: all results are produced by bendsql and snowsql. bendsql --query=""
snowsql --query "" -o output_format=tsv -o header=false -o timing=false -o friendly=false
Setup
https://github.com/datafuselabs/wizard/blob/main/checksb/sql/selects/bend/setup.sql
Queries
Check scripts
The full check queries: https://github.com/datafuselabs/wizard/blob/main/checksb/sql/selects/check.sql