ZhangZhihui's Blog  

PostgreSQL: Generate Date Dimension

-- 1. Create table
CREATE TABLE date_dim (
    date_key         INT PRIMARY KEY,         -- e.g., 20250815
    date_actual      DATE NOT NULL,
    year             INT NOT NULL,
    quarter          INT NOT NULL,
    month            INT NOT NULL,
    month_name       VARCHAR(20) NOT NULL,
    day              INT NOT NULL,
    day_of_week      INT NOT NULL,            -- 1=Monday ... 7=Sunday
    day_name         VARCHAR(20) NOT NULL,
    week_of_year     INT NOT NULL,
    is_weekend       BOOLEAN NOT NULL,
    fiscal_year      INT NOT NULL,
    fiscal_quarter   INT NOT NULL,
    is_holiday       BOOLEAN NOT NULL DEFAULT FALSE
);

-- 2. Generate rows
INSERT INTO date_dim (
    date_key, date_actual, year, quarter, month, month_name, day,
    day_of_week, day_name, week_of_year, is_weekend,
    fiscal_year, fiscal_quarter, is_holiday
)
SELECT
    EXTRACT(YEAR FROM d)::INT * 10000
    + EXTRACT(MONTH FROM d)::INT * 100
    + EXTRACT(DAY FROM d)::INT AS date_key,
    d AS date_actual,
    EXTRACT(YEAR FROM d)::INT AS year,
    EXTRACT(QUARTER FROM d)::INT AS quarter,
    EXTRACT(MONTH FROM d)::INT AS month,
    TO_CHAR(d, 'Month') AS month_name,
    EXTRACT(DAY FROM d)::INT AS day,
    EXTRACT(ISODOW FROM d)::INT AS day_of_week,     -- ISO: Monday=1
    TO_CHAR(d, 'Day') AS day_name,
    EXTRACT(WEEK FROM d)::INT AS week_of_year,
    CASE WHEN EXTRACT(ISODOW FROM d) IN (6,7) THEN TRUE ELSE FALSE END AS is_weekend,

    -- Example fiscal year: starts in April
    CASE WHEN EXTRACT(MONTH FROM d) >= 4
         THEN EXTRACT(YEAR FROM d)::INT
         ELSE EXTRACT(YEAR FROM d)::INT - 1
    END AS fiscal_year,
    CASE
        WHEN EXTRACT(MONTH FROM d) BETWEEN 4 AND 6 THEN 1
        WHEN EXTRACT(MONTH FROM d) BETWEEN 7 AND 9 THEN 2
        WHEN EXTRACT(MONTH FROM d) BETWEEN 10 AND 12 THEN 3
        ELSE 4
    END AS fiscal_quarter,

    FALSE AS is_holiday  -- Can be updated later
FROM
    generate_series(DATE '1900-01-01', DATE '9999-12-31', INTERVAL '1 day') AS t(d);

-- 3. Optional: Create index for faster joins
CREATE INDEX idx_date_dim_date_key ON date_dim(date_key);

 


Notes

  • Holiday flag: You can update it afterward by joining with a holiday list table:

    UPDATE date_dim
    SET is_holiday = TRUE
    WHERE date_actual IN (SELECT holiday_date FROM holiday_list);

     

  • Fiscal year: The above example assumes fiscal years start in April. You can tweak that for your organization’s fiscal calendar.

  • Size: This script will insert ~2.9 million rows for 1900–9999. If you want a smaller table, just adjust the range in generate_series.

 

 

 

MySQL 8+ : Generate Date Dimension

-- 1. Create table
CREATE TABLE date_dim (
    date_key         INT PRIMARY KEY,         -- e.g., 20250815
    date_actual      DATE NOT NULL,
    year             INT NOT NULL,
    quarter          INT NOT NULL,
    month            INT NOT NULL,
    month_name       VARCHAR(20) NOT NULL,
    day              INT NOT NULL,
    day_of_week      INT NOT NULL,            -- 1=Monday ... 7=Sunday
    day_name         VARCHAR(20) NOT NULL,
    week_of_year     INT NOT NULL,
    is_weekend       BOOLEAN NOT NULL,
    fiscal_year      INT NOT NULL,
    fiscal_quarter   INT NOT NULL,
    is_holiday       BOOLEAN NOT NULL DEFAULT FALSE
);

-- 2. Recursive CTE to generate dates
WITH RECURSIVE all_dates AS (
    SELECT DATE('1900-01-01') AS d
    UNION ALL
    SELECT DATE_ADD(d, INTERVAL 1 DAY)
    FROM all_dates
    WHERE d < '9999-12-31'
)
INSERT INTO date_dim (
    date_key, date_actual, year, quarter, month, month_name, day,
    day_of_week, day_name, week_of_year, is_weekend,
    fiscal_year, fiscal_quarter, is_holiday
)
SELECT
    YEAR(d) * 10000 + MONTH(d) * 100 + DAY(d) AS date_key,
    d AS date_actual,
    YEAR(d) AS year,
    QUARTER(d) AS quarter,
    MONTH(d) AS month,
    MONTHNAME(d) AS month_name,
    DAY(d) AS day,
    DAYOFWEEK(d) AS day_of_week,  -- Sunday=1 ... Saturday=7 (MySQL default)
    DAYNAME(d) AS day_name,
    WEEK(d, 3) AS week_of_year,   -- Mode 3 = ISO weeks
    CASE WHEN DAYOFWEEK(d) IN (1,7) THEN TRUE ELSE FALSE END AS is_weekend,

    -- Example: Fiscal year starts in April
    CASE WHEN MONTH(d) >= 4 THEN YEAR(d) ELSE YEAR(d) - 1 END AS fiscal_year,
    CASE
        WHEN MONTH(d) BETWEEN 4 AND 6 THEN 1
        WHEN MONTH(d) BETWEEN 7 AND 9 THEN 2
        WHEN MONTH(d) BETWEEN 10 AND 12 THEN 3
        ELSE 4
    END AS fiscal_quarter,

    FALSE AS is_holiday
FROM all_dates
OPTION MAX_RECURSION 0;  -- MySQL allows unlimited recursion depth in 8+

-- 3. Index for joins
CREATE INDEX idx_date_dim_date_key ON date_dim(date_key);

 


Important Notes

    • Performance warning:
      Generating ~2.9 million rows for 1900–9999 in a recursive CTE can take a while in MySQL. If you only need, say, 1900–2100 (~73k rows), change:

      WHERE d < '2100-12-31'
    • DAYOFWEEK() difference:
      MySQL returns Sunday=1 … Saturday=7, unlike ISO Monday=1. If you want Monday=1, you can use:

      ((DAYOFWEEK(d) + 5) % 7) + 1 AS day_of_week
    • Holiday flag: Populate later by joining with your holiday list.

    • Fiscal year: Adjust start month in the CASE logic to match your business rules.

 

Here’s the optimized MySQL date dimension generator that:

  1. Generates only from 1900-01-01 to 2100-12-31 (≈73,000 rows)

  2. Adds a special sentinel row for 9999-12-31 for SCD2 “active until infinity” handling.


MySQL: Date Dimension (1900–2100 + 9999 Sentinel)

-- 1. Create the table
CREATE TABLE date_dim (
    date_key         INT PRIMARY KEY,         -- e.g., 20250815
    date_actual      DATE NOT NULL,
    year             INT NOT NULL,
    quarter          INT NOT NULL,
    month            INT NOT NULL,
    month_name       VARCHAR(20) NOT NULL,
    day              INT NOT NULL,
    day_of_week      INT NOT NULL,            -- 1=Monday ... 7=Sunday (ISO)
    day_name         VARCHAR(20) NOT NULL,
    week_of_year     INT NOT NULL,
    is_weekend       BOOLEAN NOT NULL,
    fiscal_year      INT NOT NULL,
    fiscal_quarter   INT NOT NULL,
    is_holiday       BOOLEAN NOT NULL DEFAULT FALSE
);

-- 2. Generate dates 1900-01-01 to 2100-12-31
INSERT INTO date_dim (
    date_key, date_actual, year, quarter, month, month_name, day,
    day_of_week, day_name, week_of_year, is_weekend,
    fiscal_year, fiscal_quarter, is_holiday
)
SELECT
    DATE_FORMAT(d, '%Y%m%d') + 0 AS date_key,
    d AS date_actual,
    YEAR(d) AS year,
    QUARTER(d) AS quarter,
    MONTH(d) AS month,
    MONTHNAME(d) AS month_name,
    DAY(d) AS day,
    ((DAYOFWEEK(d) + 5) % 7) + 1 AS day_of_week,  -- Monday=1 ... Sunday=7
    DAYNAME(d) AS day_name,
    WEEK(d, 3) AS week_of_year,
    CASE WHEN DAYOFWEEK(d) IN (1,7) THEN TRUE ELSE FALSE END AS is_weekend,

    -- Fiscal year starting in April
    CASE WHEN MONTH(d) >= 4 THEN YEAR(d) ELSE YEAR(d) - 1 END AS fiscal_year,
    CASE
        WHEN MONTH(d) BETWEEN 4 AND 6 THEN 1
        WHEN MONTH(d) BETWEEN 7 AND 9 THEN 2
        WHEN MONTH(d) BETWEEN 10 AND 12 THEN 3
        ELSE 4
    END AS fiscal_quarter,

    FALSE AS is_holiday
FROM (
    SELECT DATE_ADD('1900-01-01', INTERVAL n DAY) AS d
    FROM (
        SELECT a.a + b.a * 10 + c.a * 100 + d.a * 1000 + e.a * 10000 AS n
        FROM (SELECT 0 a UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
              UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6
              UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) a
        CROSS JOIN (SELECT 0 a UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
                    UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6
                    UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) b
        CROSS JOIN (SELECT 0 a UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
                    UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6
                    UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) c
        CROSS JOIN (SELECT 0 a UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
                    UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6
                    UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d
        CROSS JOIN (SELECT 0 a UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
                    UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6
                    UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) e
    ) numbers
) dates
WHERE d <= '2100-12-31';

-- 3. Add the sentinel "infinity" row for SCD2
INSERT INTO date_dim (
    date_key, date_actual, year, quarter, month, month_name, day,
    day_of_week, day_name, week_of_year, is_weekend,
    fiscal_year, fiscal_quarter, is_holiday
)
VALUES (
    99991231,
    '9999-12-31',
    9999,
    4,
    12,
    'December',
    31,
    5,  -- Friday (for 9999-12-31)
    'Friday',
    52,
    FALSE,
    9999,
    4,
    FALSE
);

-- 4. Index for joins
CREATE INDEX idx_date_dim_date_key ON date_dim(date_key);

 


Why this is optimal for SCD2

  • Keeps the table small (~73k rows instead of millions)

  • Supports all realistic reporting dates

  • Has a special 9999-12-31 row for “active” SCD2 records

  • Joins are fast because date_key is an INT

  • Can still add fiscal calendars, holiday flags, etc.

 

Here’s a complete example of an SCD2 dimension table that uses the date_dim we just built. It shows how to store start_date_key and end_date_key pointing to date_dim, and how to query the “current” record.


1. Create the SCD2 dimension table

CREATE TABLE customer_dim (
    customer_key       INT PRIMARY KEY AUTO_INCREMENT,  -- surrogate key
    customer_id        INT NOT NULL,                    -- natural key
    name               VARCHAR(100),
    start_date_key     INT NOT NULL,                    -- FK to date_dim
    end_date_key       INT NOT NULL,                    -- FK to date_dim
    is_current         CHAR(1) NOT NULL DEFAULT 'Y',   -- 'Y' or 'N'
    CONSTRAINT fk_start_date FOREIGN KEY (start_date_key)
        REFERENCES date_dim(date_key),
    CONSTRAINT fk_end_date FOREIGN KEY (end_date_key)
        REFERENCES date_dim(date_key)
);

 


2. Insert example historical and current records

-- First version of customer
INSERT INTO customer_dim (customer_id, name, start_date_key, end_date_key, is_current)
VALUES (1001, 'Alice', 20220101, 20221231, 'N');

-- Second (current) version of customer
INSERT INTO customer_dim (customer_id, name, start_date_key, end_date_key, is_current)
VALUES (1001, 'Alice Smith', 20230101, 99991231, 'Y');

-- Another customer, still active
INSERT INTO customer_dim (customer_id, name, start_date_key, end_date_key, is_current)
VALUES (1002, 'Bob', 20220515, 99991231, 'Y');

 


3. Query current/active records

-- Using is_current flag
SELECT *
FROM customer_dim
WHERE is_current = 'Y';

-- OR using end_date_key = 99991231
SELECT c.*
FROM customer_dim c
JOIN date_dim d ON c.end_date_key = d.date_key
WHERE d.date_key = 99991231;

-- OR using a specific reporting date
SELECT *
FROM customer_dim
WHERE 20250815 BETWEEN start_date_key AND end_date_key;

 


4. Notes / Best Practices

  1. start_date_key and end_date_key are FKs pointing to date_dim.

    • This ensures consistency for joins and allows access to fiscal/calendar attributes.

  2. 9999-12-31 sentinel indicates active/current record.

  3. is_current flag is optional but convenient for fast queries.

  4. Updating SCD2 records:

    • When a customer changes, set the previous record’s end_date_key to DATE_KEY_OF_DAY_BEFORE_NEW_START and is_current = 'N'.

    • Insert a new record with start_date_key = new_start_date and end_date_key = 99991231, is_current = 'Y'.


This pattern allows easy historical tracking, accurate reporting for any date, and works seamlessly with the date_dim you generated.

 

posted on 2025-08-15 13:57  ZhangZhihuiAAA  阅读(10)  评论(0)    收藏  举报