m-h-c-t / mh-hunt-helper

Tool to help collect data, estimate attraction, catch, and drop rates for a popular game.
https://www.mhct.win
MIT License
7 stars 14 forks source link

Start Aggregating Hunt Details #145

Open AardWolf opened 3 years ago

AardWolf commented 3 years ago

The hunt_details table is getting large; let's aggregate it to save space.

I ran an experiment for Fort Rox focusing on specific detail types and into two table structures.

950K rows in hunt_details take up 72 MB and were aggregated into 225K hunts that take up 6.5MB (small) or 7.5MB (wide).

This experiment shows a potentially large savings; fort_rox has 7 detail types that were of interest for aggregating. Other areas will have more or fewer; Zugzwang's Tower and Fiery Warpath would be worth exploring as well.

Proposal is to first implement these as schedulable aggregations in a stored procedure - one per area; cleaning hunt_details as they aggregate. Second phase would be to add a control table similar to filters and an overall hunt_detail aggregator similar to aggregate_all.

AardWolf commented 3 years ago

The SQL from my experiment is below. The conversation on Discord with counts and size details is at https://discord.com/channels/275500976662773761/355474934601875457/789881879184015370

create table hd_fort_rox (
    hunt_id int not null,
    weakened_weremice   tinyint default 0,
    can_autocatch_weremice  tinyint default 0,
    autocatch_nightmancer   tinyint default 0,
    weakened_critters   tinyint default 0,
    can_autocatch_critter   tinyint default 0,
    autocatch_nightfire tinyint default 0,
    can_autocatch_any tinyint default 0,
    primary key (hunt_id)
);

create table hd_fort_rox_sm (
    hunt_id int not null,
    cannon_level    tinyint default 0,
    ballista_level  tinyint default 0,
    tower_autocatch tinyint default 0,
    primary key (hunt_id)
);

-- The awful pivot(s)
insert into hd_fort_rox
select hunt_id, max(weakened_weremice) weakened_weremice, max(can_autocatch_weremice) can_autocatch_weremice, max(autocatch_nightmancer) autocatch_nightmancer, 
    max(weakened_critters) weakened_critters, max(can_autocatch_critters) can_autocatch_critters, max(autocatch_nightfire) autocatch_nightfire,
    max(can_autocatch_any) can_autocatch_any
from (
SELECT h.id as hunt_id, case when dt.name = 'weakened_weremice' then 1 else 0 end as weakened_weremice,
    case when dt.name = 'can_autocatch_weremice' then 1 else 0 end as can_autocatch_weremice,
    case when dt.name = 'autocatch_nightmancer' then 1 else 0 end as autocatch_nightmancer,
    case when dt.name = 'weakened_critters' then 1 else 0 end as weakened_critters,
    case when dt.name = 'can_autocatch_critters' then 1 else 0 end as can_autocatch_critters,
    case when dt.name = 'autocatch_nightfire' then 1 else 0 end as autocatch_nightfire,
    case when dt.name = 'can_autocatch_any' then 1 else 0 end as can_autocatch_any
from hunts h inner join hunt_details hd 
    on h.id = hd.hunt_id 
    INNER JOIN detail_types dt 
    ON hd.detail_type_id = dt.id 
    AND dt.name in ('weakened_weremice', 'can_autocatch_weremice', 'autocatch_nightmancer', 'weakened_critters', 
                    'can_autocatch_critters', 'autocatch_nightfire', 'can_autocatch_any')
where h.location_id = (select id from locations where name = 'Fort Rox')
) as t 
group by hunt_id
;

insert into hd_fort_rox_sm
select hunt_id, max(ballista) ballista, max(cannon) cannon, max(can_autocatch_any) can_autocatch_any
from (
SELECT h.id as hunt_id, case when dt.name = 'weakened_weremice' then 1 
        when dt.name = 'can_autocatch_weremice' then 2 
        when dt.name = 'autocatch_nightmancer' then 3 else 0 end as ballista,
    case when dt.name = 'weakened_critters' then 1 
        when dt.name = 'can_autocatch_critters' then 2 
        when dt.name = 'autocatch_nightfire' then 3 else 0 end as cannon,
    case when dt.name = 'can_autocatch_any' then 1 else 0 end as can_autocatch_any
from hunts h inner join hunt_details hd 
    on h.id = hd.hunt_id 
    INNER JOIN detail_types dt 
    ON hd.detail_type_id = dt.id 
    AND dt.name in ('weakened_weremice', 'can_autocatch_weremice', 'autocatch_nightmancer', 'weakened_critters', 
                    'can_autocatch_critters', 'autocatch_nightfire', 'can_autocatch_any')
where h.location_id = (select id from locations where name = 'Fort Rox')
) as t 
group by hunt_id
;

create table hd_size_test like hunt_details;
insert into hd_size_test 
select hd.*
from hunts h inner join hunt_details hd 
    on h.id = hd.hunt_id 
    INNER JOIN detail_types dt 
    ON hd.detail_type_id = dt.id 
    AND dt.name in ('weakened_weremice', 'can_autocatch_weremice', 'autocatch_nightmancer', 'weakened_critters', 
                    'can_autocatch_critters', 'autocatch_nightfire', 'can_autocatch_any')
where h.location_id = (select id from locations where name = 'Fort Rox')
;
AardWolf commented 3 years ago

The delete statement:

delete hd from hunts h inner join hunt_details as hd 
    on h.id = hd.hunt_id 
    INNER JOIN detail_types dt 
    ON hd.detail_type_id = dt.id 
    AND dt.name in ('weakened_weremice', 'can_autocatch_weremice', 'autocatch_nightmancer', 'weakened_critters', 
                    'can_autocatch_critters', 'autocatch_nightfire', 'can_autocatch_any')
where h.location_id = (select id from locations where name = 'Fort Rox')
;
AardWolf commented 3 years ago

I see a few broad categories of details we can aggregate - Location (fort rox's experiment was this), Event (halloween, lny, seh all have particular detail types), and General (pillages, is_lucky_catch, potentially more). We may be adding equipment-specific details such as scoundrel sleigher procs; those would be a fourth category or possibly small enough we don't bother aggregating them.

AardWolf commented 3 years ago

These were tested locally but not set up as an event. I did delete some rows from the hd_ tables and then re-populate them in the hunt_details table from a backup in order to confirm subsequent calls behave as desired.

DROP PROCEDURE IF EXISTS hd_fort_rox;
create table hd_fort_rox (
    hunt_id int not null,
    cannon_level    tinyint default 0,
    ballista_level  tinyint default 0,
    tower_autocatch tinyint default 0,
    primary key (hunt_id)
);

-- DELIMITER $$
CREATE PROCEDURE hd_fort_rox()
hdfr: BEGIN
    DECLARE oldest_hunt int;

    -- Figure out where we left off
    SELECT coalesce(max(hunt_id), 0) INTO oldest_hunt
    FROM hd_fort_rox;

    start transaction;
    insert into hd_fort_rox
    select hunt_id, max(ballista) ballista, max(cannon) cannon, max(can_autocatch_any) can_autocatch_any
    from (
    SELECT h.id as hunt_id, case when dt.name = 'weakened_weremice' then 1 
            when dt.name = 'can_autocatch_weremice' then 2 
            when dt.name = 'autocatch_nightmancer' then 3 else 0 end as ballista,
        case when dt.name = 'weakened_critters' then 1 
            when dt.name = 'can_autocatch_critters' then 2 
            when dt.name = 'autocatch_nightfire' then 3 else 0 end as cannon,
        case when dt.name = 'can_autocatch_any' then 1 else 0 end as can_autocatch_any
    from hunts h inner join hunt_details hd 
        on h.id = hd.hunt_id 
        INNER JOIN detail_types dt 
        ON hd.detail_type_id = dt.id 
        AND dt.name in ('weakened_weremice', 'can_autocatch_weremice', 'autocatch_nightmancer', 'weakened_critters', 
                        'can_autocatch_critters', 'autocatch_nightfire', 'can_autocatch_any')
    where h.location_id = (select id from locations where name = 'Fort Rox')
      and h.id > oldest_hunt
      and hd.detail_value_id = 3 -- only when these flags are true, they default to false
    ) as t 
    group by hunt_id
    ;

    -- clean up what we did
    delete hd from hd_fort_rox hfr inner join hunt_details as hd 
        on hfr.hunt_id = hd.hunt_id 
        INNER JOIN detail_types dt 
        ON hd.detail_type_id = dt.id 
        AND dt.name in ('weakened_weremice', 'can_autocatch_weremice', 'autocatch_nightmancer', 'weakened_critters', 
                        'can_autocatch_critters', 'autocatch_nightfire', 'can_autocatch_any')
    where hfr.hunt_id > oldest_hunt
    ;
    commit;
END;

call hd_fort_rox();

-- hd_bwrift builder
DROP PROCEDURE IF EXISTS hd_bwrift;
create table hd_bwrift (
    hunt_id     integer not null,
    fl_effect_ng    tinyint default 0, -- No Guard (false, true)
    fl_effect_ac    tinyint default 0, -- AColyte Influence
    fl_effect_ex    tinyint default 0, -- EXtra portal
    fl_effect_fr    tinyint default 0, -- FRozen portal
    fl_effect_st    tinyint default 0, -- STalker
    fl_effect_un    tinyint default 0, -- UNlucky alarm
    fl_hourglass    tinyint default 0,
    chamber_status  tinyint default 0, -- closed or open
    cleaver_status  tinyint default 0, -- hidden, available, unavailable, found
    fl_obelisk_charged  tinyint default 0, -- true, false
    fl_acolyte_sand_drained tinyint default 0, -- true, false
    primary key(hunt_id)
);

-- DELIMITER $$
CREATE PROCEDURE hd_bwrift()
hdbwr: BEGIN
    DECLARE oldest_hunt int;

    -- Figure out where we left off
    SELECT coalesce(max(hunt_id), 0) INTO oldest_hunt
    FROM hd_bwrift;

    start transaction;
    insert into hd_bwrift 
    select hunt_id, max(fl_effect_ng) fl_effect_ng, max(fl_effect_ac) fl_effect_ac, max(fl_effect_ex) fl_effect_ex, 
        max(fl_effect_fr) fl_effect_fr, max(fl_effect_st) fl_effect_st, max(fl_effect_un) fl_effect_un,
        max(fl_hourglass) fl_hourglass, max(chamber_status) chamber_status, max(cleaver_status) cleaver_status, 
        max(fl_obelisk_charged) fl_obelisk_charged, max(fl_acolyte_sand_drained) fl_acolyte_sand_drained
    from (
    SELECT h.id as hunt_id, case when dt.name = 'effect_ng' and hd.detail_value_id = 3 then 1 else 0 end as fl_effect_ng,
        case when dt.name = 'effect_ac' and hd.detail_value_id = 3 then 1 else 0 end as fl_effect_ac,
        case when dt.name = 'effect_ex' and hd.detail_value_id = 3 then 1 else 0 end as fl_effect_ex,
        case when dt.name = 'effect_fr' and hd.detail_value_id = 3 then 1 else 0 end as fl_effect_fr,
        case when dt.name = 'effect_st' and hd.detail_value_id = 3 then 1 else 0 end as fl_effect_st,
        case when dt.name = 'effect_un' and hd.detail_value_id = 3 then 1 else 0 end as fl_effect_un,
        case when dt.name = 'has_hourglass' and hd.detail_value_id = 3 then 1 else 0 end as fl_hourglass,
        case when dt.name = 'chamber_status' then hd.detail_value_id else 0 end as chamber_status, -- 2 or 4
        case when dt.name = 'cleaver_status' then hd.detail_value_id else 0 end as cleaver_status, -- 5, 14, 18, 69
        case when dt.name = 'obelisk_charged' and hd.detail_value_id = 3 then 1 else 0 end as fl_obelisk_charged,
        case when dt.name = 'acolyte_sand_drained' and hd.detail_value_id = 3 then 1 else 0 end as fl_acolyte_sand_drained
    from hunts h inner join hunt_details hd 
        on h.id = hd.hunt_id 
        INNER JOIN detail_types dt 
        ON hd.detail_type_id = dt.id 
        AND dt.name in ('effect_ng', 'effect_ac', 'effect_ex', 'effect_fr', 'effect_st', 'effect_un', 'has_hourglass',
                        'chamber_status', 'cleaver_status', 'cleaver_status', 'obelisk_charged', 'acolyte_sand_drained')
    where h.location_id = (select id from locations where name = 'Bristle Woods Rift')
      and h.id > oldest_hunt
    ) as t 
    group by hunt_id
    ;

    -- clean up what we did
    delete hd from hd_bwrift hfr inner join hunt_details as hd 
        on hfr.hunt_id = hd.hunt_id 
        INNER JOIN detail_types dt 
        ON hd.detail_type_id = dt.id 
        AND dt.name in ('effect_ng', 'effect_ac', 'effect_ex', 'effect_fr', 'effect_st', 'effect_un', 'has_hourglass',
                    'chamber_status', 'cleaver_status', 'cleaver_status', 'obelisk_charged', 'acolyte_sand_drained')
    where hfr.hunt_id > oldest_hunt
    ;
    commit;
END;

call hd_bwrift();

Next step is to blow away my local copy and run these scripts as-is to confirm they still do what is needed. Then I suppose deploy them. Would be nice if someone could look them over first.

One thing to note - detail value "3" is "true" but "1" is "false". So 3 is hard-coded up there to turn into a boolean true and everything else is a boolean false (for boolean columns).

AardWolf commented 3 years ago

I'd like to add two columns to mhhunthelper.detail_types:

  1. version_added - this would be populated with the version that detail type was added. Can be populated with the minimum version when joined with the hunts table or by someone who looks at the changelogs. Can be used for inequality join with mhhunthelper.hunts for optional detail types (like if is_lucky_catch has its false entries removed)
  2. detail_notes - a varchar column that has some information about that detail type. Would be useful for bwrift's buff/curses which are two characters.

Alternatively this information could be captured in a new page. In which case this should be a different issue maybe?

AardWolf commented 3 years ago

Event 40 to run the above procedures has been created and scheduled.

AardWolf commented 3 years ago

Halloween, Lunar New Year, and Spring Egg Hunt details

DROP PROCEDURE IF EXISTS hd_event_halloween;
-- Event cleanup

-- Halloween stuff
-- is_firing_cannon is_in_stockpile is_halloween_hunt 
create table hd_event_halloween (
    hunt_id     integer not null,
    is_firing_cannon    tinyint,
    is_in_stockpile     tinyint,
    primary key(hunt_id)
);

-- DELIMITER $$
CREATE PROCEDURE hd_event_halloween()
hdfr: BEGIN

    start transaction;
    insert into hd_event_halloween (hunt_id, is_firing_cannon, is_in_stockpile)
    select hunt_id, max(is_firing_cannon) is_firing_cannon, max(is_in_stockpile) is_in_stockpile
    from (
    SELECT h.id as hunt_id, 
        case when dt.name = 'is_firing_cannon' and hd.detail_value_id = 3 then 1 else 0 end as is_firing_cannon,
        case when dt.name = 'is_in_stockpile' and hd.detail_value_id = 3 then 1 else 0 end as is_in_stockpile
    from hunts h inner join hunt_details hd 
        on h.id = hd.hunt_id 
        INNER JOIN detail_types dt 
        ON hd.detail_type_id = dt.id 
        AND dt.name in ('is_firing_cannon', 'is_in_stockpile')
        inner join (
            select hd.hunt_id 
            from hunt_details hd inner join detail_types dt 
                on dt.id = hd.detail_type_id 
            where dt.name = 'is_halloween_hunt'
        ) hh
        ON hh.hunt_id = hd.hunt_id 
    ) as t 
    group by hunt_id
    ;

    -- clean up what we did
    delete hd from hd_event_halloween heh inner join hunt_details as hd 
            on heh.hunt_id = hd.hunt_id 
        INNER JOIN detail_types dt 
            ON hd.detail_type_id = dt.id 
            AND dt.name in ('is_firing_cannon', 'is_in_stockpile', 'is_halloween_hunt')
    ;
    commit;
END;

call hd_event_halloween();

-- lny
-- is_lny_hunt lny_luck
DROP PROCEDURE IF EXISTS hd_event_lny;
create table hd_event_lny (
    hunt_id     integer not null,
    luck_bonus      tinyint,
    primary key(hunt_id)
);

-- DELIMITER $$
CREATE PROCEDURE hd_event_lny()
hdfr: BEGIN

    start transaction;
    insert into hd_event_lny (hunt_id, luck_bonus)
    select hd.hunt_id, dv.name 
    from hunt_details hd
        inner join detail_types dt 
            on hd.detail_type_id = dt.id 
            and dt.name = 'lny_luck'
        inner join (
            select hd2.hunt_id 
            from hunt_details hd2 
                inner join detail_types dt2 
                    on hd2.detail_type_id = dt2.id 
                    and dt2.name = 'is_lny_hunt'
        ) lny_hunts
            on lny_hunts.hunt_id = hd.hunt_id 
        inner join detail_values dv 
            on hd.detail_value_id = dv.id
    ;

    -- clean up what we did
    delete hd from hd_event_lny lny inner join hunt_details as hd 
            on lny.hunt_id = hd.hunt_id 
        INNER JOIN detail_types dt 
            ON hd.detail_type_id = dt.id 
            AND dt.name in ('is_lny_hunt', 'lny_luck')
    ;
    commit;
END;

call hd_event_lny();

-- SEH stuff
-- can_double_eggs is_egg_hunt egg_charge_post egg_charge_pre
DROP PROCEDURE IF EXISTS hd_event_seh;
create table hd_event_seh (
    hunt_id     integer not null,
    can_double_eggs     tinyint,
    egg_charge_pre  tinyint,
    egg_charge_post tinyint,
    primary key(hunt_id)
);

-- DELIMITER $$
CREATE PROCEDURE hd_event_seh()
hdfr: BEGIN

    start transaction;
    insert into hd_event_seh (hunt_id, can_double_eggs, egg_charge_post, egg_charge_pre)
    select seh.hunt_id, max(can_double_eggs) as can_double_eggs, max(egg_charge_post) as egg_charge_post,
        max(egg_charge_pre) as egg_charge_pre
    from (
        select hd.hunt_id,
            case when dt.name = 'can_double_eggs' and hd.detail_value_id = 3 then 1 else 0 end as can_double_eggs,
            case when dt.name = 'egg_charge_post' then dv.name else null end as egg_charge_post,
            case when dt.name = 'egg_charge_pre' then dv.name else null end as egg_charge_pre
        from hunt_details hd
            inner join detail_types dt 
                on hd.detail_type_id = dt.id 
                and dt.name in ('can_double_eggs', 'egg_charge_post', 'egg_charge_pre')
            inner join (
                select hd2.hunt_id 
                from hunt_details hd2 
                    inner join detail_types dt2 
                        on hd2.detail_type_id = dt2.id 
                        and dt2.name = 'is_egg_hunt'
            ) seh_hunts
                on seh_hunts.hunt_id = hd.hunt_id 
            inner join detail_values dv 
                on hd.detail_value_id = dv.id
        ) as seh
    group by seh.hunt_id
    ;

    -- clean up what we did
    delete hd from hd_event_seh seh inner join hunt_details as hd 
            on seh.hunt_id = hd.hunt_id 
        INNER JOIN detail_types dt 
            ON hd.detail_type_id = dt.id 
            AND dt.name in ('can_double_eggs', 'egg_charge_post', 'egg_charge_pre', 'is_egg_hunt')
    ;
    commit;
END;

call hd_event_seh();