rollthecloudinc / hedge

Award winning climate aware database trimming cloud carbon and costs.
22 stars 2 forks source link

Add military_hour and region_military_hour fields to renewable-record-* and carbon-intensity-* documents #147

Open ng-druid opened 1 year ago

ng-druid commented 1 year ago

Executed below in dashboards on prod as a quick win but this needs to be added to each renewable record document created.

indexes:

POST renewable-record-001/_update_by_query

{
  "query": {
    "match_all": {}
  },
  "script": {
    "source": """
      if (ctx._source.containsKey('created_date')) {
        Instant instant = Instant.parse(ctx._source.created_date);
        LocalDateTime createdDate = LocalDateTime.ofInstant(instant, ZoneId.of('UTC'));
        int hour = createdDate.getHour();
        ctx._source.military_hour = hour;
      }
    """
  }
}

Exclude documents that already have military hour.

POST renewable-record-001/_update_by_query

{
  "query": {
    "bool": {
      "must_not": [
        {
          "exists": {
            "field": "military_hour"
          }
        }
      ]
    }
  },
  "script": {
    "source": """
      if (ctx._source.containsKey('created_date')) {
        Instant instant = Instant.parse(ctx._source.created_date);
        LocalDateTime createdDate = LocalDateTime.ofInstant(instant, ZoneId.of('UTC'));
        int hour = createdDate.getHour();
        ctx._source.military_hour = hour;
      }
    """
  }
}

This needs to be added to optimize the buckets for creating the radial graph. Otherwise a scripted agg needs to be used which is incredibly slow.

Query for graph

POST renewable-record-001/_search

{
  "size": 0,
  "query": {
    "match_all":{}
  },
  "aggs": {
    "regions": {
      "terms": {
        "field": "region.keyword",
        "size": 50
      },
      "aggs": {
        "hours": {
          "terms": {
            "field": "military_hour",
            "size": 25
          },
          "aggs": {
            "avgIntensity": {
              "avg": {
                "field": "intensity"
              }
            }
          }
        }
      }
    }
  }
}

Create field that combines region and military time for added optimization for aggregate calc.

POST your_index_name/_update_by_query

{
  "query": {
    "exists": {
      "field": "region"
    }
  },
  "script": {
    "source": """
      if (ctx._source.containsKey('military_time')) {
        String militaryTime = ctx._source.military_time.toString();
        ctx._source.region_military_time = ctx._source.region + "/" + militaryTime;
      }
    """
  }
}

Query to add new field but skip over documents with the field already.

POST renewable-record-001/_update_by_query

{
  "query": {
    "exists": {
      "field": "region"
    },
    "bool": {
      "must_not": [
        {
          "exists": {
            "field": "region_military_time"
          }
        }
      ]
    }
  },
  "script": {
    "source": """
      if (ctx._source.containsKey('military_time')) {
        String militaryTime = ctx._source.military_time.toString();
        ctx._source.region_military_time = ctx._source.region + "/" + militaryTime;
      }
    """
  }
}

corrected

POST renewable-record-001/_update_by_query

{
  "query": {
    "bool": {
      "must": [
        {
          "exists": {
            "field": "region"
          }
        }
      ],
      "must_not": [
        {
          "exists": {
            "field": "region_military_hour"
          }
        }
      ]
    }
  },
  "script": {
    "source": """
      if (ctx._source.containsKey('military_hour')) {
        String militaryHour = ctx._source.military_hour.toString();
        ctx._source.region_military_hour = ctx._source.region + "/" + militaryHour;
      }
    """
  }
}

This is correct hover its still slow and has a limit.

POST renewable-record-001/_search

{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "exists": {
            "field": "region_military_hour" 
          }
        }
      ]
    }
  },
  "aggs": {
    "regions": {
      "terms": {
        "field": "region_military_hour.keyword",
        "size": 250
      },
      "aggs": {
        "avgIntensity": {
          "avg": {
            "field": "intensity"
          }
        },
        "intensityExclusion": {
          "filter": {
            "range": {
              "intensity": {
                "gt": 0
              }
            }
          }
        }
      }
    }
  }
}