zadev / bru

bots R us
0 stars 0 forks source link

Historical Log Support Query Grammar #1

Open notheotherben opened 9 years ago

notheotherben commented 9 years ago

It was decided on the Slack channel that we would be adding support for historical logging with the help of @jrgns and @len. The objective was to make use of ElasticSearch to archive all messages sent across the various Slack channels and then to make use of the bru bot to access these logs.

A rough grammar to power this was proposed, allowing for the following commands.

tldr #general
tldr #general since 6 hours ago
tldr #general between 4 days ago and -2h
tldr #code-of-conduct from 6 hours ago
tldr #music-to-code-to since 6 hours ago
tldr #csharp from 6 hours ago
tldr #node since 6 hours ago to 2 hours ago
tldr #elasticsearch from -12d to -1d

I've defined a grammar using PEG which you can test online. It outputs objects with the following structure:

{
  "channel": "general",
  "time": {
    "start": -86400000,
    "end": 0
  }
}

Here `time.start` and `time.end` are millisecond offsets from the current time (negative values lie in the past), and together they define the range to search.

{
  // Duration of each supported unit in milliseconds. The entries line up
  // index-for-index with `longUnits` and `shortUnits` below.
  var unitValues = [
    1000,               // second
    1000 * 60,          // minute
    1000 * 3600,        // hour
    1000 * 86400,       // day
    1000 * 86400 * 7,   // week
    1000 * 86400 * 30,  // month, taken as 30 days
    1000 * 86400 * 365  // year, taken as 365 days
  ];

  // Singular long names, as written in "6 hours ago".
  var longUnits = ["second", "minute", "hour", "day", "week", "month", "year"];

  // One-letter names, as written in "-2h". Case matters: "m" is minute and
  // "M" is month.
  var shortUnits = ["s", "m", "h", "d", "w", "M", "y"];

  // Shared lookup: find `unit` in `names` and return the duration stored at
  // the same index. A name that is not listed yields undefined.
  function unitDuration(names, unit) {
    var position = names.indexOf(unit);
    return unitValues[position];
  }

  // Millisecond duration of a singular long unit name such as "hour".
  function parseLongUnit(unit) {
    return unitDuration(longUnits, unit);
  }

  // Millisecond duration of a one-letter unit name such as "h".
  function parseShortUnit(unit) {
    return unitDuration(shortUnits, unit);
  }
}

// Entry point: zero or more `tldr` requests; returns them as an array.
requests
  = list:request* { return list; }

// One request of the form `tldr <channel> [<timespan>]`.
// Returns { channel, time }. When no timespan is given, `time` defaults to
// the last 24 hours ({ start: -86400000, end: 0 }).
// Any run of spaces/tabs is accepted between the parts, and the request may be
// terminated by LF, CRLF or a bare CR, so doubled blanks and Windows line
// endings no longer stop the parse.
request
  = "tldr" whitespace+ channel:channel whitespace* time:timespan? whitespace* ("\r\n" / "\n" / "\r")? { return { channel: channel, time: time || { start: -86400000, end: 0 } }; }

// A channel reference: a literal "#" followed by the channel name.
// Only the name (without the "#") is returned.
channel
  = "#" id:string { return id; }

// A time range introduced by "between" / "from" / "since".
// Returns { start, end } as millisecond offsets; `end` is 0 when omitted.
// The separator and the end bound are matched as a single optional unit, so
// an end time is only accepted after a separator and a separator is only
// consumed when an end time follows it. Inputs such as `from -2h-1h` or a
// dangling `since 6 hours ago to` are therefore no longer accepted.
timespan
  = timespanStart whitespace start:time end:(whitespace timespanSeparator whitespace bound:time { return bound; })? { return { start: start, end: end || 0 }; }

// Keyword that opens a timespan.
timespanStart
  = "between" / "from" / "since"

// Token that separates the start and end of a timespan.
// The `timespan` rule already requires one whitespace character on each side
// of the separator, so the padded " - " form only matches input with doubled
// blanks. The bare "-" alternative makes the ordinary `-12d - -1d` spelling
// parse; " - " is kept (and tried first) for inputs that relied on it.
timespanSeparator
  = "to" / "and" / " - " / "-"

// A point in time, expressed as a millisecond offset from now.
// Either the compact or the verbose spelling is accepted.
time
  = shortTime / longTime

// Compact spelling: "-" <integer> <one-letter unit>, e.g. `-2h`.
shortTime
  = "-" count:integer scale:shortTimeUnit { return -(count * scale); }

// Verbose spelling: <integer> <unit name> "ago", e.g. `6 hours ago`.
longTime
  = count:integer whitespace scale:longTimeUnit whitespace "ago" { return -(count * scale); }

// One-letter unit; resolved through parseShortUnit. Case-sensitive
// ("m" and "M" are distinct entries).
shortTimeUnit
  = symbol:[smhdwMy] { return parseShortUnit(symbol); }

// Spelled-out unit with an optional plural "s"; the singular name is passed
// to parseLongUnit.
longTimeUnit
  = singular:("second" / "minute" / "hour" / "day" / "week" / "month" / "year") "s"? { return parseLongUnit(singular); }

// One or more decimal digits, returned as a base-10 number.
integer "integer"
  = numerals:[0-9]+ { return parseInt(numerals.join(""), 10); }

// A bare word or a quoted string.
// PEG choice is ordered and `word` accepts quote characters, so with `word`
// listed first the quoted alternatives could never match. The quoted forms
// are tried first; `word` remains the fallback, so unquoted input and input
// with an unterminated quote still parse as before.
string "string"
  = singleQuotedString / doubleQuotedString / word

// A run of characters containing no space, tab, CR or LF.
word
  = letters:[^ \n\r\t]+ { return letters.join(""); }

// '...' - returns the text between the quotes; no escape sequences.
singleQuotedString
  = "'" characters:[^']* "'" { return characters.join(""); }

// "..." - returns the text between the quotes; no escape sequences.
doubleQuotedString
  = '"' characters:[^"]* '"' { return characters.join(""); }

// Exactly one horizontal whitespace character (space or tab).
// Newlines are deliberately not included.
whitespace
  = [ \t]

A couple of improvements which I think would be nice to add are friendly names for relative times - things like yesterday, now, last week and so on. I also haven't implemented any kind of error-handling logic, which would be used to advise you on ways to fix your query if it didn't match the grammar.

Would love to hear any suggestions you guys have regarding changes you think should be made, improvements, tweaks etc.

jrgns commented 9 years ago

Looking good! I like the idea of adding friendly names for relative times, it makes it much easier to use.

notheotherben commented 9 years ago

Right, here's a grammar which supports friendly relative times. It's intended to be used with moment.

{
  // Duration of each supported unit in milliseconds (second, minute, hour,
  // day, week, 30-day month, 365-day year). Index-aligned with `longUnits`
  // and `shortUnits`.
  var unitValues = [1000, 1000 * 60, 1000 * 3600, 1000 * 86400, 1000 * 86400 * 7, 1000 * 86400 * 30, 1000 * 86400 * 365];
  var longUnits = ["second", "minute", "hour", "day", "week", "month", "year"];

  // Millisecond duration of a singular long unit name such as "hour";
  // undefined when the name is not listed.
  function parseLongUnit(unit) {
    return unitValues[longUnits.indexOf(unit)];
  }

  // Case-sensitive: "m" is minute, "M" is month.
  var shortUnits = ["s", "m", "h", "d", "w", "M", "y"];

  // Millisecond duration of a one-letter unit name such as "h";
  // undefined when the name is not listed.
  function parseShortUnit(unit) {
    return unitValues[shortUnits.indexOf(unit)];
  }

  // Friendly names for relative times. Each entry is a function returning a
  // millisecond offset from now (zero or negative).
  // Actively compute these from moment()
  var friendlyTimeAliases = {
    "now": function() { return 0; },
    "yesterday": function() { return -86400000; },
    // The grammar accepts "today", but without an entry here it resolved to 0,
    // which made `since today` an empty range. It now resolves to the offset
    // of the most recent local midnight.
    "today": function() {
      var current = new Date();
      var midnight = new Date(current.getFullYear(), current.getMonth(), current.getDate());
      return midnight.getTime() - current.getTime();
    },
    "this morning": function() { return -43200000; },
    "last week": function() { return -86400000 * 7; }
  };
}

// Entry point: zero or more `tldr` requests; returns them as an array.
requests
  = list:request* { return list; }

// One request of the form `tldr <channel> [<timespan>]`.
// Returns { channel, time }. When no timespan is given, `time` defaults to
// the last 24 hours ({ start: -86400000, end: 0 }).
// Any run of spaces/tabs is accepted between the parts, and the request may be
// terminated by LF, CRLF or a bare CR, so doubled blanks and Windows line
// endings no longer stop the parse.
request
  = "tldr" whitespace+ channel:channel whitespace* time:timespan? whitespace* ("\r\n" / "\n" / "\r")? { return { channel: channel, time: time || { start: -86400000, end: 0 } }; }

// A channel reference: the channel name with an optional leading "#".
// Only the name (without the "#") is returned.
channel
  = "#"? id:string { return id; }

// A time range introduced by "between" / "from" / "since".
// Returns { start, end } as millisecond offsets; `end` is 0 when omitted.
// The separator and the end bound are matched as a single optional unit, so
// an end time is only accepted after a separator and a separator is only
// consumed when an end time follows it. Inputs such as `from -2h-1h` or a
// dangling `since yesterday to` are therefore no longer accepted.
timespan
  = timespanStart whitespace start:time end:(whitespace timespanSeparator whitespace bound:time { return bound; })? { return { start: start, end: end || 0 }; }

// A point in time, expressed as a millisecond offset from now.
// Accepts the compact, verbose or friendly-alias spelling.
time
  = shortTime / longTime / friendlyTime

// Compact spelling: "-" <integer> <one-letter unit>, e.g. `-2h`.
shortTime
  = "-" count:integer scale:shortTimeUnit { return -(count * scale); }

// Verbose spelling: <integer> <unit name> "ago", e.g. `6 hours ago`.
longTime
  = count:integer whitespace scale:longTimeUnit whitespace "ago" { return -(count * scale); }

// Friendly alias such as `yesterday`. The offset comes from the matching
// function in friendlyTimeAliases; an alias with no entry there yields 0.
friendlyTime
  = alias:("yesterday" / "last week" / "now" / "today" / "this morning") {
      var resolve = friendlyTimeAliases[alias];
      return resolve ? resolve() : 0;
    }

// Keyword that opens a timespan.
timespanStart
  = "between" / "from" / "since"

// Token that separates the start and end of a timespan.
// The `timespan` rule already requires one whitespace character on each side
// of the separator, so the padded " - " form only matches input with doubled
// blanks. The bare "-" alternative makes the ordinary `-12d - -1d` spelling
// parse; " - " is kept (and tried first) for inputs that relied on it.
timespanSeparator
  = "to" / "and" / " - " / "-"

// One-letter unit; resolved through parseShortUnit. Case-sensitive
// ("m" and "M" are distinct entries).
shortTimeUnit
  = symbol:[smhdwMy] { return parseShortUnit(symbol); }

// Spelled-out unit with an optional plural "s"; the singular name is passed
// to parseLongUnit.
longTimeUnit
  = singular:("second" / "minute" / "hour" / "day" / "week" / "month" / "year") "s"? { return parseLongUnit(singular); }

// One or more decimal digits, returned as a base-10 number.
integer "integer"
  = numerals:[0-9]+ { return parseInt(numerals.join(""), 10); }

// A bare word or a quoted string.
// PEG choice is ordered and `word` accepts quote characters, so with `word`
// listed first the quoted alternatives could never match. The quoted forms
// are tried first; `word` remains the fallback, so unquoted input and input
// with an unterminated quote still parse as before.
string "string"
  = singleQuotedString / doubleQuotedString / word

// A run of characters containing no space, tab, CR or LF.
word
  = letters:[^ \n\r\t]+ { return letters.join(""); }

// '...' - returns the text between the quotes; no escape sequences.
singleQuotedString
  = "'" characters:[^']* "'" { return characters.join(""); }

// "..." - returns the text between the quotes; no escape sequences.
doubleQuotedString
  = '"' characters:[^"]* '"' { return characters.join(""); }

// Exactly one horizontal whitespace character (space or tab).
// Newlines are deliberately not included.
whitespace
  = [ \t]

Example queries that it's designed to work with:

tldr #general
tldr #general since 6 hours ago
tldr #general between 4 days ago and -2h
tldr #code-of-conduct from 6 hours ago
tldr #music-to-code-to since 6 hours ago
tldr #csharp from 6 hours ago
tldr #node since 6 hours ago to 2 hours ago
tldr #elasticsearch from -12d to -1d
tldr general since yesterday
tldr code-of-conduct since this morning
tldr music since last week