Previously we'd group where conditions into a single-ish where clause. If filters needed to select additional data they'd do so before declaring their where condition.
match (node)-[:start { active: true }]->(start:Property)
match (node)-[:end { active: true }]->(end:Property)
...
where start.value >= $start and end.value <= $end
What this meant though is that filter work could have to be done on all of the (node)s instead of filtering immediately.
This was happening with ProgressReports trying to filter by project name.
All of the work to navigate to the project and run the full text search was happening on every report, before we filtered down to just reports for the given quarter.
Now
Now I have changed it to have each filter produce its own where clause instead of grouping them together.
For simple cases, the cypher performance is the same.
match (node)-[:start { active: true }]->(start:Property)
where start.value >= $start
match (node)-[:end { active: true }]->(end:Property)
where end.value <= $end
But this makes a big impact on reducing the data for subsequent filters that are more complex.
Merging sibling WHERE clauses
One of the reasons the former logic existed was because where clauses cannot follow another.
where node.type = "TranslationProject"
where node.createdAt > date("2024")
I updated the QB to look for this case when chaining and merge them.
Results
So to take full advantage of this I've sorted the filters to have the cheapest and most effective ones first (just based on gut feeling).
- 47,719,790 total db hits in 332,911 ms
+ 3,142,563 total db hits in 10,509 ms
15x better 🎉
But still 10s query 🫤
Query Before
```cypher
MATCH (node:ProgressReport)<-[:report]-(:LanguageEngagement)<-[:engagement]-(project:Project)
MATCH (requestingUser:User { id: '5c521b098a0ddd9231b84a4a' })
// ProgressReportFilters
MATCH (node)-[:start { active: true }]->(start:Property)
MATCH (node)-[:end { active: true }]->(end:Property)
CALL {
WITH node, requestingUser
WITH node as report, requestingUser
MATCH (report)<-[:report]-(node:Engagement)
// EngagementFilters
CALL {
WITH node, requestingUser
WITH node as eng, requestingUser
MATCH (eng)<-[:engagement]-(node:Project)
// ProjectFilters
RETURN true as projectFiltersApplied
limit 1
}
WITH *
CALL {
WITH node
WITH node as eng
MATCH (eng)-[:language]->(node:Language)
// LanguageFilters
CALL {
UNWIND ['(gaut~^2 OR *gaut*)'] AS query
CALL {
WITH query
CALL db.index.fulltext.queryNodes('LanguageName', query, { limit: 100 })
YIELD node as match, score
WHERE score > 0
MATCH (node:Language)-[{ active: true }]->(match)
RETURN collect(distinct node) as nameMatches
}
RETURN collect(nameMatches) as nameMatchSets
}
WITH *
WHERE all(nameMatches in nameMatchSets where node in nameMatches)
RETURN true as languageFiltersApplied
limit 1
}
WITH *
RETURN true as engagementFiltersApplied
limit 1
}
WITH *
WHERE start.value >= date('2024-07-01') AND end.value <= date('2024-09-30')
// sorting(start)
CALL {
WITH *
MATCH (node)-[:start { active: true }]->(sortProp:Property)
RETURN sortProp.value as sortValue
}
WITH *
ORDER BY sortValue
// paginate()
WITH collect(distinct node) as list
WITH list, list[..25] as page
CALL {
WITH page
UNWIND page as node
WITH node, apoc.coll.indexOf(page, node) as order
// Hydrating node
CALL {
WITH node
MATCH (node)<-[:report { active: true }]-(parent:LanguageEngagement)<-[:engagement { active: true }]-(project:Project)
// matchPropsAndProjectSensAndScopedRoles()
CALL {
WITH node, project
// matchProjectSens()
CALL {
WITH project
WITH project
WHERE project IS NOT NULL AND project.type = "Internship"
MATCH (project)-[:sensitivity { active: true }]->(projSens:Property)
RETURN projSens.value as sensitivity
UNION
WITH project
WITH project
WHERE project IS NOT NULL AND project.type <> "Internship"
OPTIONAL MATCH (project)-[:engagement { active: true }]->(:LanguageEngagement)-[:language { active: true }]->(:Language)-[:sensitivity { active: true }]->(langSens:Property)
WITH *
ORDER BY case langSens.value when 'High' then 3 when 'Medium' then 2 when 'Low' then 1 end DESC
LIMIT 1
RETURN coalesce(langSens.value, "High") as sensitivity
UNION
WITH project
WITH project
WHERE project IS NULL
RETURN "High" as sensitivity
}
// matchProps(node)
CALL {
WITH node
MATCH (node)-[r { active: true }]->(prop:Property)
WITH node, collect(apoc.map.fromValues([type(r), prop.value])) as collectedProps
WITH [node] + collectedProps as propList
RETURN apoc.map.mergeList(propList) as props
}
// matchProjectScopedRoles()
CALL {
WITH project
MATCH (project)-[:member]->(projectMember)-[:user]->(requestingUser:User { id: '5c521b098a0ddd9231b84a4a' }),
(projectMember)-[:roles { active: true }]->(rolesProp:Property)
WITH collect(rolesProp.value) as memberRoleProps
RETURN case size(memberRoleProps) > 0 when true then ["member:true"] else [] end + reduce(scopedRoles = [], role IN apoc.coll.flatten(memberRoleProps) | scopedRoles + ["project:" + role]) as scopedRoles
UNION
WITH project
WITH project
WHERE project IS NULL
RETURN [] as scopedRoles
}
RETURN apoc.map.merge(props, { sensitivity: sensitivity, scope: scopedRoles }) as props
}
CALL {
WITH node
RETURN { `__typename`: "ProgressReport" } as extra
}
RETURN apoc.map.mergeList([props, { parent: parent }, extra]) as dto
}
WITH *
ORDER BY order
RETURN collect(dto) as hydratedPage
}
RETURN hydratedPage as items, size(list) as total, size(page) > 0 and page[-1] <> list[-1] as hasMore
```
Profile Before
![Plan (6)](https://github.com/user-attachments/assets/e74710e1-4594-47aa-ac94-00a5791cc1e0)
Query After
```cypher
MATCH (node:ProgressReport)<-[:report]-(:LanguageEngagement)<-[:engagement]-(project:Project)
MATCH (requestingUser:User { id: '5c521b098a0ddd9231b84a4a' })
// ProgressReportFilters
MATCH (node)-[:start { active: true }]->(start:Property)
WHERE start.value >= date('2024-07-01')
MATCH (node)-[:end { active: true }]->(end:Property)
where end.value <= date('2024-09-30')
CALL {
WITH node, requestingUser
WITH node as report, requestingUser
MATCH (report)<-[:report]-(node:Engagement)
// EngagementFilters
CALL {
WITH node, requestingUser
WITH node as eng, requestingUser
MATCH (eng)<-[:engagement]-(node:Project)
// ProjectFilters
RETURN true as projectFiltersApplied
limit 1
}
WITH *
CALL {
WITH node
WITH node as eng
MATCH (eng)-[:language]->(node:Language)
// LanguageFilters
CALL {
UNWIND ['(gaut~^2 OR *gaut*)'] AS query
CALL {
WITH query
CALL db.index.fulltext.queryNodes('LanguageName', query, { limit: 100 })
YIELD node as match, score
WHERE score > 0
MATCH (node:Language)-[{ active: true }]->(match)
RETURN collect(distinct node) as nameMatches
}
RETURN collect(nameMatches) as nameMatchSets
}
WITH *
WHERE all(nameMatches in nameMatchSets where node in nameMatches)
RETURN true as languageFiltersApplied
limit 1
}
WITH *
RETURN true as engagementFiltersApplied
limit 1
}
WITH *
// sorting(start)
CALL {
WITH *
MATCH (node)-[:start { active: true }]->(sortProp:Property)
RETURN sortProp.value as sortValue
}
WITH *
ORDER BY sortValue
// paginate()
WITH collect(distinct node) as list
WITH list, list[..25] as page
CALL {
WITH page
UNWIND page as node
WITH node, apoc.coll.indexOf(page, node) as order
// Hydrating node
CALL {
WITH node
MATCH (node)<-[:report { active: true }]-(parent:LanguageEngagement)<-[:engagement { active: true }]-(project:Project)
// matchPropsAndProjectSensAndScopedRoles()
CALL {
WITH node, project
// matchProjectSens()
CALL {
WITH project
WITH project
WHERE project IS NOT NULL AND project.type = "Internship"
MATCH (project)-[:sensitivity { active: true }]->(projSens:Property)
RETURN projSens.value as sensitivity
UNION
WITH project
WITH project
WHERE project IS NOT NULL AND project.type <> "Internship"
OPTIONAL MATCH (project)-[:engagement { active: true }]->(:LanguageEngagement)-[:language { active: true }]->(:Language)-[:sensitivity { active: true }]->(langSens:Property)
WITH *
ORDER BY case langSens.value when 'High' then 3 when 'Medium' then 2 when 'Low' then 1 end DESC
LIMIT 1
RETURN coalesce(langSens.value, "High") as sensitivity
UNION
WITH project
WITH project
WHERE project IS NULL
RETURN "High" as sensitivity
}
// matchProps(node)
CALL {
WITH node
MATCH (node)-[r { active: true }]->(prop:Property)
WITH node, collect(apoc.map.fromValues([type(r), prop.value])) as collectedProps
WITH [node] + collectedProps as propList
RETURN apoc.map.mergeList(propList) as props
}
// matchProjectScopedRoles()
CALL {
WITH project
MATCH (project)-[:member]->(projectMember)-[:user]->(requestingUser:User { id: 'd1bebdaa-efcf-57ee-84cd-0b2bfa01eaa7' }),
(projectMember)-[:roles { active: true }]->(rolesProp:Property)
WITH collect(rolesProp.value) as memberRoleProps
RETURN case size(memberRoleProps) > 0 when true then ["member:true"] else [] end + reduce(scopedRoles = [], role IN apoc.coll.flatten(memberRoleProps) | scopedRoles + ["project:" + role]) as scopedRoles
UNION
WITH project
WITH project
WHERE project IS NULL
RETURN [] as scopedRoles
}
RETURN apoc.map.merge(props, { sensitivity: sensitivity, scope: scopedRoles }) as props
}
CALL {
WITH node
RETURN { `__typename`: "ProgressReport" } as extra
}
RETURN apoc.map.mergeList([props, { parent: parent }, extra]) as dto
}
WITH *
ORDER BY order
RETURN collect(dto) as hydratedPage
}
RETURN hydratedPage as items, size(list) as total, size(page) > 0 and page[-1] <> list[-1] as hasMore
```
Profile After
![Plan (5)](https://github.com/user-attachments/assets/a8f4b94d-0525-4b78-b87e-c19385dfb545)
Previously
Previously we'd group where conditions into a single-ish where clause. If filters needed to select additional data they'd do so before declaring their where condition.
What this meant though is that filter work could have to be done on all of the
(node)
s instead of filtering immediately.This was happening with ProgressReports trying to filter by project name. All of the work to navigate to the project and run the full text search was happening on every report, before we filtered down to just reports for the given quarter.
Now
Now I have changed it to have each filter produce its own where clause instead of grouping them together. For simple cases, the cypher performance is the same.
But this makes a big impact on reducing the data for subsequent filters that are more complex.
Merging sibling WHERE clauses
One of the reasons the former logic existed was because where clauses cannot follow another.
I updated the QB to look for this case when chaining and merge them.
Results
So to take full advantage of this I've sorted the filters to have the cheapest and most effective ones first (just based on gut feeling).
15x better 🎉 But still 10s query 🫤
Query Before
```cypher MATCH (node:ProgressReport)<-[:report]-(:LanguageEngagement)<-[:engagement]-(project:Project) MATCH (requestingUser:User { id: '5c521b098a0ddd9231b84a4a' }) // ProgressReportFilters MATCH (node)-[:start { active: true }]->(start:Property) MATCH (node)-[:end { active: true }]->(end:Property) CALL { WITH node, requestingUser WITH node as report, requestingUser MATCH (report)<-[:report]-(node:Engagement) // EngagementFilters CALL { WITH node, requestingUser WITH node as eng, requestingUser MATCH (eng)<-[:engagement]-(node:Project) // ProjectFilters RETURN true as projectFiltersApplied limit 1 } WITH * CALL { WITH node WITH node as eng MATCH (eng)-[:language]->(node:Language) // LanguageFilters CALL { UNWIND ['(gaut~^2 OR *gaut*)'] AS query CALL { WITH query CALL db.index.fulltext.queryNodes('LanguageName', query, { limit: 100 }) YIELD node as match, score WHERE score > 0 MATCH (node:Language)-[{ active: true }]->(match) RETURN collect(distinct node) as nameMatches } RETURN collect(nameMatches) as nameMatchSets } WITH * WHERE all(nameMatches in nameMatchSets where node in nameMatches) RETURN true as languageFiltersApplied limit 1 } WITH * RETURN true as engagementFiltersApplied limit 1 } WITH * WHERE start.value >= date('2024-07-01') AND end.value <= date('2024-09-30') // sorting(start) CALL { WITH * MATCH (node)-[:start { active: true }]->(sortProp:Property) RETURN sortProp.value as sortValue } WITH * ORDER BY sortValue // paginate() WITH collect(distinct node) as list WITH list, list[..25] as page CALL { WITH page UNWIND page as node WITH node, apoc.coll.indexOf(page, node) as order // Hydrating node CALL { WITH node MATCH (node)<-[:report { active: true }]-(parent:LanguageEngagement)<-[:engagement { active: true }]-(project:Project) // matchPropsAndProjectSensAndScopedRoles() CALL { WITH node, project // matchProjectSens() CALL { WITH project WITH project WHERE project IS NOT NULL AND project.type = "Internship" MATCH (project)-[:sensitivity { active: true }]->(projSens:Property) RETURN projSens.value as sensitivity UNION WITH project WITH project WHERE project IS NOT NULL AND project.type <> "Internship" OPTIONAL MATCH (project)-[:engagement { active: true }]->(:LanguageEngagement)-[:language { active: true }]->(:Language)-[:sensitivity { active: true }]->(langSens:Property) WITH * ORDER BY case langSens.value when 'High' then 3 when 'Medium' then 2 when 'Low' then 1 end DESC LIMIT 1 RETURN coalesce(langSens.value, "High") as sensitivity UNION WITH project WITH project WHERE project IS NULL RETURN "High" as sensitivity } // matchProps(node) CALL { WITH node MATCH (node)-[r { active: true }]->(prop:Property) WITH node, collect(apoc.map.fromValues([type(r), prop.value])) as collectedProps WITH [node] + collectedProps as propList RETURN apoc.map.mergeList(propList) as props } // matchProjectScopedRoles() CALL { WITH project MATCH (project)-[:member]->(projectMember)-[:user]->(requestingUser:User { id: '5c521b098a0ddd9231b84a4a' }), (projectMember)-[:roles { active: true }]->(rolesProp:Property) WITH collect(rolesProp.value) as memberRoleProps RETURN case size(memberRoleProps) > 0 when true then ["member:true"] else [] end + reduce(scopedRoles = [], role IN apoc.coll.flatten(memberRoleProps) | scopedRoles + ["project:" + role]) as scopedRoles UNION WITH project WITH project WHERE project IS NULL RETURN [] as scopedRoles } RETURN apoc.map.merge(props, { sensitivity: sensitivity, scope: scopedRoles }) as props } CALL { WITH node RETURN { `__typename`: "ProgressReport" } as extra } RETURN apoc.map.mergeList([props, { parent: parent }, extra]) as dto } WITH * ORDER BY order RETURN collect(dto) as hydratedPage } RETURN hydratedPage as items, size(list) as total, size(page) > 0 and page[-1] <> list[-1] as hasMore ```Profile Before
![Plan (6)](https://github.com/user-attachments/assets/e74710e1-4594-47aa-ac94-00a5791cc1e0)Query After
```cypher MATCH (node:ProgressReport)<-[:report]-(:LanguageEngagement)<-[:engagement]-(project:Project) MATCH (requestingUser:User { id: '5c521b098a0ddd9231b84a4a' }) // ProgressReportFilters MATCH (node)-[:start { active: true }]->(start:Property) WHERE start.value >= date('2024-07-01') MATCH (node)-[:end { active: true }]->(end:Property) where end.value <= date('2024-09-30') CALL { WITH node, requestingUser WITH node as report, requestingUser MATCH (report)<-[:report]-(node:Engagement) // EngagementFilters CALL { WITH node, requestingUser WITH node as eng, requestingUser MATCH (eng)<-[:engagement]-(node:Project) // ProjectFilters RETURN true as projectFiltersApplied limit 1 } WITH * CALL { WITH node WITH node as eng MATCH (eng)-[:language]->(node:Language) // LanguageFilters CALL { UNWIND ['(gaut~^2 OR *gaut*)'] AS query CALL { WITH query CALL db.index.fulltext.queryNodes('LanguageName', query, { limit: 100 }) YIELD node as match, score WHERE score > 0 MATCH (node:Language)-[{ active: true }]->(match) RETURN collect(distinct node) as nameMatches } RETURN collect(nameMatches) as nameMatchSets } WITH * WHERE all(nameMatches in nameMatchSets where node in nameMatches) RETURN true as languageFiltersApplied limit 1 } WITH * RETURN true as engagementFiltersApplied limit 1 } WITH * // sorting(start) CALL { WITH * MATCH (node)-[:start { active: true }]->(sortProp:Property) RETURN sortProp.value as sortValue } WITH * ORDER BY sortValue // paginate() WITH collect(distinct node) as list WITH list, list[..25] as page CALL { WITH page UNWIND page as node WITH node, apoc.coll.indexOf(page, node) as order // Hydrating node CALL { WITH node MATCH (node)<-[:report { active: true }]-(parent:LanguageEngagement)<-[:engagement { active: true }]-(project:Project) // matchPropsAndProjectSensAndScopedRoles() CALL { WITH node, project // matchProjectSens() CALL { WITH project WITH project WHERE project IS NOT NULL AND project.type = "Internship" MATCH (project)-[:sensitivity { active: true }]->(projSens:Property) RETURN projSens.value as sensitivity UNION WITH project WITH project WHERE project IS NOT NULL AND project.type <> "Internship" OPTIONAL MATCH (project)-[:engagement { active: true }]->(:LanguageEngagement)-[:language { active: true }]->(:Language)-[:sensitivity { active: true }]->(langSens:Property) WITH * ORDER BY case langSens.value when 'High' then 3 when 'Medium' then 2 when 'Low' then 1 end DESC LIMIT 1 RETURN coalesce(langSens.value, "High") as sensitivity UNION WITH project WITH project WHERE project IS NULL RETURN "High" as sensitivity } // matchProps(node) CALL { WITH node MATCH (node)-[r { active: true }]->(prop:Property) WITH node, collect(apoc.map.fromValues([type(r), prop.value])) as collectedProps WITH [node] + collectedProps as propList RETURN apoc.map.mergeList(propList) as props } // matchProjectScopedRoles() CALL { WITH project MATCH (project)-[:member]->(projectMember)-[:user]->(requestingUser:User { id: 'd1bebdaa-efcf-57ee-84cd-0b2bfa01eaa7' }), (projectMember)-[:roles { active: true }]->(rolesProp:Property) WITH collect(rolesProp.value) as memberRoleProps RETURN case size(memberRoleProps) > 0 when true then ["member:true"] else [] end + reduce(scopedRoles = [], role IN apoc.coll.flatten(memberRoleProps) | scopedRoles + ["project:" + role]) as scopedRoles UNION WITH project WITH project WHERE project IS NULL RETURN [] as scopedRoles } RETURN apoc.map.merge(props, { sensitivity: sensitivity, scope: scopedRoles }) as props } CALL { WITH node RETURN { `__typename`: "ProgressReport" } as extra } RETURN apoc.map.mergeList([props, { parent: parent }, extra]) as dto } WITH * ORDER BY order RETURN collect(dto) as hydratedPage } RETURN hydratedPage as items, size(list) as total, size(page) > 0 and page[-1] <> list[-1] as hasMore ```Profile After
![Plan (5)](https://github.com/user-attachments/assets/a8f4b94d-0525-4b78-b87e-c19385dfb545)