zenodo / zenodo-rdm

Zenodo, powered by InvenioRDM
https://zenodo.org
GNU General Public License v2.0
64 stars 29 forks source link

Store queries in DB table and index as OpenSearch percolators to allow for reverse lookup #1042

Closed slint closed 2 weeks ago

slint commented 3 weeks ago

Rough design:

class Query(db.Model):
    """Database model for a stored moderation search query.

    Each row holds a query string together with a score, free-form notes and
    an active flag. In this design sketch the row is also indexed as an
    OpenSearch percolator document so that records can be matched against
    the stored queries (reverse lookup).
    """

    # Integer surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)

    # Integer score attached to the query; defaults to 0.
    # NOTE(review): presumably the moderation weight applied on a match -- confirm.
    score = db.Column(db.Integer, default=0)
    # Query text (required); the sketch passes it to a ``query_string`` search query.
    query_string = db.Column(db.Text, nullable=False)

    # Optional free-form notes about the query.
    notes = db.Column(db.Text, nullable=True)

    # Boolean flag, defaults to True.
    # NOTE(review): presumably lets a query be disabled without deleting it -- confirm.
    active = db.Column(db.Boolean, default=True)

# Low-level API, step 1: persist the query as a database row.
new_query = Query(query_string='"1080p download"', score=5)
db.session.add(new_query)
db.session.commit()

# Low-level API, step 2: index the stored query as a percolator document.
# The document carries the query itself plus the score/active values of the row.
percolator_doc = {
    "query": {"query_string": {"query": new_query.query_string}},
    "score": new_query.score,
    "active": new_query.active,
}
current_search_client.index(index="moderation-records", body=percolator_doc)

# High-level API: a single call on the model, followed by a commit.
# NOTE(review): ``Query.create`` is not defined in the model sketch; presumably it
# wraps the row creation and the percolator indexing -- confirm.
Query.create(query_string='"1080p download"', score=5)
db.session.commit()

# TODO: In the future we could have a REST API to hook into the administration interface
"""POST /api/moderation/queries
{
    "query_string": "1080p download",
    "score": 5
}
"""

## Create mapping
# ``indices.get`` returns a dict keyed by concrete index name, each value holding
# "aliases", "mappings" and "settings" -- not the field properties themselves.
record_mapping = current_search_client.indices.get("rdmrecords-records-record-v7.0.0")
# The requested name may resolve to a differently named concrete index (e.g. via an
# alias), so take the first returned entry instead of looking it up by name.
record_properties = next(iter(record_mapping.values()))["mappings"]["properties"]

# NOTE(review): if the record mapping references custom analyzers, the matching
# analysis settings must be passed under "settings" as well -- confirm.
current_search_client.indices.create(
    index="moderation-records",
    body={
        "mappings": {
            "properties": {
                # Include the "live" record field mappings so that stored queries
                # can reference the same fields as regular record searches
                **record_properties,
                # Percolator-specific fields (listed last so they take precedence
                # over any record field with the same name)
                "query": {"type": "percolator"},
                "score": {"type": "integer"},
                "active": {"type": "boolean"},
            },
        },
    },
)

## Usage - During moderation rules
# Reverse lookup: hand the serialized record to the percolator field and get back
# the stored queries that match it.
# NOTE(review): the "active" field is not filtered on here -- confirm whether
# inactive queries are meant to be excluded at search time.
percolate_clause = {
    "field": "query",
    "document": record.dumps(),
}
matched_queries = current_search_client.search(
    index="moderation-records",
    body={"query": {"percolate": percolate_clause}},
)