WolfgangFahl / snapquery

Frontend to Introduce Named Queries and Named Query Middleware to wikidata
Apache License 2.0
4 stars 1 forks source link

run test of queries via command line #24

Closed WolfgangFahl closed 1 week ago

WolfgangFahl commented 1 month ago
snapquery -tq --namespace short_url --debug
WolfgangFahl commented 1 week ago
def setup_logger(self, level: int):
        """
        Set up logging for the command line interface.

        Args:
            level: the log level to configure; presumably one of the
                integer levels of the ``logging`` module -- TODO confirm,
                the implementation is not shown in this excerpt
        """

    def snapquery_evaluate(self, args: argparse.Namespace):
        """
        Evaluate endpoints by running every named query of the selected
        namespaces against every selected endpoint.

        Each query/endpoint pair is handed to ``self.execute``, which is
        expected to run the query and record its stats. Endpoint names that
        the query manager does not know are reported via the logger and
        left out; when no endpoint is given, all known endpoints are used.

        Args:
            args: argparse namespace providing ``endpoints``, ``namespaces``
                and ``context``
        """
        requested = args.endpoints
        namespaces = args.namespaces
        context = args.context
        self.nqm = NamedQueryManager.from_samples()
        if not requested:
            # no explicit selection: fall back to every known endpoint
            requested = list(self.nqm.endpoints.keys())

        # single pass: keep the known endpoints, complain about the others
        endpoint_names = []
        for endpoint_name in requested:
            if endpoint_name in self.nqm.endpoints:
                endpoint_names.append(endpoint_name)
                continue
            logger.error(
                f"Endpoint {endpoint_name} is not known and thus will be skipped"
            )

        # flatten the queries of all requested namespaces, in namespace order
        queries = [
            named_query
            for namespace in namespaces
            for named_query in self.nqm.get_all_queries(namespace=namespace)
        ]

        for i, nq in enumerate(queries, start=1):
            for j, endpoint_name in enumerate(endpoint_names, start=1):
                progress = f"Executing query {i}/{len(queries)} ({i/len(queries):.2%}) on endpoint {endpoint_name} ({j}/{len(endpoint_names)})"
                logger.info(progress)
                run_title = f"query {i:3}/{len(queries)}::{endpoint_name}"
                self.execute(
                    nq,
                    endpoint_name=endpoint_name,
                    title=run_title,
                    context=context,
                )

is not compatible with the parallel testing style of

#!/bin/bash
# WF 2024-06-05
# Enhanced script to handle snapquery calls efficiently and with feedback

# Launch one snapquery test run in the background and report where it logs.
#
# Arguments:
#   $1 - namespace whose queries are tested (passed to --namespace)
#   $2 - endpoint name (passed to -en)
# Environment:
#   SNAPQUERY_LOG_DIR - optional log directory (default: /tmp/query_test_log)
# Outputs:
#   prints the launched command and the log file path to stdout; stdout and
#   stderr of the run itself go to <log_dir>/<endpoint>-<namespace>.log
# Returns:
#   non-zero if the log directory cannot be created
run_snapquery() {
    local namespace="$1"
    local endpoint="$2"
    # Sanitize the namespace to be filesystem-friendly
    local sanitized_namespace="${namespace//\//-}"  # Replace slashes with hyphens
    # Replace everything else outside [a-zA-Z0-9_-] with underscores; the
    # hyphen comes last in the bracket expression so it is never read as
    # part of a range
    sanitized_namespace="${sanitized_namespace//[^a-zA-Z0-9_-]/_}"

    # Ensure the log directory exists
    local log_dir="${SNAPQUERY_LOG_DIR:-/tmp/query_test_log}"
    mkdir -p -- "$log_dir" || return

    local log="${log_dir}/${endpoint}-${sanitized_namespace}.log"
    # Keep the command as an argument vector instead of a string handed to
    # `bash -c`, so quotes or other shell metacharacters in the namespace or
    # endpoint are passed through literally and cannot alter the command
    local -a cmd=(snapquery -tq -en "$endpoint" --context cmd_line_tests --namespace "$namespace")
    local shown
    printf -v shown '%q ' "${cmd[@]}"  # shell-quoted rendering, for display only

    echo "Running ${shown}> $log 2>&1..."
    nohup "${cmd[@]}" < /dev/null > "$log" 2>&1 &
    echo "... logged at $log"
}

# Fetch namespaces and their totals dynamically from snapquery -ln command.
# Stop right away if the listing fails: without it there is nothing to test.
if ! namespace_data=$(snapquery -ln); then
    echo "snapquery -ln failed; cannot determine the namespaces to test" >&2
    exit 1
fi

# Define list of endpoints
wikidata_endpoints=("wikidata" "wikidata-qlever" "wikidata-triply" "wikidata-openlinksw" "wikidata-scatter")
dblp_endpoints=("dblp")

# Process each line, split at the first ':' into namespace and total
while IFS=':' read -r namespace count; do
    # A here-string always delivers at least one (possibly empty) line, so
    # skip blank entries instead of launching runs for an empty namespace
    [[ -n "$namespace" ]] || continue

    echo "Processing $namespace with total entries $count"

    case "$namespace" in
        dblp.org/examples*)
            # dblp example queries only make sense against the dblp endpoint
            for endpoint in "${dblp_endpoints[@]}"; do
                run_snapquery "$namespace" "$endpoint"
            done
            ;;
        *fed*)
            echo "Ignoring $namespace"
            ;;
        *)
            # everything else is tested against all wikidata endpoints
            for endpoint in "${wikidata_endpoints[@]}"; do
                run_snapquery "$namespace" "$endpoint"
            done
            ;;
    esac
done <<< "$namespace_data"

where each namespace/endpoint combination is handled in a separate process with its own log file.