Closed WolfgangFahl closed 1 week ago
def setup_logger(self, level: int):
    """
    Set up logging for the command line tool.

    Args:
        level: the log level to apply; presumably one of the ``logging``
            module level constants such as ``logging.INFO`` -- TODO confirm,
            the implementation is not part of this excerpt.
    """
def snapquery_evaluate(self, args: argparse.Namespace):
    """
    Run every query of the requested namespaces against the requested endpoints.

    The named query manager is (re)created from the samples and stored in
    ``self.nqm``. When no endpoints are requested, all endpoints known to the
    manager are used. Requested endpoints that the manager does not know are
    reported via ``logger.error`` and left out. Each remaining
    query/endpoint combination is handed to ``self.execute``, query by query,
    with a progress message logged before each call.

    Args:
        args: argparse namespace; the attributes ``endpoints``,
            ``namespaces`` and ``context`` are read
    """
    requested_endpoints = args.endpoints
    requested_namespaces = args.namespaces
    context = args.context
    self.nqm = NamedQueryManager.from_samples()
    known_endpoints = self.nqm.endpoints
    if not requested_endpoints:
        requested_endpoints = list(known_endpoints.keys())

    # keep the known endpoints in request order, report the unknown ones
    valid_endpoints = []
    for candidate in requested_endpoints:
        if candidate in known_endpoints:
            valid_endpoints.append(candidate)
        else:
            logger.error(
                f"Endpoint {candidate} is not known and thus will be skipped"
            )

    # gather the queries of all namespaces in the order the namespaces were given
    all_queries = [
        named_query
        for ns in requested_namespaces
        for named_query in self.nqm.get_all_queries(namespace=ns)
    ]

    query_total = len(all_queries)
    endpoint_total = len(valid_endpoints)
    for query_no, named_query in enumerate(all_queries, start=1):
        for endpoint_no, target in enumerate(valid_endpoints, start=1):
            logger.info(
                f"Executing query {query_no}/{query_total} ({query_no/query_total:.2%}) on endpoint {target} ({endpoint_no}/{endpoint_total})"
            )
            self.execute(
                named_query,
                endpoint_name=target,
                title=f"query {query_no:3}/{query_total}::{target}",
                context=context,
            )
is not compatible with the parallel testing style of
#!/bin/bash
# WF 2024-06-05
# Enhanced script to handle snapquery calls efficiently and with feedback
# Launch one snapquery test run in the background and give feedback.
#
# Arguments:
#   $1 - namespace whose queries are to be tested
#   $2 - name of the endpoint to run the queries against
# Output:
#   prints the command (display form) and the path of the log file;
#   stdout and stderr of the run itself go to
#   /tmp/query_test_log/<endpoint>-<sanitized namespace>.log
# Returns:
#   non-zero if the log directory cannot be created
run_snapquery() {
  local namespace="$1"
  local endpoint="$2"

  # Sanitize the namespace to be filesystem-friendly:
  # slashes become hyphens, every other character outside
  # letters, digits, "_" and "-" becomes an underscore
  local sanitized_namespace="${namespace//\//-}"
  sanitized_namespace="${sanitized_namespace//[^a-zA-Z0-9_-]/_}"

  # Ensure the log directory exists - without it the redirection below fails
  local log_dir="/tmp/query_test_log"
  mkdir -p "$log_dir" || return 1
  local log="${log_dir}/${endpoint}-${sanitized_namespace}.log"

  # Display form of the command - used for feedback only, never evaluated
  local command="snapquery -tq -en '$endpoint' --context 'cmd_line_tests' --namespace '$namespace' > '$log' 2>&1"
  echo "Running $command..."

  # Run snapquery directly with quoted arguments instead of passing a
  # string to "bash -c": a namespace or endpoint containing quotes or other
  # shell metacharacters can then neither break nor alter the command.
  # Redirecting at this level also keeps nohup from creating nohup.out.
  nohup snapquery -tq -en "$endpoint" --context 'cmd_line_tests' --namespace "$namespace" > "$log" 2>&1 &
  echo "... logged at $log"
}
# Fetch namespaces and their totals dynamically from snapquery -ln command;
# the loop below expects one "namespace:count" entry per line.
# Abort if the listing fails - there is nothing meaningful to test then.
if ! namespace_data=$(snapquery -ln); then
  echo "snapquery -ln failed - cannot determine the namespaces to test" >&2
  exit 1
fi

# Define list of endpoints
wikidata_endpoints=("wikidata" "wikidata-qlever" "wikidata-triply" "wikidata-openlinksw" "wikidata-scatter")
dblp_endpoints=("dblp")

# Process each line to extract namespaces and totals
while IFS=':' read -r namespace count; do
  # A here-string always delivers at least one (possibly empty) line;
  # without this guard an empty listing would start runs for namespace "".
  if [ -z "$namespace" ]; then
    continue
  fi
  echo "Processing $namespace with total entries $count"
  case "$namespace" in
    dblp.org/examples*)
      # dblp example queries only make sense on the dblp endpoint(s)
      for endpoint in "${dblp_endpoints[@]}"; do
        run_snapquery "$namespace" "$endpoint"
      done
      ;;
    *fed*)
      # namespaces containing "fed" are not tested
      echo "Ignoring $namespace"
      ;;
    *)
      # everything else is run against all wikidata endpoints
      for endpoint in "${wikidata_endpoints[@]}"; do
        run_snapquery "$namespace" "$endpoint"
      done
      ;;
  esac
done <<< "$namespace_data"
where each namespace/endpoint combination is handled in a separate process with its own log file.