NHFLO / data

Data repository of NHFLO. Contains data descriptions and mockup data for testing. Does not include the actual data.
GNU Affero General Public License v3.0
1 stars 2 forks source link

Improve test repository.yml #13

Open bdestombe opened 2 days ago

bdestombe commented 2 days ago

Test if all fields are present. Test if versions are sorted.

def compare_semver(version1: str, version2: str) -> int:
    """
    Compare two semantic version strings according to semver.org rules.

    Parameters
    ----------
    version1 : str
        First semantic version string to compare (e.g., "1.2.3" or "1.2.3-alpha.1")
    version2 : str
        Second semantic version string to compare (e.g., "1.2.3" or "1.2.3-beta.1")

    Returns
    -------
    int
        1 if version1 is newer than version2
        -1 if version1 is older than version2
        0 if they have equal precedence

    Raises
    ------
    ValueError
        If either version string doesn't follow semantic versioning format.
        This includes a wrong number of core fields, non-digit or
        zero-padded numbers, empty identifiers (``"1.0.0-"``,
        ``"1.0.0-alpha..1"``) and identifiers containing characters other
        than ASCII letters, digits and hyphens.

    Notes
    -----
    Follows semantic versioning 2.0.0 specification from semver.org:
    - Versions are compared by major.minor.patch numbers first
    - Pre-release versions have lower precedence than normal versions
    - Pre-release identifiers are compared left to right
    - Numeric identifiers are compared numerically
    - Alphanumeric identifiers are compared in ASCII order
    - A numeric identifier is always lower than an alphanumeric one
    - With equal leading identifiers, more identifiers means newer
    - Build metadata (everything after ``+``) is validated but ignored

    Examples
    --------
    >>> compare_semver("2.1.0", "2.0.0")
    1
    >>> compare_semver("1.0.0", "2.0.0")
    -1
    >>> compare_semver("1.0.0", "1.0.0")
    0
    >>> compare_semver("1.0.0-alpha", "1.0.0")
    -1
    >>> compare_semver("1.0.0-alpha.1", "1.0.0-alpha.2")
    -1
    >>> compare_semver("1.0.0-alpha.beta", "1.0.0-alpha.1")
    1
    >>> compare_semver("1.0.0+build.1", "1.0.0+build.2")
    0

    See Also
    --------
    https://semver.org/ : Official semantic versioning specification
    """
    digits = frozenset("0123456789")
    identifier_chars = frozenset(
        "0123456789"
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "-"
    )

    def is_numeric(identifier: str) -> bool:
        # Explicit ASCII digit check: int() would also accept whitespace,
        # signs, underscores and non-ASCII digits.
        return bool(identifier) and digits.issuperset(identifier)

    def has_leading_zero(identifier: str) -> bool:
        return len(identifier) > 1 and identifier[0] == '0'

    def parse_version(version: str) -> tuple:
        """Return ((major, minor, patch), [pre-release identifiers])."""
        if not isinstance(version, str):
            raise ValueError(f"Invalid version format: {version}")

        # Build metadata comes last and may itself contain hyphens, so it
        # has to be split off before looking for the pre-release hyphen.
        remainder, plus, build = version.partition('+')
        if plus:
            for identifier in build.split('.'):
                if not identifier or not identifier_chars.issuperset(identifier):
                    raise ValueError(f"Invalid version format: {version}")

        core, dash, prerelease = remainder.partition('-')

        numbers = core.split('.')
        if len(numbers) != 3:
            raise ValueError(f"Invalid version format: {version}")
        for number in numbers:
            if not is_numeric(number) or has_leading_zero(number):
                raise ValueError(f"Invalid version format: {version}")

        # A hyphen announces a pre-release, so "1.0.0-" yields one empty
        # identifier here and is rejected below.
        identifiers = prerelease.split('.') if dash else []
        for identifier in identifiers:
            if not identifier or not identifier_chars.issuperset(identifier):
                raise ValueError(f"Invalid version format: {version}")
            if is_numeric(identifier) and has_leading_zero(identifier):
                raise ValueError(f"Invalid version format: {version}")

        return tuple(int(number) for number in numbers), identifiers

    v1_core, v1_pre = parse_version(version1)
    v2_core, v2_pre = parse_version(version2)

    # Compare major.minor.patch
    if v1_core != v2_core:
        return 1 if v1_core > v2_core else -1

    # A release without pre-release outranks any pre-release of the same core
    if not v1_pre and v2_pre:
        return 1
    if v1_pre and not v2_pre:
        return -1

    for id1, id2 in zip(v1_pre, v2_pre):
        if id1 == id2:
            continue
        num1, num2 = is_numeric(id1), is_numeric(id2)
        if num1 and num2:
            return 1 if int(id1) > int(id2) else -1
        if num1 != num2:
            # Numeric identifiers always rank below alphanumeric ones
            return -1 if num1 else 1
        # Both alphanumeric: identifiers are ASCII-only, so code point
        # order equals ASCII order
        return 1 if id1 > id2 else -1

    # All shared identifiers are equal: the longer pre-release is newer
    if len(v1_pre) != len(v2_pre):
        return 1 if len(v1_pre) > len(v2_pre) else -1
    return 0
bdestombe commented 2 days ago
import unittest
from typing import List, Tuple

class TestSemverCompare(unittest.TestCase):
    """Unit tests for the ordering and validation rules of ``compare_semver``."""

    def assert_version_order(self, versions: List[str]):
        """Assert that every version is strictly older than the one after it."""
        for older, newer in zip(versions, versions[1:]):
            self.assertEqual(
                compare_semver(older, newer), -1,
                f"Expected {older} to be older than {newer}"
            )

    def _check_cases(self, cases):
        """Run each (v1, v2, expected) triple as its own sub-test."""
        for v1, v2, expected in cases:
            with self.subTest(v1=v1, v2=v2):
                self.assertEqual(compare_semver(v1, v2), expected)

    def test_basic_comparison(self):
        """Plain major.minor.patch versions compare field by field."""
        self._check_cases([
            ("1.0.0", "2.0.0", -1),
            ("2.0.0", "1.0.0", 1),
            ("2.1.0", "2.0.0", 1),
            ("2.0.1", "2.0.0", 1),
            ("1.0.0", "1.0.0", 0),
        ])

    def test_prerelease_comparison(self):
        """Pre-releases sort below their release and among each other."""
        self._check_cases([
            ("1.0.0-alpha", "1.0.0", -1),
            ("1.0.0", "1.0.0-alpha", 1),
            ("1.0.0-alpha", "1.0.0-beta", -1),
            ("1.0.0-beta", "1.0.0-alpha", 1),
            ("1.0.0-alpha.1", "1.0.0-alpha.2", -1),
            ("1.0.0-alpha.2", "1.0.0-alpha.1", 1),
            ("1.0.0-alpha.1", "1.0.0-alpha.1", 0),
        ])

    def test_mixed_numeric_string_prerelease(self):
        """Pre-release identifiers that mix numbers and words."""
        self._check_cases([
            ("1.0.0-alpha.1", "1.0.0-alpha.beta", -1),
            ("1.0.0-alpha.beta", "1.0.0-beta.2", -1),
            ("1.0.0-beta.2", "1.0.0-beta.11", -1),
            ("1.0.0-beta.11", "1.0.0-rc.1", -1),
        ])

    def test_prerelease_length_comparison(self):
        """Pre-releases that differ only in their number of identifiers."""
        self._check_cases([
            ("1.0.0-alpha", "1.0.0-alpha.1", -1),
            ("1.0.0-alpha.1", "1.0.0-alpha.1.1", -1),
            ("1.0.0-alpha.1.1", "1.0.0-alpha.1", 1),
        ])

    def test_invalid_versions(self):
        """Malformed version strings raise ValueError in either argument position."""
        reference = "1.0.0"
        malformed = (
            "1.0",
            "1.0.0.0",
            "1",
            "a.b.c",
            "1.0.0-",
            "-1.0.0",
            "1.0.0-alpha..1",
            "",
            "not_a_version",
        )

        for invalid_version in malformed:
            with self.subTest(invalid_version=invalid_version):
                with self.assertRaises(ValueError):
                    compare_semver(invalid_version, reference)
                with self.assertRaises(ValueError):
                    compare_semver(reference, invalid_version)

    def test_version_ordering(self):
        """A known ascending sequence is ordered pairwise."""
        self.assert_version_order([
            "1.0.0-alpha",
            "1.0.0-alpha.1",
            "1.0.0-alpha.beta",
            "1.0.0-beta",
            "1.0.0-beta.2",
            "1.0.0-beta.11",
            "1.0.0-rc.1",
            "1.0.0",
            "1.0.1",
            "1.1.0",
            "2.0.0",
        ])

# Run the unittest suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
bdestombe commented 2 days ago
"""Tests for NHFLO schema validation.

This module provides tests to validate YAML strings against the NHFLO schema.
The schema itself is read from 'nhflo_schema.yaml'.
"""

import pytest
import yamale
from yamale.validators import DefaultValidators, Validator

class PathsMatchVersion(Validator):
    """Custom validator checking the version suffix of dataset paths.

    The ``local``, ``nhflo_server`` and ``mockup`` entries of the validated
    mapping must each end with the ``version_nhflo`` value looked up in
    ``self.value_context``.
    """

    def _is_valid(self, value):
        """Return whether every path in ``value`` ends with ``version_nhflo``.

        Parameters
        ----------
        value : dict
            Mapping with the keys ``local``, ``nhflo_server`` and ``mockup``

        Returns
        -------
        bool
            False when the context holds no ``version_nhflo`` or any path
            has a different suffix, True otherwise
        """
        # NOTE(review): ``value_context`` is not assigned anywhere in this
        # class; presumably supplied by the base class or the caller — confirm.
        expected_suffix = self.value_context.get('version_nhflo')
        if not expected_suffix:
            return False

        path_keys = ('local', 'nhflo_server', 'mockup')
        return all(value[key].endswith(expected_suffix) for key in path_keys)

@pytest.fixture
def schema():
    """Provide the raw text of ``nhflo_schema.yaml`` to tests.

    The file is opened relative to the current working directory.

    Returns
    -------
    str
        Full content of the schema file
    """
    with open('nhflo_schema.yaml', 'r') as schema_file:
        schema_text = schema_file.read()
    return schema_text

def validate_yaml_string(yaml_str, schema_str):
    """Check YAML text against schema text using yamale.

    The default yamale validators are extended with the
    ``paths_match_version`` entry before the schema is built.

    Parameters
    ----------
    yaml_str : str
        YAML content to validate
    schema_str : str
        Schema content to validate against

    Raises
    ------
    yamale.YamaleError
        If validation fails
    """
    # Work on a copy so the shared default validator table is left untouched.
    custom_validators = DefaultValidators.copy()
    custom_validators['paths_match_version'] = PathsMatchVersion

    yamale.validate(
        yamale.make_schema(content=schema_str, validators=custom_validators),
        yamale.make_data(content=yaml_str),
    )

def test_valid_yaml_structure(schema):
    """A complete, well-formed document passes validation without raising.

    Parameters
    ----------
    schema : str
        Pytest fixture containing the schema
    """
    # One dataset with a single version entry and every field filled in.
    complete_document = """
title: NHFLO data repository
description: Repository of NHFLO containing the data structure

data:
  bodemlagen_pwn_bergen:
    - version_nhflo: v1.0.0
      owner: PWN
      publication_date: "2023-08-20"
      version_owner: "2020-04-06"
      description_short: Test dataset
      description_long: Complete test dataset with valid structure
      contact: test@example.com
      timezone: Europe/Amsterdam
      extent: [-7000, 289000, 300000, 629000]
      paths:
        local: bodemlagen_pwn_bergen/v1.0.0
        nhflo_server: /data/bodemlagen_pwn_bergen/v1.0.0
        mockup: mockup/bodemlagen_pwn_bergen/v1.0.0
      changelog:
        previous_version: 0.0.0
        log: "Initial version"
"""
    validate_yaml_string(complete_document, schema)

def test_invalid_version_format(schema):
    """Each malformed ``version_nhflo`` value is rejected by validation.

    Parameters
    ----------
    schema : str
        Pytest fixture containing the schema
    """
    # Only the version_nhflo field varies; everything else stays valid.
    document_template = """
title: NHFLO data repository
description: Test repository
data:
  test_dataset:
    - version_nhflo: "{version}"
      owner: TEST
      publication_date: "2024-01-01"
      version_owner: "1.0.0"
      description_short: Test
      description_long: Test description
      contact: test@example.com
      timezone: Europe/Amsterdam
      extent: [0, 0, 1, 1]
      paths:
        local: test_dataset/v1.0.0
        nhflo_server: /data/test_dataset/v1.0.0
        mockup: mockup/test_dataset/v1.0.0
      changelog:
        previous_version: 0.0.0
        log: "Test"
"""

    bad_versions = (
        "1.0.0",      # no leading "v"
        "v1.0",       # patch number absent
        "v1",         # minor and patch absent
        "va.b.c",     # letters instead of numbers
        "v1.0.0.0",   # one number too many
    )

    for bad_version in bad_versions:
        document = document_template.format(version=bad_version)
        with pytest.raises(yamale.YamaleError):
            validate_yaml_string(document, schema)

def test_path_consistency(schema):
    """A path whose version suffix differs from ``version_nhflo`` is rejected.

    Parameters
    ----------
    schema : str
        Pytest fixture containing the schema
    """
    # The local path ends in v1.0.1 while version_nhflo is v1.0.0.
    # NOTE(review): this only fails validation if the loaded schema applies
    # the ``paths_match_version`` validator — confirm the schema file does.
    mismatched_document = """
title: NHFLO data repository
description: Test repository
data:
  test_dataset:
    - version_nhflo: v1.0.0
      owner: TEST
      publication_date: "2024-01-01"
      version_owner: "1.0.0"
      description_short: Test
      description_long: Test description
      contact: test@example.com
      timezone: Europe/Amsterdam
      extent: [0, 0, 1, 1]
      paths:
        local: test_dataset/v1.0.1
        nhflo_server: /data/test_dataset/v1.0.0
        mockup: mockup/test_dataset/v1.0.0
      changelog:
        previous_version: 0.0.0
        log: "Test"
"""
    with pytest.raises(yamale.YamaleError):
        validate_yaml_string(mismatched_document, schema)

def test_invalid_path_formats(schema):
    """Test validation fails with invalid path formats.

    Each case replaces exactly one of the three paths with a malformed
    value; the rest of the document stays valid so that a failure can only
    come from that path.

    Parameters
    ----------
    schema : str
        Pytest fixture containing the schema
    """
    test_cases = [
        {
            'description': 'Invalid local path format',
            'paths': {
                # A hyphen is not allowed in the dataset name. The previous
                # value 'test/v1.0.0' was a valid local path, so the case
                # could never trigger a validation error.
                'local': 'test-dataset/v1.0.0',
                'nhflo_server': '/data/test_dataset/v1.0.0',
                'mockup': 'mockup/test_dataset/v1.0.0'
            }
        },
        {
            'description': 'Missing v prefix in version',
            'paths': {
                'local': 'test_dataset/1.0.0',
                'nhflo_server': '/data/test_dataset/v1.0.0',
                'mockup': 'mockup/test_dataset/v1.0.0'
            }
        },
        {
            'description': 'Invalid server path prefix',
            'paths': {
                'local': 'test_dataset/v1.0.0',
                'nhflo_server': 'data/test_dataset/v1.0.0',  # Missing leading slash
                'mockup': 'mockup/test_dataset/v1.0.0'
            }
        }
    ]

    base_yaml = """
title: NHFLO data repository
description: Test repository
data:
  test_dataset:
    - version_nhflo: v1.0.0
      owner: TEST
      publication_date: "2024-01-01"
      version_owner: "1.0.0"
      description_short: Test
      description_long: Test description
      contact: test@example.com
      timezone: Europe/Amsterdam
      extent: [0, 0, 1, 1]
      paths:
        local: "{local}"
        nhflo_server: "{server}"
        mockup: "{mockup}"
      changelog:
        previous_version: 0.0.0
        log: "Test"
"""

    for case in test_cases:
        yaml_str = base_yaml.format(
            local=case['paths']['local'],
            server=case['paths']['nhflo_server'],
            mockup=case['paths']['mockup']
        )
        # pytest.raises() no longer accepts a ``message`` keyword (removed
        # in pytest 5.0). Failing explicitly after the call keeps the
        # per-case description in the report when no error is raised.
        with pytest.raises(yamale.YamaleError):
            validate_yaml_string(yaml_str, schema)
            pytest.fail(f"No validation error raised for case: {case['description']}")

if __name__ == "__main__":
    import sys

    # Command-line mode: validate the single YAML file named on the command
    # line against 'nhflo_schema.yaml' from the current working directory.
    if len(sys.argv) != 2:
        print("Usage: python test_nhflo_schema.py <path_to_yaml>")
        sys.exit(1)

    yaml_path = sys.argv[1]

    # Load the document first, then the schema
    with open(yaml_path, 'r') as yaml_file:
        yaml_content = yaml_file.read()
    with open('nhflo_schema.yaml', 'r') as schema_file:
        schema_content = schema_file.read()

    try:
        validate_yaml_string(yaml_content, schema_content)
    except yamale.YamaleError as error:
        # Report the validation problems and signal failure to the shell
        print("✗ Validation failed:")
        print(str(error))
        sys.exit(1)
    else:
        print(f"✓ {yaml_path} is valid")
bdestombe commented 1 day ago

Split validate and lint. Lint may fail

"""Tests for YAML schema validation and linting.

This module validates both schema compliance using yamale and style rules using yamllint.
"""

import pytest
import yamale
from yamale.validators import DefaultValidators, Validator
from yamllint import config as lint_config, linter

class PathsMatchVersion(Validator):
    """Check that the local, nhflo_server and mockup paths end with version_nhflo."""

    def _is_valid(self, value):
        """Return True only if a version is known and all three paths end with it."""
        # NOTE(review): ``value_context`` is not assigned anywhere in this
        # class; presumably supplied by the base class or the caller — confirm.
        expected_suffix = self.value_context.get('version_nhflo')
        if not expected_suffix:
            return False

        path_keys = ('local', 'nhflo_server', 'mockup')
        return all(value[key].endswith(expected_suffix) for key in path_keys)

def test_yaml_file():
    """Validate 'your_data.yaml' against the schema, then lint it.

    Schema validation runs first and raises on failure. Lint problems are
    collected afterwards and reported together through ``pytest.fail``.
    """
    with open('your_data.yaml', 'r') as yaml_file:
        yaml_content = yaml_file.read()

    # Step 1: schema validation with the custom validator registered
    validators = DefaultValidators.copy()
    validators['paths_match_version'] = PathsMatchVersion
    yamale.validate(
        yamale.make_schema('nhflo_schema.yaml', validators=validators),
        yamale.make_data(content=yaml_content),
    )

    # Step 2: style rules, based on yamllint's default configuration
    lint_conf = lint_config.YamlLintConfig("""
        extends: default
        rules:
            document-start: disable
            line-length: disable
            empty-lines:
                max: 1
                max-start: 1
                max-end: 1
            indentation:
                spaces: 2
                indent-sequences: true
            braces:
                min-spaces-inside: 0
                max-spaces-inside: 0
            brackets:
                min-spaces-inside: 0
                max-spaces-inside: 0
            comments:
                min-spaces-from-content: 2
    """)

    problems = list(linter.run(yaml_content, lint_conf))
    if not problems:
        return

    # One report line per lint problem, under a common heading
    report_lines = "".join(
        f"Line {problem.line}: {problem.message}\n" for problem in problems
    )
    pytest.fail("\nLinting problems found:\n" + report_lines)

# When executed directly, run this file's tests through pytest in verbose mode.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
# NHFLO Data Repository Schema
# Version: 1.0.0
# This schema defines the structure and validation rules for the NHFLO data repository

# Root level schema
# - title: Name of the repository
# - description: General description of the repository contents
# - data: Map of dataset names to their versions
title: str(required=True)
description: str(required=True)
data: map(include('dataset_name'), required=True)

---
# Schema for dataset name entries
# Each dataset name (e.g., 'bodemlagen_pwn_bergen') contains a list of versions
dataset_name: list(include('dataset_version'))

---
# Schema for a single dataset version
# Defines all required fields and their validation patterns
dataset_version:
  # Must start with 'v' followed by semantic version (e.g., v1.0.0)
  version_nhflo: str(pattern='^v\d+\.\d+\.\d+$', required=True)

  # Organization or person that owns the data
  owner: str(required=True)

  # ISO format date (YYYY-MM-DD)
  publication_date: str(pattern='^\d{4}-\d{2}-\d{2}$', required=True)

  # Version used by the data owner
  version_owner: str(required=True)

  # Brief description (one line)
  description_short: str(required=True)

  # Detailed description
  description_long: str(required=True)

  # Valid email address
  contact: str(pattern='^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', required=True)

  # Timezone name (e.g. Europe/Amsterdam); this schema only checks that it is a string
  timezone: str(required=True)

  # Spatial extent [minx, miny, maxx, maxy]
  extent: list(num(), length=4, required=True)

  # Path information
  paths: include('paths', required=True)

  # Version history
  changelog: include('changelog', required=True)

---
# Schema for paths
# All paths should end with the same version_nhflo as specified above
# (the patterns below check only the path format, not that the versions match)
paths:
  # Dataset name followed by version (e.g., wells_pwn/v1.0.0)
  local: str(pattern='^[a-zA-Z_]+[a-zA-Z0-9_]*/v\d+\.\d+\.\d+$', required=True)

  # Must start with /data/ (e.g., /data/wells_pwn/v1.0.0)
  nhflo_server: str(pattern='^/data/[a-zA-Z_]+[a-zA-Z0-9_]*/v\d+\.\d+\.\d+$', required=True)

  # Must start with mockup/ (e.g., mockup/wells_pwn/v1.0.0)
  mockup: str(pattern='^mockup/[a-zA-Z_]+[a-zA-Z0-9_]*/v\d+\.\d+\.\d+$', required=True)

---
# Schema for changelog
# Tracks version history and changes
changelog:
  # Either semantic version with 'v' prefix or 0.0.0 for initial version
  previous_version: str(pattern='^(v\d+\.\d+\.\d+|0\.0\.0)$', required=True)

  # Description of changes from previous version
  log: str(required=True)