codeforcroatia / imamopravoznati-tjv

TJV Parser is a script that scrapes and parses the public authorities file and publishes it online in an open format
https://morph.io/codeforcroatia/imamopravoznati-tjv
0 stars 2 forks source link

Test case for the script #9

Open schlos opened 1 year ago

schlos commented 1 year ago

Here's a possible test case for this Python scraper code:

import unittest
import pandas as pd
import sqlite3

class TestScraper(unittest.TestCase):
    """Smoke test for the downloaded register data stored in ``data.sqlite``.

    ``setUp`` downloads the CSV export and writes it to the ``data`` table;
    ``test_scraped_data`` reads the table back and checks that it is
    non-empty and that the expected columns exist and hold only strings.
    """

    def setUp(self):
        """Download the CSV export and store it in the ``data`` table."""
        # ``on_bad_lines='skip'`` replaces ``error_bad_lines=False``, which
        # was deprecated in pandas 1.3 and removed in pandas 2.0.
        # NOTE(review): the test below expects the columns "OIB",
        # "entity_name" and "legal_status"; whether the raw CSV uses those
        # header names is not visible here -- confirm. If OIB values are
        # digit-only, pandas will infer an integer dtype and the string
        # check will fail unless a ``dtype`` is passed -- confirm.
        base_data = pd.read_csv(
            "http://tjv.pristupinfo.hr/?sort=1&page=1&download",
            on_bad_lines='skip',
            sep=';',
            index_col='Rb.',
        )

        conn = sqlite3.connect("data.sqlite")
        try:
            # index=False: the 'Rb.' index is not written to the table.
            base_data.to_sql("data", conn, if_exists='replace', index=False)
        finally:
            # sqlite3 connections are not closed automatically; close the
            # handle explicitly so it is not leaked on every test run.
            conn.close()

    def test_scraped_data(self):
        """Check that the stored table is non-empty and has string columns."""
        conn = sqlite3.connect("data.sqlite")
        try:
            data = pd.read_sql_query("SELECT * FROM data", conn)
        finally:
            conn.close()

        # The table must contain at least one row.
        self.assertGreater(len(data), 0)

        # Each expected column must exist and contain only strings.
        # subTest reports every failing column instead of stopping at the
        # first one.
        for column in ("OIB", "entity_name", "legal_status"):
            with self.subTest(column=column):
                self.assertIn(column, data.columns)
                self.assertTrue(
                    all(isinstance(value, str) for value in data[column]),
                    "column %r contains non-string values" % column,
                )

# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

This test case uses the unittest library to check the scraped data. It downloads the CSV export, stores it in the data.sqlite database, and then verifies that the data contains at least one row, that the OIB, entity_name and legal_status columns exist, and that every value in those columns is a string.