ozkary / ai-engineering

Working use cases with AI written on Python and TypeScript
https://www.ozkary.com/2023/05/ai-engineering-generate-code-from-user-stories.html
Apache License 2.0
5 stars 0 forks source link

Transform the data frame by merging the date and time field into a single column #1

Open ozkary opened 1 year ago

ozkary commented 1 year ago

As a data scientist, I want to generate code using the following technologies, requirements, and specifications:

Technologies:

Requirements:

Specifications:

ozkary commented 1 year ago

test api via postman

ozkary commented 1 year ago

Generated code:

import pandas as pd

def transform_data(csv_path):
    '''
    Transform a data frame by consolidating the 'date' and 'time' columns into a date time field column named 'created'.

    Parameters
    ----------
    csv_path : str
        The path of the CSV file to be transformed

    Returns
ozkary commented 1 year ago

Code:

import pandas as pd import unittest

def transform_data(csv_file_path): """ Transform a data frame by consolidating the 'date' and 'time' columns into a date time field column named 'created'.

Parameters:
csv_file_path (str): The path of the CSV file.

Returns:
bool: True if successful, False if not.
"""
try:
    # Load the data from the CSV file
    data_frame = pd.read_csv(csv_file_path)

    # Combine the 'date' and 'time' columns into a single 'created' column
    data_frame['created'] = pd.to_datetime(data_frame['date'] + ' ' + data_frame['time'])

    # Save the resulting data frame to disk in Parquet format
    data_frame.to_parquet('data_frame.parquet')
ozkary commented 1 year ago

Generated code:


import pandas as pd
import unittest

def transform_data(csv_file_path):
    """
    Transform a data frame by consolidating the 'date' and 'time' columns into a date time field column named 'created'.

    Parameters:
    csv_file_path (str): The path of the CSV file to be transformed.

    Returns:
    bool: True if the transformation was successful, False otherwise.
    """
    # Load the data from the CSV file
    df = pd.read_csv(csv_file_path)

    # Convert the 'date' and 'time' columns into a single 'created' column
    df['created'] = pd.to_datetime(df['date'] + ' ' + df['time'])

    # Save the transformed data frame to disk in Parquet format
    df.to_parquet('transformed_data.parquet')

    return True

class TransformDataTest(unittest.TestCase):
    """Unit tests for the `transform_data` function."""

    def test_with_valid_data(self):
        """Test the function with valid data."""
        # Create a mock CSV file
        with open('mock_data.csv', 'w') as f:
            f.write('date,time\n')
ozkary commented 1 year ago

Generated code:


import pandas as pd
import unittest

def transform_data(csv_file_path):
    """
    Transforms a data frame by consolidating the 'date' and 'time' columns into a date time field column named 'created'.

    Parameters:
        csv_file_path (str): The path to the CSV file to transform.

    Returns:
        bool: True if successful, False if not.
    """
    try:
        # Load the CSV file into a pandas data frame
        df = pd.read_csv(csv_file_path)

        # Create a new column 'created' by combining the 'date' and 'time' columns
        df['created'] = pd.to_datetime(df['date'] + ' ' + df['time'])

        # Drop the 'date' and 'time' columns
        df.drop(['date', 'time'], axis=1, inplace=True)

        # Save the resulting data frame to disk in Parquet format
        df.to_parquet('transformed_data.parquet')

        return True
    except:
        return False

class TestTransformData(unittest.TestCase):
    """Unit tests for the `transform_data` function."""

    def test_transform_data_success(self):
ozkary commented 1 year ago

Generated code:


# Import Libraries
import pandas as pd
import pytest

# Transform Data Function
def transform_data(csv_file_path):
    '''
    Transforms a data frame by consolidating the 'date' and 'time' columns into a date time field column named 'created'.

    Parameters:
        csv_file_path (str): The path to the CSV file containing the data to transform.

    Returns:
        success (bool): True if successful, false if not.
    '''
    try:
        # Load data from CSV file
        df = pd.read_csv(csv_file_path)

        # Consolidate date and time columns into a date time field
        df['created'] = pd.to_datetime(df['date'] + ' ' + df['time'])

        # Save data frame to disk in Parquet format
        df.to_parquet('transformed_data.parquet')

        # Return success
        return True

    except:
        # Return failure
        return False

# Unit Tests
@pytest.mark.parametrize('csv_file_path, expected_result', [
    ('test_data.csv', True),
    ('invalid_path.csv', False)
])
def test_transform_data(csv_
ozkary commented 1 year ago

Generated code:


## transform_data Function
This function takes a file path to a CSV file and transforms it by consolidating the 'date' and 'time' columns into a `created` column of type `datetime`.

```python
import pandas as pd

def transform_data(csv_file_path):
    # Read CSV file
    df = pd.read_csv
ozkary commented 3 months ago

$Provider.GEMINI Generated code:

import pandas as pd

def transform_data(csv_path, output_path):
    """
    Transforms a data frame by consolidating the 'date' and 'time' columns into a date time field column named 'created'.

    Args:
        csv_path (str): The path to the CSV file containing the data.
        output_path (str): The path to the Parquet file where the transformed data should be saved.

    Returns:
        bool: True if the transformation was successful, False otherwise.
    """
    try:
        # Load the data from the CSV file
        df = pd.read_csv(csv_path)

        # Consolidate the 'date' and 'time' columns into a date time field column named 'created'
        df['created'] = pd.to_datetime(df['date'] + ' ' + df['time'])

        # Drop the 'date' and 'time' columns
        df.drop(columns=['date', 'time'], inplace=True)

        # Save the resulting data frame to disk in Parquet format
        df.to_parquet(output_path)

        return True
    except Exception as e:
        print(f'Error transforming data: {e}')
        return False

def test_transform_data():
    """
    Unit test for the `transform_data` function.
    """
    # Create a test data frame
    df = pd.DataFrame({
        'date': ['2023-03-08', '2023-03-09', '2023-03-10'],
        'time': ['10:00:00', '11:00:00', '12:00:00'],
        'value': [1, 2, 3]
    })

    # Transform the test data frame
    transformed_df = transform_data(df)

    # Check that the transformed data frame has the expected shape and columns
    assert transformed_df.shape == (3, 2)
    assert 'created' in transformed_df.columns

    # Check that the 'created' column has the expected values
    assert transformed_df['created'][0] == pd.to_datetime('2023-03-08 10:00:00')
    assert transformed_df['created'][1] == pd.to_datetime('2023-03-09 11:00:00')
    assert transformed_df['created'][2] == pd.to_datetime('2023-03-10 12:00:00')

if __name__ == '__main__':
    # Test the `transform_data` function
    test_transform_data()
ozkary commented 2 weeks ago

Provider.GEMINI Generated code:

import pandas as pd

def transform_data(csv_file_path, output_file_path):
  """
  Transforms a data frame by consolidating the 'date' and 'time' columns into a date time field column named 'created'.

  Args:
    csv_file_path: The CSV file path of the data frame to transform.
    output_file_path: The Parquet file path to save the resulting data frame.

  Returns:
    True if successful, False otherwise.
  """
  try:
    # Load the data frame from the CSV file
    df = pd.read_csv(csv_file_path)

    # Consolidate the 'date' and 'time' columns into a date time field column
    df['created'] = pd.to_datetime(df['date'] + ' ' + df['time'])

    # Drop the original 'date' and 'time' columns
    df.drop(columns=['date', 'time'], inplace=True)

    # Save the resulting data frame to disk in Parquet format
    df.to_parquet(output_file_path)

    return True
  except Exception as e:
    print('Error transforming data frame:', e)
    return False

def test_transform_data():
    # Create a test data frame
    test_df = pd.DataFrame({
        'date': ['2023-03-08', '2023-03-09', '2023-03-10'],
        'time': ['10:00:00', '11:00:00', '12:00:00'],
        'value': [1, 2, 3]
    })

    # Transform the test data frame
    transformed_df = transform_data(test_df, 'test_output.parquet')

    # Assert that the transformed data frame has the correct shape and columns
    assert transformed_df.shape == (3, 2)
    assert list(transformed_df.columns) == ['created', 'value']

    # Assert that the 'created' column is of type datetime
    assert transformed_df['created'].dtype == 'datetime64[ns]'

    # Assert that the 'created' column contains the correct values
    assert transformed_df['created'].tolist() == ['2023-03-08 10:00:00', '2023-03-09 11:00:00', '2023-03-10 12:00:00']

if __name__ == '__main__':
    test_transform_data()