Sarah111-AHM / Semsmah

2 stars 0 forks source link

Lap7 #28

Closed Sarah111-AHM closed 1 year ago

Sarah111-AHM commented 1 year ago

Q1

import pandas as pd
import numpy as np

def _swap_rows(df, a, b):
    """Swap the rows at positions a and b of df in place, one cell at a time."""
    # Scalar access through .iat avoids relying on whether df.iloc[row]
    # hands back a view or a copy of the row.
    for col in range(df.shape[1]):
        df.iat[a, col], df.iat[b, col] = df.iat[b, col], df.iat[a, col]


def heapify(df, n, i):
    """Sift the row at position i down until its subtree is a max-heap.

    Rows are compared by their 'key' column and moved in place. Only the
    first n rows of df are treated as part of the heap; rows at position
    n and beyond are left untouched.
    """
    key_col = df.columns.get_loc('key')
    while True:
        largest = i
        left = 2 * i + 1
        right = 2 * i + 2

        if left < n and df.iat[left, key_col] > df.iat[largest, key_col]:
            largest = left
        if right < n and df.iat[right, key_col] > df.iat[largest, key_col]:
            largest = right

        if largest == i:
            return
        _swap_rows(df, i, largest)
        # Continue sifting from the child that received the smaller row.
        i = largest


key = np.array([2, 2, 1, 1, 1])
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])

# Create a DataFrame with the key and value columns
df = pd.DataFrame({'key': key, 'value': values})

# Sort the DataFrame by the key column using the heapsort algorithm.
# Phase 1: build a max-heap over all rows, starting from the last parent.
n = len(df)
for i in range(n // 2 - 1, -1, -1):
    heapify(df, n, i)

# Phase 2: repeatedly move the current maximum (row 0) to the end of the
# unsorted prefix, then restore the heap on the shortened prefix.
for i in range(n - 1, 0, -1):
    _swap_rows(df, 0, i)
    heapify(df, i, 0)

# Get the sorted key and value arrays from the sorted DataFrame.
# Heapsort is not stable, so rows sharing a key may change relative order.
sorted_keys = df['key'].values
sorted_values = df['value'].values

print(sorted_keys)
print(sorted_values)

This should output (heapsort is not stable, so rows that share a key are not kept in their original order):

[1 1 1 2 2]
['1:third' '1:first' '1:second' '2:second' '2:first']

Q2

import pandas as pd

data = {
    'names': ['ahmed', 'mohammed', 'anas', 'food', 'loay', 'gamal', 'saed'],
    'ids': ['120215568', '120206874', '120219874', '120194532', '120212258', '120214521', '120227452'],
    'GPAs': [89, 77, 52, 60, 58, 54, 92]
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Take the class average while the GPAs column is still purely numeric;
# once the 'w' markers are written, a mean over the column is not defined.
avg_gpa = df['GPAs'].mean()

# Change any GPAs under 55 to 'w'. The column is converted to object dtype
# first so that it can hold both the remaining numbers and the 'w' marker.
is_warning = df['GPAs'] < 55
df['GPAs'] = df['GPAs'].astype(object).mask(is_warning, 'w')

# Print the enrollment year of the warning students.
# NOTE(review): the year is read from characters 1-4 of the id
# (e.g. '120194532' -> '2019'); confirm against the id format.
enrollment_years = [student_id[1:5] for student_id in df['ids']]
df['enrollment_year'] = enrollment_years
warning_students = df[df['GPAs'] == 'w']
print("Enrollment year of warning students:")
print(warning_students['enrollment_year'])

# Print the average GPA for the whole class
print("Average GPA for the whole class:", avg_gpa)

This should output:

Enrollment year of warning students:
2    2021
5    2021
Name: enrollment_year, dtype: object
Average GPA for the whole class: 68.85714285714286

Q3

Sure, here's the final code:

import pandas as pd

sdata = {
    'cities': ['Paris', 'Marseille', 'Lyon', 'Toulouse', 'Strasbourg', 'Le Mans'],
    'distance': ['1542.1', '8695.5', '2587.5', '1875.7', '5987.0', '11014.9']
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(sdata)

# Add the country column (every listed city is in France)
countries = ['France'] * len(df)
df['country'] = countries

# Sort the DataFrame by the distance; the values arrive as strings, so
# convert them to numbers first to get a numeric rather than lexical order.
df['distance'] = pd.to_numeric(df['distance'])
df = df.sort_values(by='distance')


# Define a function to find the nearest city to a given city
def find_nearest_city(city1, city2):
    """Print which of the two cities has the smaller distance, and by how much.

    Distances are read from the module-level ``df``. Each city is looked up
    by name, so the result does not depend on the row order of ``df``.

    Raises:
        IndexError: if either city is not present in ``df['cities']``.
    """
    # Select by boolean mask on the label-aligned frame. After sort_values
    # the index labels no longer match row positions, so a label must not
    # be used to index the positional .values array.
    distance1 = df.loc[df['cities'] == city1, 'distance'].iloc[0]
    distance2 = df.loc[df['cities'] == city2, 'distance'].iloc[0]
    if distance1 < distance2:
        print(city1, "is nearer to", city2, "by", distance2 - distance1, "km")
    else:
        print(city2, "is nearer to", city1, "by", distance1 - distance2, "km")


# Test the function with two cities
find_nearest_city('Paris', 'Lyon')

This should output:

Paris is nearer to Lyon by 1045.4 km

I hope this helps! Let me know if you have any questions.

Q4

import pandas as pd

# Create two Pandas Series
# Two integer Series of equal length, compared position by position
s1 = pd.Series([2, 4, 6, 8, 10])
s2 = pd.Series([1, 3, 5, 7, 10])

# Series.eq is the method form of ==; it yields a boolean Series that is
# True wherever the two inputs hold the same value
comparison = s1.eq(s2)

# Show the element-wise result
print(comparison)

This should output:

0    False
1    False
2    False
3    False
4     True
dtype: bool

Q5

Sure, here's the solution:

import pandas as pd

data = {
    'col1': [1, 2, 3, 4, 7, 11, 22, 3, 2, 3, 9],
    'col2': [4, 5, 6, 9, 5, 0, 4, 6, 7, 8, 0],
    'col3': [7, 5, 8, 12, 1, 11, 5, 8, 22, 6, 5]
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Take the first column as a Series
first_column = df['col1']

# Find the most frequent value in the Series.
# mode() returns every value tied for the highest count, in sorted order,
# so [0] picks the smallest of them when there is a tie.
most_frequent = first_column.mode()[0]

# Replace everything else as 'Other' in the Series.
# where() builds a new Series instead of assigning through a column of df,
# and the object dtype lets the result hold both the number and the string.
s = first_column.astype(object).where(first_column == most_frequent, 'Other')

# Print the modified Series
print(s)

This should output:

0     Other
1     Other
2         3
3     Other
4     Other
5     Other
6     Other
7         3
8     Other
9         3
10    Other
Name: col1, dtype: object