Issue opened by xmnlab, 8 months ago.
import pandas as pd
import plotly.express as px
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
from typing import Any, Dict, List
def initialize_analyticsreporting() -> Any:
    """Build and return an authorized Analytics Reporting API V4 service.

    Reads a service-account key from a local JSON file and requests
    read-only access to Google Analytics.

    Returns
    -------
    Any
        An authorized Analytics Reporting API V4 service object.
    """
    scopes = ['https://www.googleapis.com/auth/analytics.readonly']
    # Replace with the path to your own service-account key file.
    key_path = 'YOUR_CREDENTIALS_FILE.json'
    credentials = ServiceAccountCredentials.from_json_keyfile_name(key_path, scopes)
    return build('analyticsreporting', 'v4', credentials=credentials)
def get_report(
    analytics: Any,
    start_date: str,
    end_date: str,
    view_id: str = 'YOUR_VIEW_ID',
) -> Dict:
    """Query the Analytics Reporting API V4 for sessions by country/page.

    Parameters
    ----------
    analytics : Any
        An authorized Analytics Reporting API V4 service object.
    start_date : str
        Start date for fetching data (format: YYYY-MM-DD).
    end_date : str
        End date for fetching data (format: YYYY-MM-DD).
    view_id : str, optional
        The Analytics view (profile) ID to query.  Previously this was
        hard-coded in the request body; it is now a parameter with the
        same default, so existing callers are unaffected.

    Returns
    -------
    Dict
        The raw response from the API query.
    """
    return analytics.reports().batchGet(
        body={
            'reportRequests': [
                {
                    'viewId': view_id,
                    'dateRanges': [{'startDate': start_date, 'endDate': end_date}],
                    'metrics': [{'expression': 'ga:sessions'}],  # Example metric
                    'dimensions': [
                        {'name': 'ga:country'},
                        {'name': 'ga:pagePath'},
                    ],  # Example dimensions
                }
            ]
        }
    ).execute()
def convert_to_dataframe(response: Dict) -> pd.DataFrame:
    """Parse an Analytics Reporting API V4 response into a DataFrame.

    Parameters
    ----------
    response : Dict
        The response object from the API query.

    Returns
    -------
    pd.DataFrame
        One row per report row; dimension columns keep their header
        names, metric columns get an index suffix for any secondary
        date ranges (e.g. ``ga:sessions1``).
    """
    records: List[Dict[str, Any]] = []
    for report in response.get('reports', []):
        header = report.get('columnHeader', {})
        dimension_headers = header.get('dimensions', [])
        metric_headers = [
            entry['name']
            for entry in header.get('metricHeader', {}).get('metricHeaderEntries', [])
        ]
        # The API omits the 'rows' key entirely when a report has no
        # data, so index with .get to avoid a KeyError on empty reports.
        for row in report.get('data', {}).get('rows', []):
            record: Dict[str, Any] = dict(
                zip(dimension_headers, row.get('dimensions', []))
            )
            for i, date_range in enumerate(row.get('metrics', [])):
                for name, value in zip(metric_headers, date_range.get('values', [])):
                    # Suffix metric names for secondary date ranges.
                    record[name if i == 0 else name + str(i)] = value
            records.append(record)
    return pd.DataFrame(records)
def create_charts(df: pd.DataFrame) -> List[Any]:
    """Create Plotly bar charts from the Analytics DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        The data to plot; expected to contain the 'ga:country',
        'ga:pagePath' and 'ga:sessions' columns.

    Returns
    -------
    List[Any]
        Plotly figure objects; empty when *df* is empty.
    """
    # NOTE: the original annotation was List[px.Figure], but
    # plotly.express exposes no `Figure` attribute, so evaluating that
    # annotation raised AttributeError as soon as the module was
    # imported.  `Any` keeps the signature importable.
    charts: List[Any] = []
    if not df.empty:
        # Example chart: Sessions per Country
        charts.append(
            px.bar(df, x='ga:country', y='ga:sessions', title='Sessions per Country')
        )
        # Example chart: Sessions per PagePath
        charts.append(
            px.bar(df, x='ga:pagePath', y='ga:sessions', title='Sessions per Page')
        )
    return charts
def generate_html(charts: List[Any], file_name: str = "report.html") -> None:
    """Write the given charts into a single HTML report file.

    Parameters
    ----------
    charts : List[Any]
        Plotly figure objects (anything exposing ``to_html``).
        Annotated ``List[Any]`` instead of the original
        ``List[px.Figure]`` because plotly.express has no ``Figure``
        attribute, which made the original annotation raise at import.
    file_name : str, optional
        Name of the output HTML file, by default "report.html".
    """
    # Explicit encoding: chart HTML may contain non-ASCII text, and the
    # platform default encoding is not guaranteed to be UTF-8.
    with open(file_name, 'w', encoding='utf-8') as f:
        for chart in charts:
            # Each fragment pulls plotly.js from the CDN.
            f.write(chart.to_html(full_html=False, include_plotlyjs='cdn'))
            f.write('<br><hr><br>')  # Add separators between charts
def main() -> None:
    """Run the full workflow: fetch, process, and visualize GA data."""
    # Adjust the reporting window as needed.
    start_date, end_date = '2023-01-01', '2023-01-31'
    analytics = initialize_analyticsreporting()
    report = get_report(analytics, start_date, end_date)
    frame = convert_to_dataframe(report)
    generate_html(create_charts(frame))


if __name__ == "__main__":
    main()
@DanielaIgRo do you have time this week to work on this issue?
Creating a GitHub Actions workflow that runs a Python script on the first day of every month and stores the output in a specific directory involves several steps. Below is a YAML configuration for a GitHub Actions workflow that achieves this; add it to the `.github/workflows` directory of your repository. Create a file named `.github/workflows/monthly_stats_report.yml` with the following content:
name: Monthly Stats Report

on:
  schedule:
    # Runs at 00:00 UTC on the first day of every month
    - cron: '0 0 1 * *'

jobs:
  generate_report:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        # v2 ran on a deprecated Node runtime removed from GitHub runners;
        # v4 is the current supported major.
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'  # Specify the Python version

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pandas plotly google-auth-oauthlib google-auth-httplib2 google-api-python-client

      - name: Run the script
        run: python path/to/your/script.py

      - name: Commit and push if there are changes
        run: |
          git config --global user.email "your-email@example.com"
          git config --global user.name "Your Name"
          git add docs/stats/index.html
          git commit -m "Update monthly stats" || exit 0 # This will exit with 0 if there's nothing to commit
          git push
Script Path: Replace `path/to/your/script.py` with the actual path to your Python script within the repository.
Python Dependencies: Ensure all required Python packages are listed in the "Install dependencies" step.
Git Configuration: Replace `your-email@example.com` and `Your Name` with the email and name used for your Git commits.
Output Directory: Modify the script to write `index.html` to `docs/stats/`. If necessary, create these directories in your repository.
Commit and Push: The workflow commits and pushes the updated `index.html` file back to your repository. Ensure this is the desired behavior.
Schedule: The cron expression is `0 0 1 * *`, which means 00:00 on the first day of every month; adjust it if needed. After setting up this workflow, it will automatically run on the first day of every month, executing your script and updating the `index.html` file in the specified directory.
It would be great to have a page with the stats.
I will add some initial code for that (generated with GPT) in the next comment.