Benjamin-Loison / gitea

Git with a cup of tea, painless self-hosted git service
https://gitea.io
MIT License
0 stars 0 forks source link

Get rank in terms of number of contributions #74

Open Benjamin-Loison opened 2 weeks ago

Benjamin-Loison commented 2 weeks ago

Feature Description

https://codeberg.org/Benjamin_Loison?tab=activity

Can take into account my private contributions.

Web-scraping seems to be the most appropriate approach.

Screenshots

No response

Benjamin-Loison commented 2 weeks ago
import requests
from tqdm import tqdm
import json
from lxml import html
import re
import matplotlib.pyplot as plt

# Context: https://github.com/Benjamin-Loison/gitea/issues/74

# Base URL of the Gitea instance that gets queried.
GITEA_INSTANCE_URL = 'https://codeberg.org'
# Access token, to be generated at {GITEA_INSTANCE_URL}/user/settings/applications
# with `Public only` > `user`: `Read`.
TOKEN = 'CENSORED'

# HTTP headers carrying the token in the `token <value>` authorization form.
headers = dict(Authorization = f'token {TOKEN}')

def getApi(url, params = None, timeout = 60):
    """Query an endpoint of the instance REST API and return the decoded JSON.

    Args:
        url: Endpoint path relative to `{GITEA_INSTANCE_URL}/api/v1/`.
        params: Optional mapping of query-string parameters.
        timeout: Number of seconds to wait for the server before giving up.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within `timeout`.
    """
    response = requests.get(
        f'{GITEA_INSTANCE_URL}/api/v1/{url}',
        params = params,
        headers = headers,
        timeout = timeout,
    )
    # Fail loudly on 4xx/5xx instead of decoding an error payload as if it
    # were a successful result.
    response.raise_for_status()
    return response.json()

# Users returned by the user-search endpoint of the instance.
# NOTE(review): no paging parameters are sent, so this presumably only covers
# what the instance serves for a single request - confirm against the API
# pagination rules.
users = getApi('users/search')['data']

# The heatmap element exposes a sentence such as
# `2,750 contributions in the last 12 months`; the group captures the count,
# which may contain thousands separators.
contributionsStrRegex = re.compile('([0-9,]+) contributions in the last 12 months')

# Maps a username to the number of contributions parsed from its profile page.
contributionsOfUsers = {}

for user in tqdm(users):
    username = user['username']
    text = requests.get(f'{GITEA_INSTANCE_URL}/{username}', {'tab': 'activity'}).text
    tree = html.fromstring(text)
    userHeatmap = tree.xpath('//div[@id="user-heatmap"]')
    # Profiles without a heatmap element are skipped.
    if not userHeatmap:
        continue
    contributionsStr = userHeatmap[0].attrib.get('data-locale-total-contributions', '')
    contributionsMatch = contributionsStrRegex.match(contributionsStr)
    # A missing attribute or an unexpected sentence must not abort the whole
    # run (nothing is written to disk until the loop ends), so such profiles
    # are skipped like the ones without a heatmap.
    if contributionsMatch is None:
        continue
    contributions = int(contributionsMatch.group(1).replace(',', ''))
    contributionsOfUsers[username] = contributions

CONTRIBUTIONS_OF_USERS_FILE_PATH = 'contributions_of_users.json'
# Users with at least this many contributions are listed at the end.
CONTRIBUTIONS_THRESHOLD = 3_649

# Save the collected contribution counts as indented JSON.
with open(CONTRIBUTIONS_OF_USERS_FILE_PATH, 'w') as f:
    json.dump(contributionsOfUsers, f, indent = 4)

# Read the counts back from the file; presumably this lets the analysis below
# be rerun from the saved file without scraping again - TODO confirm.
with open(CONTRIBUTIONS_OF_USERS_FILE_PATH) as f:
    contributionsOfUsers = json.load(f)

# Histogram of the contribution counts with a logarithmic y-axis, saved as SVG.
plt.hist(list(contributionsOfUsers.values()), bins = 100)
plt.yscale('log')
plt.savefig('contributions_of_users.svg')

# Print every user reaching the threshold along with its activity page, built
# from the configured instance URL instead of a hard-coded host.
for user, contributionsOfUser in contributionsOfUsers.items():
    if contributionsOfUser >= CONTRIBUTIONS_THRESHOLD:
        print(user, f'{GITEA_INSTANCE_URL}/{user}?tab=activity')
100%|██████████| 110736/110736 [6:05:53<00:00,  5.04it/s]

contributions_of_users.json

contributions_of_users

Rank 15.

So removing these 8 automated accounts I reach rank 7.

These accounts do not have, to some extent, realistic contributions (they might have been automated).

Can private contributions be shown publicly, like on GitHub?