Maksim-Sheyngalts / PythonTool

1 stars 0 forks source link

Yandex pFound #23

Open Maksim-Sheyngalts opened 1 year ago

Maksim-Sheyngalts commented 1 year ago
import pandas as pd     # считываем данные  qid_query = pd.read_csv("hidden_task/qid_query.tsv", sep="\t", names=["qid", "query"])  qid_url_rating = pd.read_csv("hidden_task/qid_url_rating.tsv", sep="\t", names=["qid", "url", "rating"])    hostid_url = pd.read_csv("hidden_task/hostid_url.tsv", sep="\t", names=["hostid", "url"])   # делаем join двух таблиц, чтобы было просто брать url с максимальным рейтингом     qid_url_rating_hostid = pd.merge(qid_url_rating, hostid_url, on="url")  def plook(ind, rels):    if ind == 0:                   return 1     return plook(ind-1, rels)*(1-rels[ind-1])*(1-0.15)     def pfound(group):   max_by_host = group.groupby("hostid")["rating"].max() # максимальный рейтинг хоста      top10 = max_by_host.sort_values(ascending=False)[:10] # берем топ10 урлов с наивысшим рейтингом     pfound = 0      for ind, val in enumerate(top10):                  pfound += val*plook(ind, top10.values)   return pfound  qid_pfound = qid_url_rating_hostid.groupby('qid').apply(pfound) # группируем по qid и вычисляем pfound  qid_max = qid_pfound.idxmax() # берем qid с максимальным pfound     qid_query[qid_query["qid"] == qid_max]