Open laughk opened 4 years ago
ここから始める (雑にぐぐると v3 のドキュメント も出てくるけど最新は v4 なので注意)
https://developers.google.com/analytics/devguides/reporting/core/v4/quickstart/service-py?hl=ja
ここの通りでうまく行かなかったこと
当然っちゃ当然なんだけど思いの外だるい。
ref. https://support.google.com/analytics/answer/1009702?hl=ja
こんな感じ。
$ python HelloAnalytics.py
Traceback (most recent call last):
File "HelloAnalytics.py", line 83, in <module>
main()
File "HelloAnalytics.py", line 78, in main
response = get_report(analytics)
File "HelloAnalytics.py", line 43, in get_report
'dimensions': [{'name': 'ga:country'}]
File "/home/laughk/work/draft-repo/ga-sample/venv37/lib/python3.7/site-packages/googleapiclient/_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "/home/laughk/work/draft-repo/ga-sample/venv37/lib/python3.7/site-packages/googleapiclient/http.py", line 856, in execute
raise HttpError(resp, content, uri=self.uri)
googleapiclient.errors.HttpError: <HttpError 403 when requesting https://analyticsreporting.googleapis.com/v4/reports:batchGet?alt=json returned "Analytics Reporting API has not been used in project ************ before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/analyticsreporting.googleapis.com/overview?project=************ then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry.". Details: "[{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Google developers console API activation', 'url': 'https://console.developers.google.com/apis/api/analyticsreporting.googleapis.com/overview?project=************'}]}]">
で、いろいろ調べてみたのだけど、どうも GA の API を有効にする設定が漏れていた模様 :innocent:
$ python HelloAnalytics.py
ga:country: Finland
Date range: 0
ga:sessions: 1
ga:country: India
Date range: 0
ga:sessions: 1
ga:country: Japan
Date range: 0
ga:sessions: 330
ga:country: Malaysia
Date range: 0
ga:sessions: 1
ga:country: South Korea
Date range: 0
ga:sessions: 1
ga:country: United States
Date range: 0
ga:sessions: 9
return analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': VIEW_ID,
'dateRanges': [
{'startDate': '7daysAgo', 'endDate': 'today'}
],
'metrics': [{'expression': 'ga:sessions'}],
'dimensions': [{'name': 'ga:country'}]
}
]
}
).execute()
ここの内容の調整でいい感じにデータを引っ張れる模様
dimensions をゴニョゴニョすると特に GA で設定してなくてもそこそこのデータがとってこれる
dimensions に入れる値は↓から検索可能 https://ga-dev-tools.appspot.com/dimensions-metrics-explorer/?hl=ja
イメージ
例えば PagePath ごとの session 数を取りたいなら↓ のようにすればいい
return analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': VIEW_ID,
'dateRanges': [
{'startDate': '365daysAgo', 'endDate': 'today'}
],
'metrics': [{'expression': 'ga:sessions'}],
'dimensions': [{'name': 'ga:pagePath'}]
}
]
}
).execute()
何故かこうなって lsp の補完ができない。 (突っ込んでいじろうとするとこれはかなりきつい ><) ざっとコードを漁ると
apiclient -> googleapiclient
とすると解決することがわかった
Keyword 関連で Search Console API もいじってみようか
まずは有効化
公式のサンプルコードが何故か生の API を直接叩くみたいな感じになってたので Analytics や他の事例を参考にしつつこんな感じで雑に書いてみる
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
KEY_FILE_LOCATION = '../client_secrets.json'
def initialize():
    """Build an authorized Search Console (Webmasters) API v3 service object.

    Service-account credentials are loaded from the JSON key file at
    ``KEY_FILE_LOCATION`` and requested with the scopes in ``SCOPES``.

    Returns:
        The service object produced by ``build('webmasters', 'v3', ...)``.
    """
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION, SCOPES)

    # Build the service object.
    return build('webmasters', 'v3', credentials=credentials)
def get(webmasters):
    """Run a fixed Search Analytics query and return the executed response.

    The request targets ``https://memo.laughk.org/`` for 2019-12-11 through
    2019-12-14, grouped by page and query, asking for at most 250 rows.

    Args:
        webmasters: Service object exposing ``searchanalytics().query(...)``.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    request_body = {
        'startDate': '2019-12-11',
        'endDate': '2019-12-14',
        'dimensions': ['page', 'query'],
        'rowLimit': 250,
    }
    search_analytics = webmasters.searchanalytics()
    request = search_analytics.query(
        siteUrl='https://memo.laughk.org/',
        body=request_body,
    )
    return request.execute()
if __name__ == '__main__':
    # Script entry point: build the service, run the query, print the raw response.
    print(get(initialize()))
結果
$ python search_console.py
{'rows': [{'keys': ['https://memo.laughk.org/2017/12/03/000013.html', 'arch linux'], 'clicks': 5.0, 'impressions': 100.0, 'ctr': 0.05, 'position': 6.53}, {'keys': ['https://memo.laughk.org/2018/01/15/000016.html', 'x270 改造'], 'clicks': 3.0, 'impressions': 17.0, 'ctr': 0.17647058823529413, 'position': 7.235294117647059}, {'keys': ['https://memo.laughk.org/2017/12/03/000013.html', 'archlinux'], 'clicks': 2.0, 'impressions': 53.0, 'ctr': 0.03773584905660377, 'position': 5.811320754716981}, {'keys': ['https://memo.laughk.org/2014/04/24/shell_exec_log.html', '2>&1 | tee -a'], ....
それっぽく取れている模様。
ちなみに body の期間をあまり広くすると以下のようにエラーになってしまうので注意 (自分のブログだと4日間が限界だった)
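多分クエリ内容にもよるんだと思う。データ量多めだとやっぱり応答は遅くなる。とはいえ、スクリプト実行タイミングによってエラーが出ないときもあって、 Search Console の API ってもしかして不安定 ?? という気持ちもあるっちゃある。 ただし下のトレースバックをよく見ると、落ちているのは build() の中の名前解決 (socket.getaddrinfo) で、クエリを投げる前の段階。なので期間の広さとは無関係に、実行環境側の一時的なネットワーク / DNS の問題だった可能性もある。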
$ python search_console.py
Traceback (most recent call last):
File "search_console.py", line 36, in <module>
wb = initialize()
File "search_console.py", line 18, in initialize
webmasters = build('webmasters', 'v3', credentials=credentials)
File "/home/laughk/.ghq/github.com/laughk/TIL/GoogleAPI/py/venv/lib/python3.8/site-packages/googleapiclient/_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "/home/laughk/.ghq/github.com/laughk/TIL/GoogleAPI/py/venv/lib/python3.8/site-packages/googleapiclient/discovery.py", line 222, in build
content = _retrieve_discovery_doc(
File "/home/laughk/.ghq/github.com/laughk/TIL/GoogleAPI/py/venv/lib/python3.8/site-packages/googleapiclient/discovery.py", line 273, in _retrieve_discovery_doc
resp, content = http.request(actual_url)
File "/home/laughk/.ghq/github.com/laughk/TIL/GoogleAPI/py/venv/lib/python3.8/site-packages/httplib2/__init__.py", line 1948, in request
(response, content) = self._request(
File "/home/laughk/.ghq/github.com/laughk/TIL/GoogleAPI/py/venv/lib/python3.8/site-packages/httplib2/__init__.py", line 1621, in _request
(response, content) = self._conn_request(
File "/home/laughk/.ghq/github.com/laughk/TIL/GoogleAPI/py/venv/lib/python3.8/site-packages/httplib2/__init__.py", line 1528, in _conn_request
conn.connect()
File "/home/laughk/.ghq/github.com/laughk/TIL/GoogleAPI/py/venv/lib/python3.8/site-packages/httplib2/__init__.py", line 1290, in connect
address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
File "/usr/lib/python3.8/socket.py", line 914, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
OSError: [Errno 16] Device or resource busy
$ python search_console.py
# 検索キーワード上位10 (期間: 2019-12-07 - 2019-12-14)
word: ['arch linux'] (click: 42.0)
word: ['archlinux'] (click: 20.0)
word: ['thinkpad キーボード 交換'] (click: 7.0)
word: ['x270 キーボード 交換'] (click: 7.0)
word: ['シェルスクリプト ログ出力'] (click: 7.0)
word: ['linux パスワード管理'] (click: 6.0)
word: ['thinkpad x270 キーボード 交換'] (click: 6.0)
word: ['x270 改造'] (click: 6.0)
word: ['bash ログ'] (click: 5.0)
word: ['querydict'] (click: 5.0)
code
import argparse
import json
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
KEY_FILE_LOCATION = '../client_secrets.json'
def initialize():
    """Build an authorized Search Console (Webmasters) API v3 service object.

    Service-account credentials are loaded from the JSON key file at
    ``KEY_FILE_LOCATION`` and requested with the scopes in ``SCOPES``.

    Returns:
        The service object produced by ``build('webmasters', 'v3', ...)``.
    """
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION, SCOPES)

    # Build the service object.
    return build('webmasters', 'v3', credentials=credentials)
def get(webmasters, start, end, limit):
    """Query Search Analytics for search queries within a date range.

    Args:
        webmasters: Service object exposing ``searchanalytics().query(...)``.
        start: Sent as the request's ``startDate`` (callers in this file
            pass ``YYYY-MM-DD`` strings).
        end: Sent as the request's ``endDate`` (same format as ``start``).
        limit: Sent as the request's ``rowLimit``.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    return webmasters.searchanalytics().query(
        siteUrl='https://memo.laughk.org/',
        body={
            # Honour the caller-supplied range; these were previously
            # hard-coded, so the start/end arguments had no effect.
            'startDate': start,
            'endDate': end,
            'dimensions': ['query'],
            'rowLimit': limit,
        },
    ).execute()
def main():
    """Print the top search queries and their click counts for a date range.

    Command-line options:
        -s/--start: first day of the range (default ``2019-12-07``).
        -e/--end: last day of the range (default ``2019-12-14``).
        -l/--limit: maximum number of rows to request (default ``10``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--start', default='2019-12-07')
    parser.add_argument('-e', '--end', default='2019-12-14')
    # type=int so a limit given on the command line is a number, like the
    # default, instead of being forwarded to the API as a string.
    parser.add_argument('-l', '--limit', type=int, default=10)
    args = parser.parse_args()

    wb = initialize()
    # NOTE(review): the API appears to omit 'rows' when nothing matches;
    # fall back to an empty list rather than raising KeyError.
    rows = get(wb, args.start, args.end, args.limit).get('rows', [])

    # The header reflects the requested limit instead of a fixed "10".
    print(f'# 検索キーワード上位{args.limit} (期間: {args.start} - {args.end})')
    print('')
    for row in rows:
        print(f'word: {row["keys"]} (click: {row["clicks"]})')


if __name__ == '__main__':
    main()
最近仕事で関わっているものの自力でゼロから触ったわけではないのでやってみる。 Go でやろうかなと思ったけど、最近 Python が恋しいので Python でやろう。