sotetsuk / goscholar

Google scholar scraper written in Go
MIT License
17 stars 7 forks source link

New API design #72

Closed sotetsuk closed 8 years ago

sotetsuk commented 8 years ago

API design

Query

Query creates Google Scholar query URLs:

q := Query{"keywords": "xxxxxxxx", "author":"y bengio"}
url := q.SearchUrl()

q := Query{"clusterid": "xxxxxxxxx"}
url := q.FindUrl()
url := q.CiteUrl()

q := Query{"infoid": "xxxxxxx"}
url := q.CitePopUpUrl()

and a parser that converts github/docopt/docopt arguments into a Query will be provided in cmd

arguments := docopt.Parse(...)
q := parseArgsToQuery(arguments)

Article

a := Article{}
a.String()
a.Json()

Parse

a, err := ParseDoc(doc)
title := parseSelection(s)

Files

- query (parse argv => url)
  - Query{}
  - SearchUrl()
- article
  - Article{}
  - Json()
  - String()
- parse (parser url => doc)
  - ParseDoc()
  - parseTitle()
  - parseYear()
sotetsuk commented 8 years ago

directory design

goscholar/
  | - query.go
  | - article.go
  | - parser.go
  | - cmd
        | - goscholar
              | - main.go

related

sotetsuk commented 8 years ago

Memo

article.isValid => a, [err] := parseDoc(doc)
sotetsuk commented 8 years ago

old

article.go

- [x] Article struct {} => article
- [x] NewArticle() *Article => x
- [x] func (a *Article) Parse(s *goquery.Selection)  => parser
- [x] func (a *Article) parseTitle(s *goquery.Selection)  => parser
- [x] func (a *Article) parseHeader(s *goquery.Selection) => parser
- [x] func (a *Article) parseFooter(s *goquery.Selection) => parser
- [x] func (a *Article) parseSideBar(s *goquery.Selection) => parser
- [x] func (a *Article) String() string  => article
- [x] func (a *Article) Json() => article
- [x] func (a *Article) isValid() => ParseSelection
- [x] func (a *Article) same (b *Article) => 
- [x] func (a *Article) showDifference(b *Article)
- [x] func (a *Article) hasSameURL(b *Article) bool

articles.go

- [x] func ParseArticles(ch chan *Article, doc *goquery.Document) => ParseDoc
- [x] func StdoutArticleAsJson(ch chan *Article) => cmd

query.go

- [x] func SearchQuery(arguments map[string]interface{}) (string, error) => query
- [x] func FindQuery(arguments map[string]interface{}) (string, error) => query
- [x] func CiteQuery(arguments map[string]interface{}) (string, error) => query
- [x] func CitePopUpQuery(info string) (string, error) => query

utils.go

- [x] func parseAndInitializeArguments(arguments map[string]interface{}) (query, author, title, cluster_id, after, before, start, num string) => cmd
- [x] func getDoc() {} => fetch (?)
- [x] func parseYear(s string) string => parser
- [x] func parseClusterId(url string) string => parser
- [x] func parseNumberOfCitations(s string) string => parser
- [x] func parseNumberOfVersions(s string) string => parser
- [x] func parseInfoId(url string) string => parser
- [x] func parsePDFSource(s string) string => parser
- [x] func startAndEndWithDoubleQuotation(s string) bool => parser
- [x] func trimParameter(url string, trimming string) string => parser
sotetsuk commented 8 years ago

new

query.go (issue url)

type Query{}
func SearchUrl() {}
func FindUrl() {}
func CiteUrl() {}
func CitePopUpUrl() {}

fetch.go (url => doc)

func Fetch(url) (*goquery.Document, error) {}

article.go

type Article{}
func Json() {}
func String() {}

parse.go

func ParseDoc(ch chan *Article, doc *goquery.Document) {}
func ParseSelection(s *goquery.Selection) (*Article, error) {}
func parseH3(s *goquery.Selection) {}
func parseGreenLine(s *goquery.Selection) {}
func parseBottom(s *goquery.Selection) {}
func parseSideBar(s *goquery.Selection) {}

text.go

func parseYearText(s string) string {}
func parseClusterIdText(url string) string {}
func parseNumberOfCitationsText(s string) string {}
func parseNumberOfVersionsText(s string) string {}
func parseInfoIdText(url string) string {}
func parsePDFSourceText(s string) string {}
func startAndEndWithDoubleQuotation(s string) bool {}
func trimParameter(url string, trimming string) string {}

utils.go

func same (a *Article, b *Article) => 
func showDifference(a *Article, b *Article)
func hasSameURL(a *Article, b *Article) bool

cmd/goscholar/main.go

func parseAndInitializeArguments(arguments map[string]interface{}) Query {}
func StdoutArticleAsJson(ch chan *Article) {}

looks good!