Closed calendarbase closed 8 years ago
This works:
// Scrape repository metadata from a GitHub project page with node-scrapy.
const scrapy = require('node-scrapy');

const url = 'https://github.com/eeshi/node-scrapy';

// Selectors for the counters that sit right after the star and fork buttons.
const social = {
  stars: '.star-button + .social-count',
  forks: '.fork-button + .social-count',
};

// Selectors for the numbers-summary bar: each extra "+ li" steps one list
// item further past the commits entry.
const stats = {
  commits: '.commits .num',
  branches: '.numbers-summary > li.commits + li .num',
  releases: '.numbers-summary > li.commits + li + li .num',
  contributors: '.numbers-summary > li.commits + li + li + li .num',
  social,
};

// The model handed to scrapy.scrape; every leaf here is a selector string.
const model = {
  author: '.author',
  repo: '.js-current-repository',
  stats,
  files: '.content',
};

// Report a failed scrape on stderr, otherwise print whatever was scraped.
scrapy.scrape(url, model, (err, data) => {
  if (err) {
    console.error(err);
    return;
  }
  console.log(data);
});
But this repo is tricky:
// Same model as the working example, pointed at the spf13/hugoThemes page.
const scrapy = require('node-scrapy');

const url = 'https://github.com/spf13/hugoThemes';

// Selectors for the counters that sit right after the star and fork buttons.
const social = {
  stars: '.star-button + .social-count',
  forks: '.fork-button + .social-count',
};

// Selectors for the numbers-summary bar: each extra "+ li" steps one list
// item further past the commits entry.
const stats = {
  commits: '.commits .num',
  branches: '.numbers-summary > li.commits + li .num',
  releases: '.numbers-summary > li.commits + li + li .num',
  contributors: '.numbers-summary > li.commits + li + li + li .num',
  social,
};

// The model handed to scrapy.scrape; every leaf here is a selector string.
const model = {
  author: '.author',
  repo: '.js-current-repository',
  stats,
  files: '.content',
};

// Report a failed scrape on stderr, otherwise print whatever was scraped.
scrapy.scrape(url, model, (err, data) => {
  if (err) {
    console.error(err);
    return;
  }
  console.log(data);
});
The result is commit messages and I can't get the href:
files:
[ 'Failed to load latest commit information.',
'academic @ 1a20f5e',
'agency @ 6415e68',
'aglaus @ f0328f8',
'air @ 0e1c3b0',
'allegiant @ 6c49d13',
'angels-ladder @ bbe249f',
'artists @ baf5506',
'aurora @ d760f75',
'base16 @ 058fd51',
'beautifulhugo @ ecb7e94',
'beg @ 80a6b59',
'blackburn @ 8103131',
'bleak @ 13bf460',
'bootie-docs @ e14a0e4',
'bootstrap @ d8219ca',
'cactus @ 7921c05',
'casper @ 0c6470f',
'cocoa @ e32b495',
'creative @ 5c1fdb2',
'crisp @ e99ce5d',
'detox @ b56f0e2',
'freelancer @ e8dd137',
'future-imperfect @ 2db677f',
'ghostwriter @ f3662c3',
'gindoro @ 6bfc86b',
'github-project-landing-page @ 9d8a23c',
'greyshade @ 28fb061',
'grid-side @ d384625',
'heather-hugo @ cb93a3b',
'herring-cove @ fe7ce04',
'hikari @ ecfaed7',
'html5 @ 9a3e7a0',
'hugo-base-theme @ f78b4d1',
'hugo-bootstrap-premium @ d1f1b8c',
'hugo-bootswatch @ 4367ac1',
'hugo-darkdoc-theme @ 7b2069f',
'hugo-geo @ b14540d',
'hugo-h5bp @ 7741bbc',
'hugo-icarus @ ff019b8',
'hugo-identity-theme @ 79e03ea',
'hugo-incorporated @ 3d21a63',
'hugo-lithium-theme @ 08ba4a9',
'hugo-mdl @ 79c6f0e',
'hugo-minimalist @ 21036c6',
'hugo-multi-bootswatch @ 0f12d1b',
'hugo-octopress @ bcd9bcd',
'hugo-pacman-theme @ 604c57f',
'hugo-phlat-theme @ 168cd40',
'hugo-plus @ bba9290',
'hugo-theme-arch @ 31a5301',
'hugo-theme-geppaku @ d56d47c',
'hugo-theme-learn @ 75df02c',
'hugo-uno @ a66c2db',
'hugo-zen @ ff5a388',
'hugoscroll @ 6f6ce41',
'hurock @ 13081b8',
'hyde @ a04b9e1',
'hyde-x @ ee61d83',
'hyde-y @ 493bf99',
'internet-weblog @ 6a9c9a5',
'journal @ e543e4b',
'landing-page-hugo @ 9280715',
'lanyon @ 0c3da68',
'liquorice @ b2b6d57',
'material-design @ 0d88058',
'material-docs @ e133b47',
'material-lite @ 24a23e3',
'next @ b8c8076',
'nofancy @ 151dd32',
'persona @ afb2d45',
'pixyll @ 50dad69',
'polymer @ d8be002',
'projecthub @ d0fb856',
'purehugo @ 3c4ee78',
'redlounge @ 7853d1e',
'robust @ 69ce3f1',
'rocktopus @ c47bcd7',
'shiori @ e23d4a1',
'simple-a @ 3338b25',
'simple-hugo @ 20d9086',
'slender @ 0874af0',
'slim @ a491cb0',
'startbootstrap-clean-blog @ c3ff77a',
'steam @ d917cfa',
'strata @ 253c8bb',
'tachyons @ a98c649',
'tinyce @ ec650df',
'twentyfourteen @ 83f0a78',
'type @ e8a2c26',
'vienna @ 890e97c',
'.gitmodules',
'LICENSE',
'README.md' ] }
Hi @calendarbase, thanks for reaching out.
Yes, GitHub's site has been updated since the examples were written.
I think this is what you want:
// Scrape spf13/hugoThemes using selectors that match GitHub's updated markup.
const scrapy = require('node-scrapy');

const url = 'https://github.com/spf13/hugoThemes';

// Take the repository URL from the "content" attribute of the og:url meta tag.
const repo = {
  selector: 'meta[property="og:url"]',
  get: 'content',
};

// Pick the counters by the ending of their href rather than by button class.
const social = {
  stars: '.social-count[href$=stargazers]',
  forks: '.social-count[href$=network]',
};

// Selectors for the numbers-summary bar: each extra "+ li" steps one list
// item further past the commits entry.
const stats = {
  commits: '.commits .num',
  branches: '.numbers-summary > li.commits + li .num',
  releases: '.numbers-summary > li.commits + li + li .num',
  contributors: '.numbers-summary > li.commits + li + li + li .num',
  social,
};

// Match the links inside .content and read their href instead of their text.
const files = {
  selector: '.content a[href]',
  get: 'href',
};

// The model handed to scrapy.scrape; key order is kept so the printed result
// lists author, repo, stats and files in that order.
const model = {
  author: '.author',
  repo,
  stats,
  files,
};

// Report a failed scrape on stderr, otherwise print whatever was scraped.
scrapy.scrape(url, model, (err, data) => {
  if (err) {
    console.error(err);
    return;
  }
  console.log(data);
});
It outputs:
{ author: 'spf13',
repo: 'https://github.com/spf13/hugoThemes',
stats:
{ commits: '271',
branches: '1',
releases: '0',
contributors: '31',
social: { stars: '414', forks: '95' } },
files:
[ '/gcushen/hugo-academic/tree/1a20f5e6d70908a2f41f1c6d331361c68024b062',
'/digitalcraftsman/hugo-agency-theme/tree/6415e68a97f7cab3e04497e087ce348e1ca7574e',
'/dim0627/hugo_theme_aglaus/tree/f0328f8f825591b1efa0f677ce9e3c3691b37e60',
'/syui/hugo-theme-air/tree/0e1c3b0bfb335dfa23f9ec1198e24628a3df707b',
'/brycematheson/allegiant/tree/6c49d136e538d514fd3bbc4371104bfcb1d99814',
'/tanksuzuki/angels-ladder/tree/bbe249fcc3483b20e4db2c59350bbf507bb857a6',
'/digitalcraftsman/hugo-artists-theme/tree/baf55066fec9c97ab590bdbac71d8bb4e36cf87c',
'/coryshaw/hugo-aurora-theme/tree/d760f752f18361025a8b397ca4fd081787757bcf',
'/htdvisser/hugo-base16-theme/tree/058fd51e22c705f080a0fe09d998ad5af1394d12',
'/halogenica/beautifulhugo/tree/ecb7e949e7c08d982d77b16be53e89e7202f5ae6',
'/dim0627/hugo_theme_beg/tree/80a6b59904f76ab2b994f231374e0bb700361ff4',
'/yoshiharuyamashita/blackburn/tree/8103131976363a8e6d53abc8ca01cee1fc6871c8',
'/Zenithar/hugo-theme-bleak/tree/13bf4609193d972828256164c629beccb9613576',
'/key-amb/hugo-theme-bootie-docs/tree/e14a0e4fec4720c7a30cf186f623d18259b1dd14',
'/mmrath/hugo-bootstrap/tree/d8219ca9bb7decbdf50b516e44955c7d0a0d29bb',
'/digitalcraftsman/hugo-cactus-theme/tree/7921c05d7e32df62f43b0df46b501f455182e290',
'/vjeantet/hugo-theme-casper/tree/0c6470f66b81e9bf24aa29c5b9e1f1db289368d1',
'/nishanths/cocoa-hugo-theme/tree/e32b495b664ea8cc156a9fa38a3157077837672b',
'/digitalcraftsman/hugo-creative-theme/tree/5c1fdb2dd5f60dd6ce0d28ce940cdddcfe403fe6',
'/Zenithar/hugo-theme-crisp/tree/e99ce5df55f8184af72c0d3e6878c99a47e3fdf2',
'/allnightgrocery/hugo-theme-blueberry-detox/tree/b56f0e23827959e53f58f3819061a15da00df204',
'/digitalcraftsman/hugo-freelancer-theme/tree/e8dd1373964d5b838d130b5b2b9b2a04ba3c88d8',
'/jpescador/hugo-future-imperfect/tree/2db677fe337a2bab43b776318559f86db9316daf',
'/jbub/ghostwriter/tree/f3662c30747ad192f8dceee43e094f70ad1e9963',
'/cdipaolo/gindoro/tree/6bfc86bd85d02e524b69aca3bfd134abd5cecab7',
'/oarrabi/github-project-landing-page/tree/9d8a23c233479f0411c3547ba2fb0fc14df6ca6f',
'/cxfksword/greyshade/tree/28fb061bb674a2add89724dfbbf167f88f381d40',
'/chipsenkbeil/grid-side/tree/d3846256bb43410ddcd0288a60884506abeb42ff',
'/hbpasti/heather-hugo/tree/cb93a3bd5fffcb1c5f0c9bc4623c92819a0f0a9c',
'/spf13/herring-cove/tree/fe7ce044ce05343dd38e3076d1d9d44215c99f5a',
'/digitalcraftsman/hugo-hikari-theme/tree/ecfaed71d673a17add5bb94d07fbd42c89c4c4e9',
'/simonmika/hugo-theme-html5/tree/9a3e7a0b479c6d06147c05759723d312c90cf0aa',
'/crakjie/hugo-base-theme/tree/f78b4d181b8dc970abde3a704524ac259af74f68',
'/appernetic/hugo-bootstrap-premium/tree/d1f1b8cbc7d67fb7b06957cf3eca8b6b62c83fdb',
'/nilproductions/hugo-bootswatch/tree/4367ac101b911780b8ce8050cbca102bb0e6f93f',
'/adejoux/hugo-darkdoc-theme/tree/7b2069f14038c3e1cb0792e040a1a91c1480ec22',
'/alexurquhart/hugo-geo/tree/b14540d1d84ae3c129d2b76a25cab89860a5eda5',
'/garvincasimir/hugo-h5bp-simple/tree/7741bbce77c9fd66843ca102ae4ed118b42dd06e',
'/digitalcraftsman/hugo-icarus-theme/tree/ff019b8cac212562bcd39dc0c64c7d3e1bf8ae5c',
'/aerohub/hugo-identity-theme/tree/79e03ea6a44842be039984b08659b37238bb53bb',
'/nilproductions/hugo-incorporated/tree/3d21a638bbad6b3902873cfe8a13cb15d6d3ce0a',
'/jrutheiser/hugo-lithium-theme/tree/08ba4a9261d876837416d1129714e50d79a1b5cb',
'/jchatkinson/HugoMDL/tree/79c6f0ec4ceab910e169c759924320b8b007cae0',
'/digitalcraftsman/hugo-minimalist-theme/tree/21036c6936a63789a2b7edc7b88b99dcb425a4a2',
'/mpas/hugo-multi-bootswatch/tree/0f12d1be8d1d4ff84f79f228d6c0584ff631e8fc',
'/parsiya/Hugo-Octopress/tree/bcd9bcd32cc2810858c223c3f316420eb1b49795',
'/coderzh/hugo-pacman-theme/tree/604c57f4184ccbeef8340208c5900fe269e968f5',
'/nraboy/hugo-phlat-theme/tree/168cd40e198b248e9fe6d620a4140861ef46b282',
'/H4tch/hugo-plus/tree/bba92902cd393af3b538380ed032df32d0cfc54c',
'/syui/hugo-theme-arch/tree/31a5301662204879642302e7c1fdfc00f8c033b3',
'/masa0221/hugo-theme-geppaku/tree/d56d47c483c84c514de5b37727ee008ef9b95c62',
'/matcornic/hugo-theme-learn/tree/75df02c3ab7dc5e8fe81c5e220e3e7c54bea3252',
'/SenjinDarashiva/hugo-uno/tree/a66c2dbb189c9eb6ee8ca89ec5342a38edcb0625',
'/rakuishi/hugo-zen/tree/ff5a38825a65229bca110739c261d7e782f751e7',
'/SenjinDarashiva/hugoscroll/tree/6f6ce41f4791f507f257c09a7f45ca306894d6f9',
'/TiTi/hurock/tree/13081b8ef3c29d3ab27e25ea21a7a510e53c3376',
'/spf13/hyde/tree/a04b9e15746f679a3a6d8f325b82e2392b20d380',
'/zyro/hyde-x/tree/ee61d837b64e6a15adf12ee3f231dc29ede871bd',
'/enten/hyde-y/tree/493bf9942c9cd88f714f6616dead1bec71c5c8f2',
'/jnjosh/internet-weblog/tree/6a9c9a54690ced05ecf72e17c693da17221e3d85',
'/mpas/hugo-journal/tree/e543e4bfa9de79306e906f34da42a8ee7ac48bc8',
'/crakjie/landing-page-hugo/tree/9280715492960afb82dc5a3add39c9f458d5a41e',
'/tummychow/lanyon-hugo/tree/0c3da68b8e1cc9b7616c977aab34d7cd7e283da9',
'/eliasson/liquorice/tree/b2b6d571e0fb2ab4470964320bebf1beb418c812',
'/pdevty/material-design/tree/0d88058fead04a70d14f34552e07a1fd81911ca6',
'/digitalcraftsman/hugo-material-docs/tree/e133b473c44fef8f13813bd660d8975a272c4005',
'/SamuelDebruyn/hugo-material-lite/tree/24a23e3be3f7f9b2f9d825b5ab4aa51f9cf1818c',
'/leopku/hugo-theme-next/tree/b8c8076d32755b3921ec3e461c77b6c67716150f',
'/gizak/nofancy/tree/151dd320af1f43d7c783f80d2a98df6ccc59b26f',
'/pcdummy/hugo-theme-persona/tree/afb2d458f97b6fe1c295afbcd336b3c6e71028d0',
'/azmelanar/hugo-theme-pixyll/tree/50dad694aaf8edbadc2e100b30d8742dad5f0030',
'/pdevty/polymer/tree/d8be0028b80dcf7026489598f4f8905ecc291611',
'/vjeantet/hugo-theme-projecthub/tree/d0fb856de1d22a396659a9c3deeeddeb4eeb256e',
'/dplesca/purehugo/tree/3c4ee78cb7905f00545498733c8f7f6c18bc360c',
'/tmaiaroto/hugo-redlounge/tree/7853d1eb5de8976a3087db0bff3f865b8efbd72d',
'/dim0627/hugo_theme_robust/tree/69ce3f191a948ddeb323de3e836d0152f8525a30',
'/esell/rocktopus/tree/c47bcd755c4fd25a50bdff09fc1064da6d29bbe0',
'/chibicode/hugo-theme-shiori/tree/e23d4a165a25e08d054ccce96f072989138347c5',
'/AlexFinn/simple-a/tree/3338b258ebccf0ed027fe9cb9f988fd023364a9c',
'/druzza/simple-hugo/tree/20d9086e64e8493b931a419e52866e007e8699e7',
'/CrimsonRay/slender/tree/0874af0b51a2ce3e7603e5f0f2d76ef1eb05e15e',
'/zhe/hugo-theme-slim/tree/a491cb0d5d01970c83d81cd58cb204091d085ee3',
'/humboldtux/startbootstrap-clean-blog/tree/c3ff77a0e2b29f5be3e6812139f4d23945d8a3ff',
'/digitalcraftsman/hugo-steam-theme/tree/d917cfa3ac9b89ca2adca2ed796d03a915a74d3c',
'/digitalcraftsman/hugo-strata-theme/tree/253c8bba874a9bb60ef9bd1ba925fc52d1a795a7',
'/marloncabrera/tachyons/tree/a98c649c0afc175d2332c9ba9a6e0cc92aa7346b',
'/roperzh/tinyce-hugo-theme/tree/ec650dfeeda9ae94152e51f0b8b01d61dbbf9e0c',
'/jaden/twentyfourteen/tree/83f0a782af8c678f0017d56c2ac8478c31cf8c52',
'/digitalcraftsman/hugo-type-theme/tree/e8a2c263a3d1bbe1bfd53ab0892b0ad0620769a0',
'/keichi/vienna/tree/890e97c5a052984d04294862ac60c7e0bfc6a934',
'/spf13/hugoThemes/blob/master/.gitmodules',
'/spf13/hugoThemes/blob/master/LICENSE',
'/spf13/hugoThemes/blob/master/README.md' ] }
Regards!
Thanks! I only saw this now. It looks like a useful solution. I don't remember what I finally used; a strange combination of Osmosis and RB/J, I think.
At first I thought it was because the project was moved, but I get `files: null` on every GitHub project.
My node.js code: