cjw123ztt / stocktupoui

Real-time subscription to stock tupo info
0 stars 2 forks source link

python爬虫公募抱团分析 #184

Closed cjw123ztt closed 3 years ago

cjw123ztt commented 3 years ago

https://blog.csdn.net/chenlibao0823/article/details/81989002

  1. Selenium https://blog.csdn.net/weixin_36279318/article/details/79475388
  2. Beautiful Soup https://blog.csdn.net/slhlde/article/details/81937838
  3. chromedriver https://chromedriver.storage.googleapis.com/index.html?path=2.35/
cjw123ztt commented 3 years ago

def get_fund_info_ex2(code):
    """Scrape and print the stock holdings of one fund from fundf10.eastmoney.com.

    The holdings page is rendered in headless Chrome, the resulting HTML is
    saved to ``jijin1.html`` and parsed with BeautifulSoup. One dict is
    printed per holding row of every table found under ``#cctable``.

    Args:
        code: Value substituted into the ``ccmx{}.html`` page name.
            NOTE(review): the sample output shows URLs of the form
            ``ccmx_000001.html``, so callers presumably pass the code with a
            leading underscore (or the underscore was lost when the snippet
            was pasted) -- confirm before relying on it.

    Returns:
        None. Results are printed. If an expected element is missing from the
        page (IndexError), the URL is reported instead.
    """
    url = "http://fundf10.eastmoney.com/ccmx{}.html".format(code)
    print(url)

    opt = webdriver.ChromeOptions()
    # set_headless() is deprecated and was removed in Selenium 4; the plain
    # command-line switch works across Selenium versions.
    opt.add_argument('--headless')
    driver = webdriver.Chrome(options=opt)
    try:
        driver.maximize_window()
        driver.get(url)
        driver.implicitly_wait(5)
        with open('jijin1.html', 'w', encoding='utf-8') as f:
            f.write(driver.page_source)
    finally:
        # Always release the browser process, even when loading fails.
        driver.quit()

    time.sleep(1)
    with open('jijin1.html', 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file, 'lxml')

    try:
        fund = soup.select(
            '#bodydiv > div > div > div.basic-new > div.bs_jz > div.col-left > h4 > a'
        )[0].get_text()
        # Third <span> of the summary block; keep only its first token.
        scale = soup.select(
            '#bodydiv > div > div.r_cont > div.basic-new > div.bs_gl > p > label > span'
        )[2].get_text().strip().split()[0]
        fund_name = fund.split(' (')[0]

        tables = soup.select('#cctable > div > div > table')
        headings = soup.select('#cctable > div > div > h4')

        for index, table in enumerate(tables):
            # The heading at the same position as the table carries the date
            # that is stored under 'crawl_date' (second <label>, first <font>).
            report_date = (
                headings[index].select('label')[1].select('font')[0].get_text()
            )

            # The first table reads its ratio/position from the 3rd and 4th
            # `td.tor` cells, later tables from the 1st and 2nd -- presumably
            # the first table has extra leading columns (TODO confirm).
            offset = 2 if index == 0 else 0

            for tr in table.select('tbody > tr'):
                links = tr.select('td > a')
                cells = tr.select('td.tor')
                position = cells[offset + 1].get_text()
                data = {
                    'crawl_date': report_date,
                    'code': links[0].get_text(),
                    'fund': fund_name,
                    'scale': scale,
                    'name': links[1].get_text(),
                    'position': round(float(position.replace(',', '')), 2),
                    'ratio': cells[offset].get_text(),
                    'fund_url': url
                }
                print(data)
                # fund_data.insert(data)

    except IndexError:
        # An expected element was missing; report the URL instead of
        # swallowing the failure silently.
        info = {
            'url': url
        }
        print('no holdings parsed:', info)
        # fund_no_data.insert(info)
cjw123ztt commented 3 years ago

{'crawl_date': '2020-09-30', 'code': '002127', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '南极电商', 'position': 2267.36, 'ratio': '7.84%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '002013', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '中航机电', 'position': 2810.09, 'ratio': '6.45%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '601318', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '中国平安', 'position': 319.65, 'ratio': '4.88%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '002271', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '东方雨虹', 'position': 406.97, 'ratio': '4.39%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '300142', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '沃森生物', 'position': 375.18, 'ratio': '3.82%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '600519', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '贵州茅台', 'position': 11.15, 'ratio': '3.72%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '000513', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '丽珠集团', 'position': 344.38, 'ratio': '3.39%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '002475', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '立讯精密', 'position': 277.37, 'ratio': '3.17%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '000858', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '五粮液', 'position': 71.01, 'ratio': '3.14%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-09-30', 'code': '002444', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '巨星科技', 'position': 657.69, 'ratio': '2.60%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} 
{'crawl_date': '2020-06-30', 'code': '002127', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '南极电商', 'position': 1907.79, 'ratio': '7.99%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '002384', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '东山精密', 'position': 929.1, 'ratio': '5.50%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '002475', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '立讯精密', 'position': 450.47, 'ratio': '4.58%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '002271', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '东方雨虹', 'position': 548.52, 'ratio': '4.41%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '300142', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '沃森生物', 'position': 403.11, 'ratio': '4.18%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '000975', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '银泰黄金', 'position': 1188.39, 'ratio': '3.69%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '000858', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '五粮液', 'position': 93.59, 'ratio': '3.17%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '300253', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '卫宁健康', 'position': 667.05, 'ratio': '3.03%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '300750', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '宁德时代', 'position': 86.47, 'ratio': '2.98%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-06-30', 'code': '000547', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '航天发展', 'position': 1041.56, 'ratio': '2.82%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} 
{'crawl_date': '2020-03-31', 'code': '002127', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '南极电商', 'position': 2619.88, 'ratio': '6.92%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '600048', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '保利地产', 'position': 1498.44, 'ratio': '5.07%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '002271', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '东方雨虹', 'position': 651.22, 'ratio': '5.05%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '002643', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '万润股份', 'position': 1610.3, 'ratio': '4.65%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '000858', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '五粮液', 'position': 116.92, 'ratio': '3.07%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '300226', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '上海钢联', 'position': 177.07, 'ratio': '2.75%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '000547', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '航天发展', 'position': 800.0, 'ratio': '2.45%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '300572', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '安车检测', 'position': 204.39, 'ratio': '1.81%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '603259', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '药明康德', 'position': 80.12, 'ratio': '1.65%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'} {'crawl_date': '2020-03-31', 'code': '603806', 'fund': '华夏成长混合', 'scale': '49.94亿元', 'name': '福斯特', 'position': 175.98, 'ratio': '1.63%', 'fund_url': 'http://fundf10.eastmoney.com/ccmx_000001.html'}

cjw123ztt commented 3 years ago
item comments
基金的持仓信息 天天基金网获取
个股的行业信息 所属行业和流通股都可以由通达信导出