18202821297 commented 9 months ago

请求频繁，被快手禁止了，使用的是默认的cookies，能否改为使用谷歌浏览器自动化网页，并且开启隐身窗口，每次进行直播间检测时使用浏览器方式来进行地址抓取？这样模拟正常的用户操作？

18202821297 commented 9 months ago

替换get_kuaishou_stream_data2文件

#重点：需要在配置文件中把每个地址的访问间隔修改为15到20秒的样子，因为浏览器窗口是单线程的，不是多线程
# 开启一个远程监听窗口，chrome.exe --remote-debugging-port=9111 --user-data-dir="D:\books\9111"
#扫码登录你的快手，可以保持很久，因为cookie缓存在浏览器的

可以把启动窗口命令，和启动main.py设置为bat文件，加入开机启动项

@trace_error_decorator
def get_kuaishou_stream_data2(url: str, cookies: Union[str, None] = None) -> Dict[str, Any]:
    """Look up a Kuaishou live room's stream URL through an attached Chrome.

    Attaches to a Chrome instance that is already listening for remote
    debugging on 127.0.0.1:9111, opens ``view-source:<url>`` and parses the
    ``window.__INITIAL_STATE__`` JSON embedded in the page.

    Args:
        url: Live-room page URL, e.g. ``https://live.kuaishou.com/u/<id>``.
        cookies: Not used by this implementation; kept so the signature
            stays compatible with the function it replaces.

    Returns:
        A dict with ``type``, ``anchor_name`` and ``is_live``. When the room
        is live it also carries ``backup`` (``m3u8_url`` / ``flv_url``) and
        ``record_url``.

    Raises:
        ValueError: If the page does not contain ``window.__INITIAL_STATE__``
            (presumably a CAPTCHA or block page - confirm in the browser).
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service

    opt = webdriver.ChromeOptions()
    # Location of chromedriver.exe; adjust to your own installation.
    chrome_driver_path = r'D:\books\111\DouyinLiveRecorder-main\chromedriver.exe'

    opt.add_experimental_option("debuggerAddress", "127.0.0.1:9111")
    # NOTE(review): the arguments below are launch-time switches; they
    # probably have no effect when attaching to a running browser - confirm.
    opt.add_argument('--blink-settings=imagesEnabled=false')  # do not load images
    opt.add_argument('--disable-gpu')
    opt.add_argument("--disable-blink-features=AutomationControlled")
    opt.add_argument("user-agent=your_user_agent_string")
    service = Service(chrome_driver_path)
    driver = webdriver.Chrome(service=service, options=opt)
    try:
        driver.get('view-source:' + url)
        time.sleep(5)  # give the page time to finish loading
        page_source = driver.page_source
    finally:
        # Shut down the chromedriver process started for this call so they
        # do not pile up across polls. NOTE(review): a browser attached via
        # debuggerAddress is expected to stay open - confirm.
        driver.quit()

    # Extract the JSON object assigned to window.__INITIAL_STATE__.
    pattern = re.compile(r'window\.__INITIAL_STATE__=\s*({[^<]+?})\s*;', re.DOTALL)
    match = pattern.search(page_source)
    if match is None:
        raise ValueError('window.__INITIAL_STATE__ not found in page source: ' + url)
    json_data = json.loads(match.group(1))

    result = {
        "type": 1,
        "anchor_name": '',
        "is_live": False,
    }

    # The last playList entry describes the room being viewed.
    play_list = json_data['liveroom']['playList']
    room = play_list[-1] if play_list else {}
    if room:
        result['anchor_name'] = room['author']['name']

    # Walk down to the last representation, tolerating the empty or missing
    # containers seen when the room is not broadcasting.
    play_url_sets = (room.get('liveStream') or {}).get('playUrls') or []
    representations = []
    if play_url_sets:
        representations = (play_url_sets[0].get('adaptationSet') or {}).get('representation') or []
    play_url = representations[-1].get('url') if representations else ''

    if play_url:
        result['is_live'] = True
        flv = ''
        m3u8 = ''
        if 'flv' in play_url:
            flv = play_url
            print('URL 中包含 flv，进行相应处理')
        if 'm3u8' in play_url:
            m3u8 = play_url
            print('URL 中包含 m3u8，进行相应处理')
        result['backup'] = {'m3u8_url': m3u8, 'flv_url': flv}
        result['record_url'] = play_url
    else:
        print('没有直播地址，或者未开播')
    return result

callmezhan commented 9 months ago

搞了半天还是不行，能分享一下吗大佬。。。

18202821297 commented 9 months ago

把 print(result) 改成 return result，不要用 print。另外，直播地址可能需要格式化一下，也就是把地址里的 HTML 转义字符还原成正常字符。启动浏览器的命令还可以加上无痕模式参数（--incognito）。源码等我有空了再发一份出来吧；我还集成了一个好玩的网站哈哈，那个就不发出来了。

18202821297 commented 9 months ago

chromedriver.exe 是什么自己百度去，细节有点多

18202821297 commented 9 months ago


import hashlib
import time
import urllib.parse
from typing import Union, Dict, Any
import requests
import re
import json
import execjs
import urllib.request
from utils import trace_error_decorator
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from urllib.parse import urlparse
# Build a urllib opener from a ProxyHandler with an empty proxy mapping, so
# requests sent through `opener` do not go through any proxy.
no_proxy_handler = urllib.request.ProxyHandler({})
opener = urllib.request.build_opener(no_proxy_handler)
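# Replace the get_kuaishou_stream_data2 function in spider.py with the one below.
# It needs chromedriver.exe and a Chrome instance started with remote debugging enabled.
# A slider CAPTCHA shows up occasionally; you can write a script that emails you when it
# appears, and solving it by hand once usually lasts a few days.
# Also set a longer polling interval for Kuaishou.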
@trace_error_decorator
def get_kuaishou_stream_data2(url: str, cookies: Union[str, None] = None) -> Dict[str, Any]:
    """Look up a Kuaishou live room's stream URL through an attached Chrome.

    Attaches to a Chrome instance that is already listening for remote
    debugging on 127.0.0.1:9111, opens ``view-source:<url>`` and parses the
    ``window.__INITIAL_STATE__`` JSON embedded in the page.

    Args:
        url: Live-room page URL, e.g. ``https://live.kuaishou.com/u/<id>``.
        cookies: Not used by this implementation; kept so the signature
            stays compatible with the function it replaces.

    Returns:
        A dict with the keys ``type``, ``anchor_name``, ``is_live``,
        ``backup`` (``m3u8_url`` / ``flv_url``) and ``record_url``. When the
        room is not live, ``is_live`` is False and every string field is
        empty.

    Raises:
        ValueError: If the page does not contain ``window.__INITIAL_STATE__``
            (presumably a CAPTCHA or block page - confirm in the browser).
    """
    # Not-live result; also the base that the live branch fills in.
    result: Dict[str, Any] = {
        "type": 1,
        "anchor_name": '',
        "is_live": False,
        "backup": {'m3u8_url': '', 'flv_url': ''},
        "record_url": '',
    }

    opt = webdriver.ChromeOptions()
    chrome_driver_path = r'chromedriver.exe'
    opt.add_experimental_option("debuggerAddress", "127.0.0.1:9111")
    # NOTE(review): the arguments below are launch-time switches; they
    # probably have no effect when attaching to a running browser - confirm.
    opt.add_argument('--blink-settings=imagesEnabled=false')  # do not load images
    opt.add_argument('--disable-gpu')
    opt.add_argument("--disable-blink-features=AutomationControlled")
    opt.add_argument("user-agent=your_user_agent_string")
    opt.add_argument('--charset=UTF-8')
    service = Service(chrome_driver_path)
    driver = webdriver.Chrome(service=service, options=opt)
    try:
        driver.get('view-source:' + url)
        time.sleep(3)  # give the page time to finish loading
        page_source = driver.page_source
    finally:
        # Shut down the chromedriver process started for this call so they
        # do not pile up across polls. NOTE(review): a browser attached via
        # debuggerAddress is expected to stay open - confirm.
        driver.quit()

    # Extract the JSON object assigned to window.__INITIAL_STATE__.
    pattern = re.compile(r'window\.__INITIAL_STATE__=\s*({[^<]+?})\s*;', re.DOTALL)
    match = pattern.search(page_source)
    if match is None:
        raise ValueError('window.__INITIAL_STATE__ not found in page source: ' + url)
    json_data = json.loads(match.group(1))

    # The last playList entry describes the room being viewed.
    play_list = json_data['liveroom']['playList']
    if not play_list:
        return result
    room = play_list[-1]

    # Walk down to the last representation, tolerating the empty or missing
    # containers seen when the room is not broadcasting.
    play_url_sets = (room.get('liveStream') or {}).get('playUrls') or []
    representations = []
    if play_url_sets:
        representations = (play_url_sets[0].get('adaptationSet') or {}).get('representation') or []
    play_url = (representations[-1].get('url') or '') if representations else ''
    if not play_url:
        return result

    # Resolve literal backslash escapes (e.g. "\u002F") left in the URL.
    # Encoding as latin-1 with backslashreplace keeps non-ASCII characters
    # intact, whereas UTF-8 bytes decoded with unicode-escape would mangle them.
    play_url = play_url.encode('latin-1', 'backslashreplace').decode('unicode-escape')

    # Keep only CJK ideographs and ASCII letters in the anchor name
    # (digits, punctuation and whitespace are dropped).
    anchor_name = re.sub(r'[^\u4e00-\u9fa5a-zA-Z]', '', room['author']['name'])

    flv = ''
    m3u8 = ''
    if 'flv' in play_url:
        flv = play_url
        print('URL 中包含 flv，进行相应处理')
    if 'm3u8' in play_url:
        m3u8 = play_url
        print('URL 中包含 m3u8，进行相应处理')

    result['anchor_name'] = anchor_name
    result['is_live'] = True
    result['backup'] = {'m3u8_url': m3u8, 'flv_url': flv}
    result['record_url'] = play_url
    return result

18202821297 commented 9 months ago

他这个项目自带呀，你就替换你需要的代码就行了：获取地址的部分，还有那几个循环里的网址判断，你要什么网站自己加进去就行了。我想重新写，但是很忙，没时间。

ihmily / DouyinLiveRecorder

快手录制，无法获取到录制信息 #86

可以把启动窗口命令，和启动main.py设置为bat文件，加入开机启动项

url = 'https://live.kuaishou.com/u/kele10800'

ihmily / DouyinLiveRecorder

快手录制，无法获取到录制信息 #86

可以把启动窗口命令，和启动main.py设置为bat文件，加入开机启动项

url = 'https://live.kuaishou.com/u/kele10800'

可以把启动窗口命令，和启动main.py设置为bat文件，加入开机启动项