lossless1024 / StreaMonitor

Adult live stream downloader for advanced people. I could have chosen a better name.
GNU General Public License v3.0
172 stars 42 forks source link

[Feature] Split recording by file size #148

Open amajio opened 1 month ago

amajio commented 1 month ago

./parameters.py

# Root directory for recordings; each bot writes into its own
# "<username> [<siteslug>]" sub-folder below it.
DOWNLOADS_DIR = 'path/to/recording/directory'
MIN_FREE_DISK_PERCENT = 1.0  # in %
# When True, ffmpeg's stdout/stderr are written to "<recording>.stdout.log" /
# "<recording>.stderr.log" and bot debug messages go to "debug.log".
DEBUG = False
# Target resolution; compared against the smaller of a variant's width and height.
WANTED_RESOLUTION = 1080
# One of: 'exact', 'closest', 'exact_or_least_higher', 'exact_or_highest_lower'.
WANTED_RESOLUTION_PREFERENCE = 'closest'
VIDEO_FILE_EXTENSION = '.ts'  # Extension (and thereby container) of recorded files
# Start a new file once the current recording reaches this size, in megabytes
# (1024 MB = 1 GB). The size is only checked every 30 seconds, so files can
# end up somewhat larger than this.
SPLIT_SIZE_MB = 1024

./streamonitor/downloaders/ffmpeg.py

import errno
import subprocess
import requests.cookies
from threading import Thread
from parameters import DEBUG

from parameters import SPLIT_SIZE_MB
import os
import time

def getVideoFfmpeg(self, url, filename):
    """Record ``url`` into ``filename`` with ffmpeg, splitting by file size.

    ffmpeg is started in stream-copy mode (``-c:a copy -c:v copy``) on a
    worker thread and polled until it exits, a stop is requested through
    ``self.stopDownload``, or the output file reaches ``SPLIT_SIZE_MB``
    megabytes. In the last case ``self.split['splitting']`` is left ``True``
    so the caller can start the next part; the old ffmpeg process is only
    asked to quit about a second later, so consecutive parts overlap slightly.

    Args:
        self: The bot instance. Its ``headers``, ``cookies``, ``logger``,
            ``log``, ``split`` and ``stopDownload`` attributes are used.
        url: Stream URL handed to ffmpeg as its input.
        filename: Path of the output file.

    Returns:
        ``False`` if ffmpeg could not be started or exited with an error
        return code, ``True`` otherwise.
    """
    # Reset the flag up front. If ffmpeg fails to start, a value left over
    # from a previous call must not make the caller believe a split is in
    # progress (and the key must exist for the check at the end).
    self.split['splitting'] = False

    cmd = [
        'ffmpeg',
        '-user_agent', self.headers['User-Agent']
    ]

    if isinstance(self.cookies, requests.cookies.RequestsCookieJar):
        # One cookie per line, newline-separated, no trailing newline.
        cookies_text = '\n'.join(
            cookie.name + "=" + cookie.value + "; path=" + cookie.path + '; domain=' + cookie.domain
            for cookie in self.cookies
        )
        cmd.extend([
            '-cookies', cookies_text
        ])

    cmd.extend([
        '-i', url,
        '-c:a', 'copy',
        '-c:v', 'copy',
        filename
    ])

    class _Stopper:
        """Flag shared between the caller's stop request and the worker."""

        def __init__(self):
            self.stop = False

        def pls_stop(self):
            self.stop = True

    stopping = _Stopper()

    error = False
    split_size_bytes = SPLIT_SIZE_MB * 1024 * 1024

    def getSize(file_path):
        """Return the size of ``file_path`` in bytes, or 0 if it does not exist yet."""
        try:
            return os.path.getsize(file_path)
        except FileNotFoundError:
            return 0

    def moment():
        """Return the current time as whole seconds since the epoch."""
        return int(time.time())

    def process_terminate(process, filename):
        """Ask ``process`` to quit from a background thread after one second.

        The delay lets the next part start recording before this one ends.
        ``filename`` is accepted for symmetry with the stored split state but
        is not needed to stop the process.
        """
        def endProcess():
            time.sleep(1)
            process.communicate(b'q')

        timer_thread = Thread(target=endProcess)
        timer_thread.start()

    def execute():
        """Run ffmpeg and watch it until it exits, is stopped, or must be split."""
        nonlocal error
        log_files = []
        try:
            if DEBUG:
                log_files.append(open(filename + '.stdout.log', 'w+'))
                log_files.append(open(filename + '.stderr.log', 'w+'))
                stdout, stderr = log_files
            else:
                stdout = stderr = subprocess.DEVNULL
            process = subprocess.Popen(args=cmd, stdin=subprocess.PIPE, stderr=stderr, stdout=stdout)
        except OSError as e:
            if e.errno == errno.ENOENT:
                self.logger.error('FFMpeg executable not found!')
            else:
                self.logger.error("Got OSError, errno: " + str(e.errno))
            error = True
            return
        finally:
            # The child process holds its own copies of these descriptors, so
            # the parent's handles can be released as soon as Popen returns.
            for log_file in log_files:
                log_file.close()

        self.split['checkAfter'] = moment() + 30
        while process.poll() is None:
            time.sleep(1)
            # Only look at the file size every 30 seconds.
            if moment() >= self.split['checkAfter']:
                self.split['checkAfter'] = moment() + 30
                self.split['size'] = getSize(filename)
                if self.split['size'] >= split_size_bytes:
                    # Leave ffmpeg running; it is stopped later by
                    # process_terminate so the two parts overlap.
                    self.split['process'] = process
                    self.split['file'] = filename
                    self.split['splitting'] = True
                    self.log('Splitting file')
                    break

            if stopping.stop:
                # 'q' on stdin asks ffmpeg to finish the file and exit.
                process.communicate(b'q')
                break
            try:
                process.wait(1)
            except subprocess.TimeoutExpired:
                pass

        # returncode is None while ffmpeg is still running (split case).
        # 255 is not treated as a failure -- presumably the code ffmpeg
        # returns after being asked to quit; confirm against ffmpeg docs.
        if process.returncode and process.returncode != 255:
            self.logger.error('The process exited with an error. Return code: ' + str(process.returncode))
            error = True
            return

    thread = Thread(target=execute)
    # Publish the stop hook before the worker starts so a stop request cannot
    # arrive in the gap between start() and the assignment.
    self.stopDownload = lambda: stopping.pls_stop()
    thread.start()
    thread.join()
    self.stopDownload = None
    if self.split['splitting']:
        process_terminate(self.split['process'], self.split['file'])
    return not error

./streamonitor/bot.py

from __future__ import unicode_literals
import os
import m3u8
from enum import Enum
from time import sleep
from datetime import datetime
from threading import Thread

import requests
import requests.cookies

import streamonitor.log as log
from parameters import DOWNLOADS_DIR, DEBUG, WANTED_RESOLUTION, WANTED_RESOLUTION_PREFERENCE, VIDEO_FILE_EXTENSION
from streamonitor.downloaders.ffmpeg import getVideoFfmpeg

class Bot(Thread):
    """Base class for a per-streamer monitoring and recording thread.

    Site implementations are expected to override :meth:`getStatus` and
    :meth:`getVideoUrl` and to set ``site`` / ``siteslug``. The thread polls
    the status in :meth:`run` and hands public streams to ``self.getVideo``.
    """

    loaded_sites = set()
    username = None
    site = None
    siteslug = None
    aliases = []
    ratelimit = False

    # Sleep durations and timeouts, in seconds.
    sleep_on_offline = 2
    sleep_on_long_offline = 300
    sleep_on_error = 20
    sleep_on_ratelimit = 180
    long_offline_timeout = 600

    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0"
    }

    class Status(Enum):
        UNKNOWN = 1
        NOTRUNNING = 2
        ERROR = 3
        PUBLIC = 200
        NOTEXIST = 400
        PRIVATE = 403
        OFFLINE = 404
        LONG_OFFLINE = 410
        RATELIMIT = 429

    status_messages = {
        Status.PUBLIC: "Channel online",
        Status.OFFLINE: "No stream",
        Status.LONG_OFFLINE: "No stream for a while",
        Status.PRIVATE: "Private show",
        Status.RATELIMIT: "Rate limited",
        Status.NOTEXIST: "Nonexistent user",
        Status.NOTRUNNING: "Not running",
        Status.ERROR: "Error on downloading"
    }

    def __init__(self, username):
        """Create a stopped bot for ``username``; call :meth:`restart` to activate it."""
        super().__init__()
        self.username = username
        self.logger = self.getLogger()

        self.cookies = None
        self.cookieUpdater = None
        self.cookie_update_interval = 0
        self.previous_status = None
        self.lastInfo = {}
        # Shared state with the downloader for size-based file splitting.
        self.split = {}
        self.running = False
        self.quitting = False
        self.sc = self.Status.NOTRUNNING
        self.getVideo = getVideoFfmpeg
        self.stopDownload = None

    def getLogger(self):
        """Return a logger labelled with the site slug and username."""
        return log.Logger("[" + self.siteslug + "] " + self.username).get_logger()

    def restart(self):
        """Mark the bot as running so :meth:`run` starts polling."""
        self.running = True

    def stop(self, a, b, thread_too=False):
        """Stop the current download and polling.

        ``a`` and ``b`` are unused (presumably a signal-handler signature --
        confirm against callers). With ``thread_too`` the thread itself is
        asked to exit as well.
        """
        if self.running:
            self.log("Stopping...")
            if self.stopDownload:
                self.stopDownload()
            self.running = False
        if thread_too:
            self.quitting = True

    def getStatus(self):
        """Return the streamer's current :class:`Status`; this base version returns UNKNOWN."""
        return self.Status.UNKNOWN

    def log(self, message):
        """Log ``message`` at info level."""
        self.logger.info(message)

    def debug(self, message, filename=None):
        """When DEBUG is on, log ``message`` and append it to ``filename``.

        ``filename`` defaults to ``debug.log`` in the bot's output folder.
        """
        if DEBUG:
            self.logger.debug(message)
            if not filename:
                filename = os.path.join(self.outputFolder, 'debug.log')
            with open(filename, 'a+') as debugfile:
                debugfile.write(message + '\n')

    def status(self):
        """Return the message for the current status code.

        Side effect: a NOTEXIST status also stops the bot.
        """
        message = self.status_messages.get(self.sc) or "Unknown error"
        if self.sc == self.Status.NOTEXIST:
            self.running = False
        return message

    def _sleep(self, time):
        """Sleep for ``time`` seconds in one-second steps, returning early when quitting."""
        while time > 0:
            sleep(1)
            time -= 1
            if self.quitting:
                return

    def run(self):
        """Thread body: poll the status and record while the stream is public."""
        while not self.quitting:
            while not self.running and not self.quitting:
                sleep(1)
            if self.quitting:
                break

            offline_time = self.long_offline_timeout + 1  # Don't start polling when streamer was offline at start
            while self.running:
                try:
                    self.sc = self.getStatus()
                    if self.sc != self.previous_status:
                        self.log(self.status())
                        self.previous_status = self.sc
                    if self.sc == self.Status.ERROR:
                        self._sleep(self.sleep_on_error)
                    if self.sc == self.Status.OFFLINE:
                        offline_time += self.sleep_on_offline
                        if offline_time > self.long_offline_timeout:
                            self.sc = self.Status.LONG_OFFLINE
                    elif self.sc == self.Status.PUBLIC or self.sc == self.Status.PRIVATE:
                        offline_time = 0
                        if self.sc == self.Status.PUBLIC:
                            if self.cookie_update_interval > 0 and self.cookieUpdater is not None:
                                def update_cookie():
                                    while self.sc == self.Status.PUBLIC and not self.quitting and self.running:
                                        self._sleep(self.cookie_update_interval)
                                        ret = self.cookieUpdater()
                                        if ret:
                                            self.debug('Updated cookies')
                                        else:
                                            self.logger.warning('Failed to update cookies')
                                cookie_update_process = Thread(target=update_cookie)
                                cookie_update_process.start()

                            video_url = self.getVideoUrl()
                            if video_url is None:
                                self.sc = self.Status.ERROR
                                self.logger.error(self.status())
                                self._sleep(self.sleep_on_error)
                                continue
                            self.log('Started downloading show')
                            ret = self.getVideo(self, video_url, self.genOutFilename())
                            # Keep recording into a fresh file while the
                            # downloader reports a size-based split. Stop as
                            # soon as a part fails or a stop was requested,
                            # otherwise a stale flag would loop forever and a
                            # stop between two parts would be ignored.
                            while (ret and self.split.get('splitting')
                                   and self.running and not self.quitting):
                                ret = self.getVideo(self, video_url, self.genOutFilename())
                            if not ret:
                                self.sc = self.Status.ERROR
                                self.log(self.status())
                                self._sleep(self.sleep_on_error)
                                continue
                except Exception as e:
                    self.logger.exception(e)
                    self.log(self.status())
                    self._sleep(self.sleep_on_error)
                    continue

                if self.quitting:
                    break
                elif self.ratelimit:
                    self._sleep(self.sleep_on_ratelimit)
                elif offline_time > self.long_offline_timeout:
                    self._sleep(self.sleep_on_long_offline)
                else:
                    self._sleep(self.sleep_on_offline)

            self.sc = self.Status.NOTRUNNING
            self.log("Stopped")

    def getPlaylistVariants(self, url):
        """Fetch the m3u8 playlist at ``url`` and list its variant streams.

        Returns:
            A list of ``(uri, (width, height))`` tuples, with ``(0, 0)`` for
            variants that carry no resolution, or ``None`` when the playlist
            is not a variant playlist but still lists sources.
        """
        sources = []
        result = requests.get(url, headers=self.headers, cookies=self.cookies)
        m3u8_doc = result.content.decode("utf-8")
        variant_m3u8 = m3u8.loads(m3u8_doc)
        for playlist in variant_m3u8.playlists:
            resolution = playlist.stream_info.resolution if type(playlist.stream_info.resolution) is tuple else (0, 0)
            sources.append((playlist.uri, resolution))

        if not variant_m3u8.is_variant and len(sources) >= 1:
            # Logger.warn is a deprecated alias; use warning like the rest of the class.
            self.logger.warning("Not variant playlist, can't select resolution")
            return None
        return sources  # [(url, (width, height)),...]

    def getWantedResolutionPlaylist(self, url):
        """Pick the variant of the playlist at ``url`` that best matches the configured resolution.

        The choice is driven by ``WANTED_RESOLUTION`` (compared with the
        smaller of width and height) and ``WANTED_RESOLUTION_PREFERENCE``.

        Returns:
            The absolute URL of the selected variant, or ``None`` if nothing
            could be selected or any error occurred (errors are logged).
        """
        try:
            sources = self.getPlaylistVariants(url)
            if sources is None:
                return None

            if len(sources) == 0:
                self.logger.error("No available sources")
                return None

            # Append the signed distance from the wanted resolution as a third element.
            sources2 = []
            for source in sources:
                width, height = source[1]
                if width < height:
                    source += (width - WANTED_RESOLUTION,)
                else:
                    source += (height - WANTED_RESOLUTION,)
                sources2.append(source)
            sources = sources2

            # Closest match first.
            sources.sort(key=lambda a: abs(a[2]))
            selected_source = None

            if WANTED_RESOLUTION_PREFERENCE == 'exact':
                if sources[0][2] == 0:
                    selected_source = sources[0]
            elif WANTED_RESOLUTION_PREFERENCE == 'closest' or len(sources) == 1:
                selected_source = sources[0]
            elif WANTED_RESOLUTION_PREFERENCE == 'exact_or_least_higher':
                for source in sources:
                    if source[2] >= 0:
                        selected_source = source
                        break
            elif WANTED_RESOLUTION_PREFERENCE == 'exact_or_highest_lower':
                for source in sources:
                    if source[2] <= 0:
                        selected_source = source
                        break
            else:
                self.logger.error('Invalid value for WANTED_RESOLUTION_PREFERENCE')
                return None

            if selected_source is None:
                self.logger.error("Couldn't select a resolution")
                return None

            if selected_source[1][1] != 0:
                self.logger.info(f'Selected {selected_source[1][0]}x{selected_source[1][1]} resolution')
            selected_source_url = selected_source[0]
            # Absolute variant URLs (either scheme) are used as they are.
            if selected_source_url.startswith(("https://", "http://")):
                return selected_source_url
            else:
                # Relative URI: resolve against the directory of the master playlist.
                return '/'.join(url.split('.m3u8')[0].split('/')[:-1]) + '/' + selected_source_url
        except Exception as e:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are not swallowed here.
            self.logger.error("Can't get playlist, got some error: " + str(e))
            return None

    def getVideoUrl(self):
        """Return the stream URL to record; this base version returns ``None``."""
        pass

    def progressInfo(self, p):
        """Log download progress from a progress dict ``p``.

        Reads ``p['status']`` and, depending on it, ``downloaded_bytes`` /
        ``total_bytes`` or ``filename``.
        """
        if p['status'] == 'downloading':
            self.log("Downloading " + str(round(float(p['downloaded_bytes']) / float(p['total_bytes']) * 100, 1)) + "%")
        if p['status'] == 'finished':
            self.log("Show ended. File:" + p['filename'])

    @property
    def outputFolder(self):
        """Folder for this bot's recordings: ``DOWNLOADS_DIR/<username> [<siteslug>]``."""
        return os.path.join(DOWNLOADS_DIR, self.username + ' [' + self.siteslug + ']')

    def genOutFilename(self, create_dir=True):
        """Return a timestamped output path, creating the output folder unless ``create_dir`` is false."""
        folder = self.outputFolder
        if create_dir:
            os.makedirs(folder, exist_ok=True)
        now = datetime.now()
        filename = os.path.join(folder, self.username + '-' + str(now.strftime("%Y%m%d-%H%M%S")) + VIDEO_FILE_EXTENSION)
        return filename

    def export(self):
        """Return a dict with the bot's site, username and running flag."""
        return {"site": self.site, "username": self.username, "running": self.running}

    @staticmethod
    def str2site(site: str):
        """Return the loaded site class matching ``site`` by name, slug or alias, or ``None``."""
        site = site.lower()
        for sitecls in Bot.loaded_sites:
            if site == sitecls.site.lower() or \
                    site == sitecls.siteslug.lower() or \
                    site in sitecls.aliases:
                return sitecls

    @staticmethod
    def createInstance(username: str, site: str = None):
        """Instantiate the site class matching ``site`` for ``username``; returns ``None`` when ``site`` is empty."""
        if site:
            return Bot.str2site(site)(username)
amajio commented 1 month ago

First, I apologize if anything is wrong. All of the code came from asking ChatGPT, and it is working fine for me XD. It also includes some fixes from these pull requests >> Fix status updates spam and configuration video file extension https://github.com/lossless1024/StreaMonitor/pull/114 https://github.com/lossless1024/StreaMonitor/pull/136

TeslaLyon commented 1 week ago

Hello, can you submit the code? This allows you to visually view code changes

amajio commented 1 week ago

Hello, can you submit the code? This allows you to visually view code changes

Honestly, I don't know how to do that; it looks complicated to me, so I put the code in this section instead. Sorry :P

TeslaLyon commented 1 week ago

All right, how's it going so far? Will video freezes or video loss still occur after changing to ts format? In the previous mp4 format, the video would stop at 4 seconds, then suddenly jump to 10 seconds, and the screen would return to normal.

amajio commented 1 week ago

All right, how's it going so far? Will video freezes or video loss still occur after changing to ts format? In the previous mp4 format, the video would stop at 4 seconds, then suddenly jump to 10 seconds, and the screen would return to normal.

I was recording to mp4 before, and the files ended up broken with no way to recover or fix them, so .ts works best for me. But I don't think the video freezes or losses are caused by the file extension.

DerBunteBall commented 1 week ago
  1. .ts just leads to a MPEG-TS (MPEG Transport Stream). That's more like a TV signal. It's pieced and can be processed on that level. Modern video players are able to play this also.
  2. MKV as well as MP4 (a normal MPEG-4 file) always get corrupted because they are designed a bit differently.
  3. The above code simply stops ffmpeg when the file reaches a said size. That's a bad idea when you don't want to lose parts.
  4. Actually it's not implemented but the solution would be a post processing mechanism. With that it would be possible to convert a MPEG-TS to a valid MP4 and do some splitting by time or size (size might be a bit more complex because it's not supported by ffmpeg out of the box as far as I know).
amajio commented 1 week ago
  1. .ts just leads to a MPEG-TS (MPEG Transport Stream). That's more like a TV signal. It's pieced and can be processed on that level. Modern video players are able to play this also.

    1. MKV as well as MP4 (a normal MPEG4 file) always corrupt because they are concepted a bit different.

    2. The above code simply stops FFMPEG when the file reaches a said size. That's a bad idea when you don't want to lose parts.

    3. Actually it's not implemented but the solution would be a post processing mechanism. With that it would be possible to convert a MPEG-TS to a valid MP4 and do some splitting by time or size (size might be a bit more complex because it's not supported by FFMPEG out of the box as far as I know).

Thank you for the info and suggestions. I don't understand FFMPEG well enough, so this is the only way I could come up with :P. To avoid losing parts when the process stops and a new file starts, you can put a delay (time.sleep(x)) in process_terminate so that the two files overlap each other a little bit. By the way, if you have a better way, feel free to share it. XD

DerBunteBall commented 1 week ago
ffmpeg -i link ... -c copy -f mpegts file:filename.mp4.part # Get Stream as MPEG Transport Stream (MPEG-TS)
ffmpeg -i filename.mp4.part -f mp4 ... file:filename.mp4 # Make MPEG-TS to real MP4
mp4box -splits 2G filename.mp4 # Split by size - note that this might not be absolutely exact
mp4box -split 30M filename.mp4 # Split to 30 minute parts - note that this might not be absolutely exact
... do cleanup and moving

That's the simplest solution I could find. mp4box is part of gpac, which is available in all regular Linux distros and via Homebrew on macOS. A Windows build can also be found on the project site (https://gpac.io/downloads/gpac-nightly-builds/)

Note: This produces heavy I/O load and general system load. Use NVMe drives as storage and other suitable hardware. It's a bad idea to have this as a post-processing chain for a tool which at its core tries to fetch the data.

lossless1024 commented 4 days ago

I implemented this, please check out.