I am trying to retrieve the transcripts of some YouTube videos. I listed the video URLs in a CSV file and, after extracting each transcript, tried to write it to a separate .txt file per video. I am getting the following output:

Error extracting transcript for V1: Could not retrieve a transcript for the video "URL mentioned" Client Error: Too Many Requests for url: "URL mentioned" This is most likely caused by:

Request to YouTube failed: 9kpl7AtE03c

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!

The following is my code:

import csv
import os
import re
import time

from langchain_community.document_loaders import YoutubeLoader
from langchain_community.document_loaders.youtube import TranscriptFormat

def sanitize_filename(filename):
    """Return *filename* with characters that are unsafe in file names removed.

    The removed characters are: backslash, forward slash, asterisk, question
    mark, colon, double quote, less-than, greater-than and pipe.

    Args:
        filename: The raw name (for example a video title).

    Returns:
        The name with every listed character deleted; other characters are
        left untouched.
    """
    # The backslash must be doubled inside the raw-string character class:
    # a single "\/" is only an escaped "/" and would leave "\" in the name.
    return re.sub(r'[\\/*?:"<>|]', "", filename)

def document_to_string(document):
    """Return the text carried by a loaded document object.

    Args:
        document: An object returned by a document loader, or any other value.

    Returns:
        ``document.page_content`` when that attribute exists, otherwise
        ``document.text`` when that attribute exists, otherwise
        ``str(document)``.
    """
    # LangChain Document objects keep their text in `page_content`; checking
    # only `text` falls through to str(), which writes the object's repr
    # (metadata included) instead of the transcript.
    if hasattr(document, 'page_content'):
        return document.page_content
    # Kept for objects that expose their text as `.text`.
    if hasattr(document, 'text'):
        return document.text
    return str(document)

def _load_with_retry(loader, max_retries, base_delay):
    """Call ``loader.load()``, retrying with exponential backoff on rate limiting.

    Only errors whose message contains "Too Many Requests" are retried; any
    other error, or a rate-limit error after ``max_retries`` retries, is
    re-raised to the caller.
    """
    attempt = 0
    while True:
        try:
            return loader.load()
        except Exception as exc:
            # The rate-limit failure is recognised by its message text, as it
            # appears in the reported error output.
            if "Too Many Requests" not in str(exc) or attempt >= max_retries:
                raise
            attempt += 1
            wait = base_delay * 2 ** attempt
            print(f"Rate limited; retrying in {wait:.0f}s (attempt {attempt}/{max_retries})")
            time.sleep(wait)


def extract_and_save_transcripts(csv_filepath, data_dir='data', delay_seconds=2.0, max_retries=3):
    """Download the transcript of every video listed in a CSV file.

    The CSV must have the columns ``Session``, ``VideoID``, ``VideoName`` and
    ``VideoURL``. Each transcript is written to
    ``<data_dir>/<Session>_<VideoID>_<VideoName>.txt``. A failure for one
    video is printed and the remaining rows are still processed.

    Args:
        csv_filepath: Path to the CSV file listing the videos.
        data_dir: Directory the transcript files are written to; created if
            it does not exist.
        delay_seconds: Pause between consecutive videos, to keep the request
            rate down. Use 0 to disable.
        max_retries: How many times a video is retried after a
            "Too Many Requests" error before it is reported as failed.

    Raises:
        KeyError: If a row lacks one of the required columns.
        OSError: If the CSV file cannot be opened.
    """
    # Ensure the data directory exists
    os.makedirs(data_dir, exist_ok=True)
    # Backoff needs a non-zero base even when the per-video pause is disabled.
    backoff_base = max(delay_seconds, 1.0)

    with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as csvfile:
        # DictReader consumes the header row itself, so no manual
        # next()/seek(0) handling is needed.
        reader = csv.DictReader(csvfile)
        for index, row in enumerate(reader):
            # Space out requests; NOTE(review): this lowers the chance of a
            # 429 but cannot lift a block that is already in place.
            if index and delay_seconds > 0:
                time.sleep(delay_seconds)

            session = row['Session']
            video_id = row['VideoID']
            video_name = sanitize_filename(row['VideoName'])
            url = row['VideoURL']

            try:
                # Built inside the try so that one bad URL does not abort
                # the whole batch.
                loader = YoutubeLoader.from_youtube_url(
                    url,
                    language=["de"],  # Specify other languages if necessary
                    translation="en",
                    transcript_format=TranscriptFormat.TEXT,
                )

                # Load the transcript
                transcript = _load_with_retry(loader, max_retries, backoff_base)
                if isinstance(transcript, list):
                    transcript = '\n'.join(document_to_string(doc) for doc in transcript)
                elif not isinstance(transcript, str):
                    transcript = document_to_string(transcript)

                # Save the transcript to a text file
                filename = os.path.join(data_dir, f"{session}_{video_id}_{video_name}.txt")
                with open(filename, 'w', encoding='utf-8') as text_file:
                    text_file.write(transcript)
                print(f"Transcript saved: {filename}")
            except Exception as e:
                # Best-effort batch: report and continue with the next video.
                print(f"Error extracting transcript for {video_id}: {e}")

# Script entry: process the video list stored next to the output files.
csv_filepath = 'data/VideoURLs.csv'
extract_and_save_transcripts(csv_filepath)

Related issue: jdepoix / youtube-transcript-api

"getting error Too Many Requests for url" (#256)

Assuming that the Document object has a method or attribute to get its text

Ensure the data directory exists