CommandDash / commanddash

AI assist to integrate APIs and SDKs without reading docs.
https://commanddash.io
Apache License 2.0
299 stars 49 forks source link

Indentation is broken in displayed code blocks. #302

Closed samyakkkk closed 4 months ago

samyakkkk commented 4 months ago

What version are you using?

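This affects Python code most prominently, since indentation is part of Python's syntax and wrong indentation leads to compile-time errors (`IndentationError`). In the snippet below, reproduced as it was displayed, the bodies of the `while`, `try`, `except`, and innermost `if` blocks appear at the same indentation level as their headers: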

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from collections import deque

def crawl_website(start_url, max_depth=2):
    """Crawls a website to find all URLs within a given depth.

    Args:
        start_url: The starting URL of the website to crawl.
        max_depth: The maximum depth to crawl.

    Returns:
        A set of all URLs found on the website.
    """

    visited_urls = set()
    urls_to_visit = deque([start_url])
    current_depth = 0

    while urls_to_visit and current_depth <= max_depth:
    url = urls_to_visit.popleft()

    if url not in visited_urls:
        visited_urls.add(url)
        print(f"Crawling: {url}")

        try:
        response = requests.get(url)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        for link in soup.find_all('a', href=True):
            absolute_url = urljoin(url, link['href'])
            if absolute_url not in visited_urls:
            urls_to_visit.append(absolute_url)

        except requests.exceptions.RequestException as e:
        print(f"Error crawling {url}: {e}")

    current_depth += 1

    return visited_urls

# Example usage:
start_url = "https://www.example.com"  # Replace with your website
found_urls = crawl_website(start_url)

print("\nAll URLs found:")
for url in found_urls:
    print(url)

What happened?

-

Steps to reproduce

-

Supporting info to reproduce

No response

Relevant log output

No response