iyaja / llama-fs

A self-organizing file system with llama 3
MIT License
4.49k stars 259 forks source link

Index error while "batch" request #12

Open vffuunnyy opened 1 month ago

vffuunnyy commented 1 month ago

Please configure structlog for better logging. :) I did it myself — like this (not perfect, but at least something):

# Configure structlog for human-readable console output: contextvars are
# merged in, records get a level and a local-time timestamp, and stack /
# exception info is rendered by the dev ConsoleRenderer.
_processors = [
    structlog.contextvars.merge_contextvars,
    structlog.processors.add_log_level,
    structlog.processors.StackInfoRenderer(),
    structlog.dev.set_exc_info,
    structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S", utc=False),
    structlog.dev.ConsoleRenderer(),
]
structlog.configure(
    processors=_processors,
    wrapper_class=structlog.make_filtering_bound_logger(logging.NOTSET),
    context_class=dict,
    logger_factory=structlog.PrintLoggerFactory(),
    cache_logger_on_first_use=False,
)

class StructlogInterceptHandler(logging.Handler):
    """Bridge stdlib ``logging`` records into structlog.

    Installed via ``logging.basicConfig(handlers=[...])`` so that records
    emitted by third-party libraries (uvicorn, fastapi, ...) are rendered
    by the configured structlog pipeline instead of the plain handlers.
    """

    def emit(self, record: logging.LogRecord) -> None:
        # record.levelno is the authoritative numeric level for a record;
        # the previous lookup through the private logging._nameToLevel
        # mapping returned the same value for every registered level name,
        # so the try/except indirection is unnecessary.
        level = record.levelno

        # Walk up the stack past logging's own frames so the reported
        # callsite depth points at the original caller.
        # NOTE(review): _getframe(6) assumes six layers of logging
        # machinery above emit(); a shallower stack raises ValueError —
        # confirm against the actual call chain.
        frame, depth = sys._getframe(6), 6
        while frame and frame.f_code.co_filename == logging.__file__:
            frame = frame.f_back
            depth += 1

        logger = structlog.get_logger(record.name)

        logger.bind(depth=depth).log(level, record.getMessage(), exc_info=record.exc_info)

# Replace any handlers installed by third-party code (force=True) and
# funnel every stdlib record, at every level, through the interceptor.
logging.basicConfig(
    handlers=[StructlogInterceptHandler()],
    level=logging.NOTSET,
    force=True,
)

Okay — about the exception, I got this one:

2024-05-29 05:30:42 [debug    ] open file: C:/Users/vladi/Downloads/Cards.pdf depth=6
INFO:     127.0.0.1:65423 - "POST /batch HTTP/1.1" 500 Internal Server Error
ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "E:\llama-fs\.venv\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 399, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\llama-fs\.venv\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 70, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\llama-fs\.venv\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\applications.py", line 123, in __call__
    await self.middleware_stack(scope, receive, send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\middleware\errors.py", line 186, in __call__
    raise exc
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\middleware\errors.py", line 164, in __call__
    await self.app(scope, receive, _send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\middleware\cors.py", line 93, in __call__
    await self.simple_response(scope, receive, send, request_headers=headers)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\middleware\cors.py", line 148, in simple_response
    await self.app(scope, receive, send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\middleware\exceptions.py", line 65, in __call__
    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\_exception_handler.py", line 64, in wrapped_app
    raise exc
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\routing.py", line 756, in __call__
    await self.middleware_stack(scope, receive, send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\routing.py", line 776, in app
    await route.handle(scope, receive, send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\routing.py", line 297, in handle
    await self.app(scope, receive, send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\routing.py", line 77, in app
    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\_exception_handler.py", line 64, in wrapped_app
    raise exc
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\_exception_handler.py", line 53, in wrapped_app
    await app(scope, receive, sender)
  File "E:\llama-fs\.venv\Lib\site-packages\starlette\routing.py", line 72, in app
    response = await func(request)
               ^^^^^^^^^^^^^^^^^^^
  File "E:\llama-fs\.venv\Lib\site-packages\fastapi\routing.py", line 278, in app
    raw_response = await run_endpoint_function(
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\llama-fs\.venv\Lib\site-packages\fastapi\routing.py", line 191, in run_endpoint_function
    return await dependant.call(**values)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\llama-fs\server.py", line 106, in batch
    summaries = await get_dir_summaries(path)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\llama-fs\src\loader.py", line 20, in get_dir_summaries
    doc_dicts = load_documents(path)
                ^^^^^^^^^^^^^^^^^^^^
  File "E:\llama-fs\src\loader.py", line 68, in load_documents
    text = splitter.split_text("\n".join([d.text for d in docs]))[0]
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
IndexError: list index out of range

While processing this file:

Cards.pdf

LawJarp-A commented 1 month ago

@vffuunnyy I had the same issue; I had to add an extra statement so that `[0]` is taken only when the result of `splitter.split_text("\n".join([d.text for d in docs]))` is not empty, after which it worked fine. I ended up doing this:

# HACK: splitter.split_text() can return an empty list (as seen with the
# Cards.pdf report above), so guard before indexing [0] to avoid the
# IndexError. This skips emptiness handling for the document itself and
# is not a proper fix.
text = splitter.split_text("\n".join([d.text for d in docs]))
if len(text) > 0:
    text = text[0]
    documents.append(Document(text=text, metadata=docs[0].metadata))

This is just a hack to get it running past this point, not a proper fix.