otto8-ai / otto8

Open source AI Agent Platform
Apache License 2.0
9 stars 9 forks source link

Knowledge - Website - Stuck in "syncing" state even after all the files have been scraped. #609

Closed sangee2004 closed 2 hours ago

sangee2004 commented 3 hours ago

Version:

 "github.com/otto8-ai/tools": "c47df03a1857be27eb23512acbe48b29368ba555",
  "otto": "v0.0.0-dev+ba5f61a0"

Steps to reproduce the problem:

  1. Create an agent with knowledge from a website (in my case, https://nginx.org/en/docs).

Notice that even after all the files have been scraped, the status is shown as "Syncing" forever.

Screenshot 2024-11-15 at 10 39 43 AM

Sync logs-

{
  "frames": {
    "1731696585": {
      "chatResponseCached": false,
      "currentAgent": {

      },
      "displayText": "Running Sync and scrape website from /otto8-tools/knowledge/data-sources/website/tool.gpt",
      "end": "0001-01-01T00:00:00Z",
      "id": "1731696585",
      "input": "{\"websiteCrawlingConfig\":{\"urls\":[\"https://nginx.org/en/docs\"]}}",
      "inputContext": null,
      "llmRequest": {
        "command": [
          "/bin/sh",
          "-c",
          "exec ${GPTSCRIPT_TOOL_DIR}/bin/gptscript-go-tool"
        ],
        "input": "{\"websiteCrawlingConfig\":{\"urls\":[\"https://nginx.org/en/docs\"]}}"
      },
      "llmResponse": null,
      "output": [
        {
          "content": "time=\"2024-11-15T18:31:46Z\" level=info msg=\"scraping https://nginx.org/en/docs/\"\ntime=\"2024-11-15T18:31:47Z\" level=info msg=\"scraping https://nginx.org/en/docs/faq.html\"\ntime=\"2024-11-15T18:31:48Z\" level=info msg=\"scraping https://nginx.org/en/docs/njs/\"\ntime=\"2024-11-15T18:31:49Z\" level=info msg=\"scraping https://nginx.org/en/docs/install.html\"\ntime=\"2024-11-15T18:31:51Z\" level=info msg=\"scraping https://nginx.org/en/linux_packages.html\"\ntime=\"2024-11-15T18:31:52Z\" level=info msg=\"scraping https://nginx.org/en/docs/configure.html\"\ntime=\"2024-11-15T18:31:53Z\" level=info msg=\"scraping https://nginx.org/en/docs/ngx_core_module.html\"\ntime=\"2024-11-15T18:31:54Z\" level=info msg=\"scraping https://nginx.org/en/docs/events.html\"\ntime=\"2024-11-15T18:31:55Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_core_module.html\"\ntime=\"2024-11-15T18:31:59Z\" level=info msg=\"scraping https://nginx.org/en/docs/debugging_log.html\"\ntime=\"2024-11-15T18:32:00Z\" level=info msg=\"scraping https://nginx.org/en/docs/control.html\"\ntime=\"2024-11-15T18:32:01Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_perl_module.html\"\ntime=\"2024-11-15T18:32:02Z\" level=info msg=\"scraping https://nginx.org/en/docs/syslog.html\"\ntime=\"2024-11-15T18:32:03Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_log_module.html\"\ntime=\"2024-11-15T18:32:04Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_ssl_module.html\"\ntime=\"2024-11-15T18:32:05Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_v2_module.html\"\ntime=\"2024-11-15T18:32:06Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_v3_module.html\"\ntime=\"2024-11-15T18:32:08Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_realip_module.html\"\ntime=\"2024-11-15T18:32:09Z\" level=info msg=\"scraping 
https://nginx.org/en/docs/http/ngx_http_addition_module.html\"\ntime=\"2024-11-15T18:32:10Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_xslt_module.html\"\ntime=\"2024-11-15T18:32:11Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_image_filter_module.html\"\ntime=\"2024-11-15T18:32:13Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_geoip_module.html\"\ntime=\"2024-11-15T18:32:14Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_sub_module.html\"\ntime=\"2024-11-15T18:32:15Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_dav_module.html\"\ntime=\"2024-11-15T18:32:16Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_flv_module.html\"\ntime=\"2024-11-15T18:32:17Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_mp4_module.html\"\ntime=\"2024-11-15T18:32:18Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_gunzip_module.html\"\ntime=\"2024-11-15T18:32:19Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_gzip_static_module.html\"\ntime=\"2024-11-15T18:32:21Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_auth_request_module.html\"\ntime=\"2024-11-15T18:32:21Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_random_index_module.html\"\ntime=\"2024-11-15T18:32:22Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_secure_link_module.html\"\ntime=\"2024-11-15T18:32:23Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_slice_module.html\"\ntime=\"2024-11-15T18:32:24Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_stub_status_module.html\"\ntime=\"2024-11-15T18:32:25Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_charset_module.html\"\ntime=\"2024-11-15T18:32:26Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_gzip_module.html\"\ntime=\"2024-11-15T18:32:27Z\" 
level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_ssi_module.html\"\ntime=\"2024-11-15T18:32:28Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_userid_module.html\"\ntime=\"2024-11-15T18:32:29Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_access_module.html\"\ntime=\"2024-11-15T18:32:30Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_auth_basic_module.html\"\ntime=\"2024-11-15T18:32:31Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_mirror_module.html\"\ntime=\"2024-11-15T18:32:32Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_autoindex_module.html\"\ntime=\"2024-11-15T18:32:34Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_index_module.html\"\ntime=\"2024-11-15T18:32:35Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_geo_module.html\"\ntime=\"2024-11-15T18:32:36Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_map_module.html\"\ntime=\"2024-11-15T18:32:37Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_split_clients_module.html\"\ntime=\"2024-11-15T18:32:37Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_referer_module.html\"\ntime=\"2024-11-15T18:32:38Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_rewrite_module.html\"\ntime=\"2024-11-15T18:32:40Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_proxy_module.html\"\ntime=\"2024-11-15T18:32:41Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_fastcgi_module.html\"\ntime=\"2024-11-15T18:32:42Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_uwsgi_module.html\"\ntime=\"2024-11-15T18:32:43Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_scgi_module.html\"\ntime=\"2024-11-15T18:32:44Z\" level=info msg=\"scraping 
https://nginx.org/en/docs/http/ngx_http_grpc_module.html\"\ntime=\"2024-11-15T18:32:45Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_memcached_module.html\"\ntime=\"2024-11-15T18:32:46Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_limit_conn_module.html\"\ntime=\"2024-11-15T18:32:47Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_limit_req_module.html\"\ntime=\"2024-11-15T18:32:48Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_empty_gif_module.html\"\ntime=\"2024-11-15T18:32:49Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_browser_module.html\"\ntime=\"2024-11-15T18:32:50Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_upstream_module.html\"\ntime=\"2024-11-15T18:32:51Z\" level=info msg=\"downloading PDF https://homes.cs.washington.edu/~karlin/papers/balls.pdf\"\ntime=\"2024-11-15T18:32:53Z\" level=info msg=\"scraping https://nginx.org/en/docs/mail/ngx_mail_core_module.html\"\ntime=\"2024-11-15T18:32:54Z\" level=info msg=\"scraping https://nginx.org/en/docs/mail/ngx_mail_ssl_module.html\"\ntime=\"2024-11-15T18:32:56Z\" level=info msg=\"scraping https://nginx.org/en/docs/mail/ngx_mail_pop3_module.html\"\ntime=\"2024-11-15T18:32:56Z\" level=info msg=\"scraping https://nginx.org/en/docs/mail/ngx_mail_imap_module.html\"\ntime=\"2024-11-15T18:32:58Z\" level=info msg=\"scraping https://nginx.org/en/docs/mail/ngx_mail_smtp_module.html\"\ntime=\"2024-11-15T18:32:59Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_core_module.html\"\ntime=\"2024-11-15T18:33:00Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_ssl_module.html\"\ntime=\"2024-11-15T18:33:03Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_realip_module.html\"\ntime=\"2024-11-15T18:33:04Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_geoip_module.html\"\ntime=\"2024-11-15T18:33:05Z\" 
level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_ssl_preread_module.html\"\ntime=\"2024-11-15T18:33:06Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_limit_conn_module.html\"\ntime=\"2024-11-15T18:33:07Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_access_module.html\"\ntime=\"2024-11-15T18:33:08Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_geo_module.html\"\ntime=\"2024-11-15T18:33:09Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_map_module.html\"\ntime=\"2024-11-15T18:33:10Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_split_clients_module.html\"\ntime=\"2024-11-15T18:33:11Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_return_module.html\"\ntime=\"2024-11-15T18:33:12Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_set_module.html\"\ntime=\"2024-11-15T18:33:14Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_upstream_module.html\"\ntime=\"2024-11-15T18:33:15Z\" level=info msg=\"scraping https://nginx.org/en/docs/ngx_google_perftools_module.html\"\ntime=\"2024-11-15T18:33:18Z\" level=info msg=\"scraping https://nginx.org/en/docs/beginners_guide.html\"\ntime=\"2024-11-15T18:33:19Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/request_processing.html\"\ntime=\"2024-11-15T18:33:20Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/server_names.html\"\ntime=\"2024-11-15T18:33:21Z\" level=info msg=\"scraping https://nginx.org/en/docs/hash.html\"\ntime=\"2024-11-15T18:33:22Z\" level=info msg=\"scraping https://nginx.org/en/docs/syntax.html\"\ntime=\"2024-11-15T18:33:24Z\" level=info msg=\"scraping https://nginx.org/en/docs/switches.html\"\ntime=\"2024-11-15T18:33:24Z\" level=info msg=\"scraping https://nginx.org/en/docs/windows.html\"\ntime=\"2024-11-15T18:33:26Z\" level=info msg=\"scraping 
https://nginx.org/en/download.html\"\ntime=\"2024-11-15T18:33:27Z\" level=info msg=\"scraping https://nginx.org/en/docs/quic.html\"\ntime=\"2024-11-15T18:33:28Z\" level=info msg=\"downloading PDF http://vger.kernel.org/lpc_net2018_talks/willemdebruijn-lpc2018-udpgso-paper-DRAFT-1.pdf\"\ntime=\"2024-11-15T18:33:29Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/load_balancing.html\"\ntime=\"2024-11-15T18:33:30Z\" level=info msg=\"scraping https://nginx.org/en/\"\ntime=\"2024-11-15T18:33:36Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/configuring_https_servers.html\"\ntime=\"2024-11-15T18:33:37Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/stream_processing.html\"\ntime=\"2024-11-15T18:33:39Z\" level=info msg=\"scraping https://nginx.org/en/docs/njs/index.html\"\ntime=\"2024-11-15T18:33:40Z\" level=info msg=\"scraping https://nginx.org/en/docs/howto_build_on_win32.html\"\ntime=\"2024-11-15T18:33:41Z\" level=info msg=\"scraping https://nginx.org/en/docs/nginx_dtrace_pid_provider.html\"\ntime=\"2024-11-15T18:33:42Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/converting_rewrite_rules.html\"\ntime=\"2024-11-15T18:33:43Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/websocket.html\"\ntime=\"2024-11-15T18:33:44Z\" level=info msg=\"scraping https://nginx.org/en/docs/contributing_changes.html\"\ntime=\"2024-11-15T18:33:46Z\" level=info msg=\"scraping https://nginx.org/en/docs/dev/development_guide.html\"\ntime=\"2024-11-15T18:33:48Z\" level=info msg=\"scraping https://nginx.org/en/docs/index.html\"\ntime=\"2024-11-15T18:33:49Z\" level=info msg=\"scraping https://nginx.org/en/docs/dirindex.html\"\ntime=\"2024-11-15T18:33:51Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_log_module.html\"\ntime=\"2024-11-15T18:33:52Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_headers_module.html\"\ntime=\"2024-11-15T18:33:53Z\" level=info msg=\"scraping 
https://nginx.org/en/docs/http/ngx_http_api_module.html\"\ntime=\"2024-11-15T18:33:54Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_keyval_module.html\"\ntime=\"2024-11-15T18:33:55Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_zone_sync_module.html\"\ntime=\"2024-11-15T18:33:57Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_keyval_module.html\"\ntime=\"2024-11-15T18:33:58Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_status_module.html\"\ntime=\"2024-11-15T18:33:59Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_upstream_hc_module.html\"\ntime=\"2024-11-15T18:34:00Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_upstream_hc_module.html\"\ntime=\"2024-11-15T18:34:01Z\" level=info msg=\"scraping https://nginx.org/en/docs/mail/ngx_mail_auth_http_module.html\"\ntime=\"2024-11-15T18:34:02Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_auth_jwt_module.html\"\ntime=\"2024-11-15T18:34:04Z\" level=info msg=\"scraping https://nginx.org/en/docs/ngx_mgmt_module.html\"\ntime=\"2024-11-15T18:34:05Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_f4f_module.html\"\ntime=\"2024-11-15T18:34:06Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_hls_module.html\"\ntime=\"2024-11-15T18:34:08Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_internal_redirect_module.html\"\ntime=\"2024-11-15T18:34:09Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_js_module.html\"\ntime=\"2024-11-15T18:34:11Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_js_module.html\"\ntime=\"2024-11-15T18:34:12Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_mqtt_filter_module.html\"\ntime=\"2024-11-15T18:34:13Z\" level=info msg=\"scraping 
https://nginx.org/en/docs/stream/ngx_stream_mqtt_preread_module.html\"\ntime=\"2024-11-15T18:34:14Z\" level=info msg=\"scraping https://nginx.org/en/docs/ngx_otel_module.html\"\ntime=\"2024-11-15T18:34:15Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_pass_module.html\"\ntime=\"2024-11-15T18:34:16Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_proxy_module.html\"\ntime=\"2024-11-15T18:34:17Z\" level=info msg=\"scraping https://nginx.org/en/docs/mail/ngx_mail_proxy_module.html\"\ntime=\"2024-11-15T18:34:19Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_session_log_module.html\"\ntime=\"2024-11-15T18:34:20Z\" level=info msg=\"scraping https://nginx.org/en/docs/mail/ngx_mail_realip_module.html\"\ntime=\"2024-11-15T18:34:21Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_upstream_conf_module.html\"\ntime=\"2024-11-15T18:34:22Z\" level=info msg=\"scraping https://nginx.org/en/docs/varindex.html\"\ntime=\"2024-11-15T18:34:23Z\" level=info msg=\"scraping https://nginx.org/en/docs/http/ngx_http_proxy_protocol_vendor_module.html\"\ntime=\"2024-11-15T18:34:24Z\" level=info msg=\"scraping https://nginx.org/en/docs/stream/ngx_stream_proxy_protocol_vendor_module.html\"\ntime=\"2024-11-15T18:34:25Z\" level=info msg=\"scraping https://nginx.org/en/index.html\"\ntime=\"2024-11-15T18:34:26Z\" level=info msg=\"scraping https://nginx.org/en/docs/welcome_nginx_facebook.html\"\ntime=\"2024-11-15T18:34:27Z\" level=info msg=\"scraping https://nginx.org/en/docs/faq/license_copyright.html\"\ntime=\"2024-11-15T18:34:28Z\" level=info msg=\"scraping https://nginx.org/en/docs/faq/accept_failed.html\"\ntime=\"2024-11-15T18:34:29Z\" level=info msg=\"scraping https://nginx.org/en/docs/faq/variables_in_config.html\"\ntime=\"2024-11-15T18:34:30Z\" level=info msg=\"scraping https://nginx.org/en/docs/faq/daemon_master_process_off.html\"\ntime=\"2024-11-15T18:34:31Z\" level=info msg=\"scraping 
https://nginx.org/en/docs/faq/chunked_encoding_from_backend.html\"\ntime=\"2024-11-15T18:34:32Z\" level=info msg=\"scraping https://nginx.org/en/docs/sys_errlist.html\"\n",
          "subCalls": null
        }
      ],
      "start": "2024-11-15T18:31:45.31898343Z",
      "tool": {
        "description": "Scrape website and sync down as markdown files",
        "id": "/otto8-tools/knowledge/data-sources/website/tool.gpt:Sync and scrape website",
        "instructions": "#!${GPTSCRIPT_TOOL_DIR}/bin/gptscript-go-tool",
        "internalPrompt": null,
        "localTools": {
          "sync and scrape website": "/otto8-tools/knowledge/data-sources/website/tool.gpt:Sync and scrape website"
        },
        "modelName": "gpt-4o",
        "name": "Sync and scrape website",
        "source": {
          "lineNo": 1,
          "location": "/otto8-tools/knowledge/data-sources/website/tool.gpt"
        },
        "workingDir": "/otto8-tools/knowledge/data-sources/website"
      },
      "toolResults": 0,
      "type": "callProgress",
      "usage": {

      }
    }
  },
  "spec": {
    "synchronous": true,
    "threadName": "t1-ks1f6kqz",
    "input": "{\"websiteCrawlingConfig\":{\"urls\":[\"https://nginx.org/en/docs\"]}}",
    "tool": "\"website-data-source\"",
    "credentialContextIDs": [
      "a1dkntg"
    ]
  },
  "status": {
    "state": "running",
    "output": "",
    "endTime": null
  }
}
sangee2004 commented 2 hours ago

This issue is no longer seen when testing with the latest builds:

  "github.com/otto8-ai/tools": "c47df03a1857be27eb23512acbe48b29368ba555",
  "otto": "v0.0.0-dev+fdad35a6"