zrashwani / arachnid

Crawl all unique internal links found on a given website, and extract SEO related information - supports javascript based sites
MIT License
253 stars 60 forks source link

Images treated as 404 - false positive #25

Closed mkantautas closed 7 years ago

mkantautas commented 7 years ago

Hello, in this case the images are reported as 404, although in reality their URLs are valid. This should be fixed.

array:8 [▼
  "/images/2017-putsschema-1.png" => array:9 [▼
    "original_urls" => array:1 [▼
      "/images/2017-putsschema-1.png" => "/images/2017-putsschema-1.png"
    ]
    "links_text" => array:1 [▼
      "PUTSSCHEMA 1" => "PUTSSCHEMA 1"
    ]
    "absolute_url" => "https://ssfonsterputs.se/images/2017-putsschema-1.png"
    "external_link" => false
    "visited" => false
    "frequency" => 1
    "source_link" => "https://ssfonsterputs.se/putsschema/"
    "depth" => 2
    "status_code" => 404
  ]
  "/images/2017-putsschema-2.png" => array:9 [▼
    "original_urls" => array:1 [▶]
    "links_text" => array:1 [▶]
    "absolute_url" => "https://ssfonsterputs.se/images/2017-putsschema-2.png"
    "external_link" => false
    "visited" => false
    "frequency" => 1
    "source_link" => "https://ssfonsterputs.se/putsschema/"
    "depth" => 2
    "status_code" => 404
  ]
  "/images/2017-putsschema-3.png" => array:9 [▼
    "original_urls" => array:1 [▶]
    "links_text" => array:1 [▶]
    "absolute_url" => "https://ssfonsterputs.se/images/2017-putsschema-3.png"
    "external_link" => false
    "visited" => false
    "frequency" => 1
    "source_link" => "https://ssfonsterputs.se/putsschema/"
    "depth" => 2
    "status_code" => 404
  ]
  "/images/2017-putsschema-4.png" => array:9 [▶]
  "/images/2017-putsschema-5.png" => array:9 [▶]
  "/images/2017-putsschema-6.png" => array:9 [▶]
  "/images/2017-putsschema-7.png" => array:9 [▶]
  "/images/2017-putsschema-8.png" => array:9 [▶]
]
zrashwani commented 7 years ago

Hello @neorganic, this is fixed now. In fact, I didn't find anything wrong after the recent work done on the package; the output is now as follows:

    [/images/2017-putsschema-2.png] => Array
        (
            [original_urls] => Array
                (
                    [/images/2017-putsschema-2.png] => /images/2017-putsschema-2.png
                )

            [links_text] => Array
                (
                    [PUTSSCHEMA 2] => PUTSSCHEMA 2
                )

            [absolute_url] => https://ssfonsterputs.se/images/2017-putsschema-2.png
            [external_link] => 
            [visited] => 
            [frequency] => 1
            [source_link] => https://ssfonsterputs.se/putsschema/
            [depth] => 1
            [status_code] => 200
        )
mkantautas commented 7 years ago

Fixed.