garycourt / uri-js

An RFC 3986 compliant, scheme extendable URI parsing/validating/normalizing/resolving library for JavaScript
Other
305 stars 69 forks source link

parse hangs on some long urls #12

Closed dotchev closed 7 years ago

dotchev commented 8 years ago

During penetration tests with Burp Suite we found that our app hangs on some requests. It turned out that uri-js parse hangs on some long urls. Here is a simple script to reproduce it:

// Reproduction script: times uri.parse() on a series of paths that differ
// only in how many '/%c0%ae%c0%ae' segments precede 'windows\win.ini'.
var uri = require('uri-js');

// Inputs ordered from fewest to most segments, so the printed timings show
// how the parse time grows as segments are added.
var uris = [
  '/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\\win.ini',
  '/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\\win.ini',
  '/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\\win.ini',
  '/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\\win.ini'
];

// Use forEach instead of for...in: for...in enumerates property keys
// (including any inherited enumerable ones) rather than the array elements.
uris.forEach(function (input) {
  console.log(input);
  console.time('time');
  var parsed = uri.parse(input);
  console.timeEnd('time');
  console.log(parsed);
});

Here is the result on my machine

/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\win.ini
time: 732ms
{ error: 'URI is not strictly valid.',
  scheme: undefined,
  userinfo: undefined,
  host: undefined,
  port: undefined,
  path: '/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/windows%5Cwin.ini',
  query: undefined,
  fragment: undefined,
  reference: 'relative' }
/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\win.ini
time: 3655ms
{ error: 'URI is not strictly valid.',
  scheme: undefined,
  userinfo: undefined,
  host: undefined,
  port: undefined,
  path: '/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/windows%5Cwin.ini',
  query: undefined,
  fragment: undefined,
  reference: 'relative' }
/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\win.ini
time: 18254ms
{ error: 'URI is not strictly valid.',
  scheme: undefined,
  userinfo: undefined,
  host: undefined,
  port: undefined,
  path: '/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/windows%5Cwin.ini',
  query: undefined,
  fragment: undefined,
  reference: 'relative' }
/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\win.ini

Notice how the time increases exponentially - about 5x for each new segment. So with a long enough URL, parse just hangs at 100% CPU.

It seems uri-js uses a generated regex which in my case appears to be this:

/(^((?:[A-Za-z][A-Za-z0-9\+\-\.]*))\:(?:(?:\/\/((?:((?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-F
a-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)
\*\+\,\;\=\:])*))@)?((?:(?:\[(?:(?:[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:]+)|(?:v[0-
9A-Fa-f]+\.[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:]+))\])|(?:(?:(?:25[0-5])|(?:2[0-4]
[0-9])|(?:1[0-9][0-9])|(?:[1-9][0-9])|[0-9])\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9][0-9])|(?:[1-9][0-9])|[0-9])\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1
[0-9][0-9])|(?:[1-9][0-9])|[0-9])\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9][0-9])|(?:[1-9][0-9])|[0-9]))(?!(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0
-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u
2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=])*))|(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?
:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0
-\uFFEF\!\$\&\'\(\)\*\+\,\;\=])*)))(?:\:((?:[0-9]*)))?))?((?:(?:\/(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89
A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFF
EF\!\$\&\'\(\)\*\+\,\;\=\:\@])*))*)|(?:\/(?:(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-
9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;
\=\:\@])+)(?:(?:\/(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[
0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@])*))*))?)|(?:(?:(?:
(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|
[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@])+)(?:(?:\/(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[
0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u2
00D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@])*))*))|(?!(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-F
a-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF
\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@]))))(?:\?((?:(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89
A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFF
EF\!\$\&\'\(\)\*\+\,\;\=\:\@])|[\/\?\uE000-\uF8FF])*)))?(?:\#((?:(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A
-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFE
F\!\$\&\'\(\)\*\+\,\;\=\:\@])|[\/\?])*)))?$)|(^(){0}(?:(?:\/\/((?:((?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[8
9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uF
FEF\!\$\&\'\(\)\*\+\,\;\=\:])*))@)?((?:(?:\[(?:(?:[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;
\=\:]+)|(?:v[0-9A-Fa-f]+\.[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:]+))\])|(?:(?:(?:25[
0-5])|(?:2[0-4][0-9])|(?:1[0-9][0-9])|(?:[1-9][0-9])|[0-9])\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9][0-9])|(?:[1-9][0-9])|[0-9])\.(?:(?:25[0-5])|(?:2[
0-4][0-9])|(?:1[0-9][0-9])|(?:[1-9][0-9])|[0-9])\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9][0-9])|(?:[1-9][0-9])|[0-9]))(?!(?:(?:(?:(?:%[EFef][0-9A-Fa-f
]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-
\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=])*))|(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f]
[0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF9
00-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=])*)))(?:\:((?:[0-9]*)))?))?((?:(?:\/(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A
-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\u
FDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@])*))*)|(?:\/(?:(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f]
[0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\
&\'\(\)\*\+\,\;\=\:\@])+)(?:(?:\/(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-
9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@])*))
*))?)|(?:(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f
][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\@])+)(?:(?:\/(?:(?:(?:(?:%[EFef]
[0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\
.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@])*))*))|(?!(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-
Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029
\u202F-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@]))))(?:\?((?:(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-F
a-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFD
CF\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@])|[\/\?\uE000-\uF8FF])*)))?(?:\#((?:(?:(?:(?:(?:%[EFef][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa
-f])|(?:%[89A-Fa-f][0-9A-Fa-f]%[0-9A-Fa-f][0-9A-Fa-f])|(?:%[0-9A-Fa-f][0-9A-Fa-f]))|[A-Za-z0-9\-\.\_\~\xA0-\u200D\u2010-\u2029\u202F-\uD7FF\uF900-\uFDC
F\uFDF0-\uFFEF\!\$\&\'\(\)\*\+\,\;\=\:\@])|[\/\?])*)))?$)/
garycourt commented 8 years ago

Try editing the uri-js/build/uri.js file and changing the variable URI__VALIDATE_SUPPORT to false. This should give you better performance. In the next release, I'll make this an option you can pass in.

dotchev commented 8 years ago

Yes, with this change parse now completes immediately, and the regex used is much simpler:

/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/windows\win.ini
/^(?:([^:\/?#]+):)?(?:\/\/((?:([^\/?#@]*)@)?([^\/?#:]*)(?:\:(\d*))?))?([^?#]*)(?:\?([^#]*))?(?:#((?:.|\n)*))?/i
time: 0ms
{ scheme: undefined,
  userinfo: undefined,
  host: undefined,
  port: undefined,
  path: '/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/%C0%AE%C0%AE/windows%5Cwin.ini',
  query: undefined,
  fragment: undefined,
  reference: 'relative' }

You can close this issue when you provide it as an option.

garycourt commented 7 years ago

I've removed validation in v3 of URI.js, so this is no longer an issue.