celluloid / reel

UNMAINTAINED: See celluloid/celluloid#779 - Celluloid::IO-powered web server
https://celluloid.io
MIT License
595 stars 87 forks source link

Performance Related Refactoring - Phase_01 #18

Closed ghost closed 11 years ago

ghost commented 11 years ago

Before

$ httperf --hog --port 9292 --num-conns=1 --num-calls=1000
...
Request rate: 2852.5 req/s (0.4 ms/req)
...
Reply status: 1xx=0 2xx=1000 3xx=0 4xx=0 5xx=0

After

$ httperf --hog --port 9292 --num-conns=1 --num-calls=1000
...
=== obsolete ===
Request rate: 4420.1 req/s (0.2 ms/req)
=== obsolete ===

due to removing #compiler_keys cacher this decreased to about 4200 rps

...
Reply status: 1xx=0 2xx=1000 3xx=0 4xx=0 5xx=0

Commenting updates

diff --git a/lib/reel/rack_worker.rb b/lib/reel/rack_worker.rb
index a22a70c..ef06b2d 100644
--- a/lib/reel/rack_worker.rb
+++ b/lib/reel/rack_worker.rb
@@ -17,17 +17,13 @@ module Reel
     HTTP_1_1            = 'HTTP/1.1'.freeze
     CGI_1_1             = 'CGI/1.1'.freeze
     REMOTE_ADDR         = 'REMOTE_ADDR'.freeze

# REMOTE_HOST not really needed. details below
-    REMOTE_HOST         = 'REMOTE_HOST'.freeze
     CONNECTION          = 'HTTP_CONNECTION'.freeze
     SCRIPT_NAME         = 'SCRIPT_NAME'.freeze
     PATH_INFO           = 'PATH_INFO'.freeze
     REQUEST_METHOD      = 'REQUEST_METHOD'.freeze

# REQUEST_PATH, ORIGINAL_FULLPATH, REQUEST_URI not used by Rack
-    REQUEST_PATH        = 'REQUEST_PATH'.freeze
-    ORIGINAL_FULLPATH   = 'ORIGINAL_FULLPATH'.freeze
     QUERY_STRING        = 'QUERY_STRING'.freeze
-    REQUEST_URI         = 'REQUEST_URI'.freeze

# no more regexing, it is too expensive. details below
-    CONTENT_TYPE_RGXP   = /^content-type$/i.freeze
-    CONTENT_LENGTH_RGXP = /^content-length$/i.freeze
+    CONTENT_TYPE        = 'Content-Type'.freeze
+    CONTENT_LENGTH      = 'Content-Length'.freeze
     HTTP_               = 'HTTP_'.freeze
     HOST                = 'Host'.freeze

@@ -40,10 +36,18 @@ module Reel
     RACK_MULTIPROCESS = 'rack.multiprocess'.freeze
     RACK_RUN_ONCE     = 'rack.run_once'.freeze
     RACK_URL_SCHEME   = 'rack.url_scheme'.freeze

# we do not need EM-ish stuff
-    ASYNC_CALLBACK    = 'async.callback'.freeze
-    ASYNC_CLOSE       = 'async.close'.freeze

     RACK_WEBSOCKET    = 'rack.websocket'.freeze

# get request method from a Hash
# rather than use something.to_s.upcase on each request
+    REQUEST_METHODS   = {
+      :get     => 'GET'.freeze,
+      :post    => 'POST'.freeze,
+      :put     => 'PUT'.freeze,
+      :head    => 'HEAD'.freeze,
+      :delete  => 'DELETE'.freeze,
+      :options => 'OPTIONS'.freeze,
+      :patch   => 'PATCH'.freeze,
+    }
+
     PROTO_RACK_ENV = {
       RACK_VERSION      => Rack::VERSION,
       RACK_ERRORS       => STDERR,
@@ -73,8 +77,7 @@ module Reel
     end

     def handle_request(request, connection)

# splitting env into request_env and websocket_env
-      env = rack_env(request, connection)
-      status, headers, body_parts = @app.call(env)
+      status, headers, body_parts = @app.call(request_env request, connection)
       body = response_body(body_parts)

       connection.respond Response.new(status, headers, body)
@@ -84,8 +87,16 @@ module Reel
     end

     def handle_websocket(request, connection)

# splitting env into request_env and websocket_env
-      env = rack_env(request, connection)
-      @app.call(env)
+      status, *rest = @app.call(websocket_env request)
+      request.socket.close unless status < 300
+    end
+
+    def request_env request, connection
+      env(request).merge REMOTE_ADDR => connection.remote_ip
+    end
+
+    def websocket_env request
+      env(request).merge REMOTE_ADDR => request.remote_ip, RACK_WEBSOCKET => request
     end

     def response_body(body_parts)
@@ -98,51 +109,39 @@ module Reel
       end
     end

# moved to env 
-    def rack_env(request, connection)
-      env = PROTO_RACK_ENV.dup
-
-      env[SERVER_NAME] = request[HOST].to_s.split(':').first || @handler[:Host]
-      env[SERVER_PORT] = @handler[:port].to_s
+    private

-      case request
-      when WebSocket
-        remote_connection = request
-        env[RACK_WEBSOCKET] = request
-     when Request
-        remote_connection = connection
-      end
-
-      env[REMOTE_ADDR] = remote_connection.remote_ip

# DNS lookups consumes about 500 rps!
# and this is not really needed.
# in fact really not needed.
# Thin and others surviving well without this.
# there are enough resolvers in userland
# so no need to slow down the server with this.
-      env[REMOTE_HOST] = remote_connection.remote_host
-
-      env[PATH_INFO]   = request.path
-      env[REQUEST_METHOD] = request.method.to_s.upcase
+    def env request

# Hash[] is faster than hash#dup by about 50%
+      env = Hash[PROTO_RACK_ENV]

       env[RACK_INPUT] = StringIO.new(request.body || INITIAL_BODY)
       env[RACK_INPUT].set_encoding(Encoding::BINARY) if env[RACK_INPUT].respond_to?(:set_encoding)

# not really needed so excluding extra overhead
-      env[RACK_LOGGER] = @app if Rack::CommonLogger === @app

# if request contains host:port pair
# no need to spend time on @handler[:port].to_s
+      env[SERVER_NAME], env[SERVER_PORT] = (request[HOST]||'').split(':', 2)
+      env[SERVER_PORT] ||= @handler[:port].to_s

# request object does contain HTTP version data.
# trying this and fallback to server protocol
+      env[HTTP_VERSION] = request.version || env[SERVER_PROTOCOL]

# REQUEST_PATH, ORIGINAL_FULLPATH, REQUEST_URI not used by Rack
-      env[REQUEST_PATH] = request.path
-      env[ORIGINAL_FULLPATH] = request.path
-      env[REQUEST_URI] = request.path

# get request method from a Hash
# rather than use something.to_s.upcase on each request
+      env[REQUEST_METHOD] = REQUEST_METHODS[request.method] || 
+        raise(ArgumentError, "unknown request method: %s" % request.method)

# fragment not used by Rack and it will be ignored even if we attach it
-      query_string = request.query_string || ''
-      query_string << "##{request.fragment}" if request.fragment

+      env[PATH_INFO]    = request.path
+      env[QUERY_STRING] = request.query_string || ''

# both moved upper
-      env[HTTP_VERSION] ||= env[SERVER_PROTOCOL]
-      env[QUERY_STRING] = query_string

# rather than regexing against 2 strings on each header
# simply deleting content type and length from headers.
# with regexing we have a N*2 speed degradation where N is the number of headers
+      # seems headers wont be used anymore so it is safe to alter them.
+      (_ = request.headers.delete CONTENT_TYPE) && (env[CONTENT_TYPE] = _)
+      (_ = request.headers.delete CONTENT_LENGTH) && (env[CONTENT_LENGTH] = _)
-      request.headers.each do |key, val|
-        next if CONTENT_TYPE_RGXP =~ key
-        next if CONTENT_LENGTH_RGXP =~ key
-        name = HTTP_ + key
-        name.gsub!(/-/o, '_')
-        name.upcase!
-        env[name] = val

+      request.headers.each_pair do |key, val|
+        env[ HTTP_ + key.sub('-', '_').upcase ] = val
       end

       env
     end

   end
 end
diff --git a/lib/reel/websocket.rb b/lib/reel/websocket.rb
index 3cdb672..236c76c 100644
--- a/lib/reel/websocket.rb
+++ b/lib/reel/websocket.rb
@@ -5,7 +5,7 @@ module Reel
     include RemoteConnection
     include URIParts

-    attr_reader :url, :headers, :method

# adding version to comply with usual http request object 
# so env[HTTP_VERSION] = request.version || env[SERVER_PROTOCOL]
# would work on both request types
+    attr_reader :url, :headers, :method, :version

     def initialize(socket, method, url, headers)
       @socket, @method, @url, @headers = socket, method, url, headers
diff --git a/spec/reel/rack_worker_spec.rb b/spec/reel/rack_worker_spec.rb
index eafde8e..00987ee 100644
--- a/spec/reel/rack_worker_spec.rb
+++ b/spec/reel/rack_worker_spec.rb
@@ -3,14 +3,13 @@ require 'spec_helper'
 describe Reel::RackWorker do
   let(:endpoint) { URI(example_url) }

-  let(:worker) do
-    app = Proc.new do |env|
-      [200, {'Content-Type' => 'text/plain'}, ['Hello world!']]
-      [200, {'Content-Type' => 'text/plain'}, ['Hello rack world!']]
-    end

# needed to pass app through Rack::Lint
+  RackApp = Proc.new do |env|
+    [200, {'Content-Type' => 'text/plain'}, ['Hello rack world!']]
+  end

+  let(:worker) do
     handler = Rack::Handler::Reel.new
-    handler.options[:app] = app
+    handler.options[:app] = RackApp

     Reel::RackWorker.new(handler)
   end
@@ -19,7 +18,7 @@ describe Reel::RackWorker do
     with_socket_pair do |client, connection|
       client << ExampleRequest.new(:get, '/test?hello=true').to_s
       request = connection.request
-      env = worker.rack_env(request, connection)

# using new env method
+      env = worker.request_env(request, connection)

       Reel::RackWorker::PROTO_RACK_ENV.each do |k, v|
         env[k].should == v
@@ -30,17 +29,16 @@ describe Reel::RackWorker do
       env["REMOTE_ADDR"].should == "127.0.0.1"
       env["PATH_INFO"].should == "/test"
       env["REQUEST_METHOD"].should == "GET"

# REQUEST_PATH, ORIGINAL_FULLPATH, REQUEST_URI not used by Rack
-      env["REQUEST_PATH"].should == "/test"
-      env["ORIGINAL_FULLPATH"].should == "/test"
       env["QUERY_STRING"].should == "hello=true"
       env["HTTP_HOST"].should == 'www.example.com'
       env["HTTP_ACCEPT_LANGUAGE"].should == "en-US,en;q=0.8"
-      env["REQUEST_URI"].should == '/test'
-

# we do not want to spend time on reverse DNS stuff
-      %w(localhost 127.0.0.1).should include env["REMOTE_HOST"]

       env["rack.input"].should be_kind_of(StringIO)
       env["rack.input"].string.should == ''

# passing app and env through Rack::Lint
# to make sure we fully comply with Rack standards
+      validator = ::Rack::Lint.new(RackApp)
+      status, *rest = validator.call(env)
+      status.should == 200
     end
   end

@@ -51,7 +49,7 @@ describe Reel::RackWorker do
       with_socket_pair do |client, connection|
         client << handshake.to_data
         request = connection.request
-        env = worker.rack_env(request, connection)
+        env = worker.websocket_env(request)

+        env["REMOTE_ADDR"].should == "127.0.0.1"
         env["rack.websocket"].should == request
       end
shtirlic commented 11 years ago

seems ok to me

shtirlic commented 11 years ago

Also there is another check for HTTP methods here https://github.com/celluloid/reel/blob/master/lib/reel/request.rb#L35

ghost commented 11 years ago

@shtirlic, thanks for pointing me to a potential ddos issue. removed that "feature" :)

ghost commented 11 years ago

@shtirlic, regard HTTP methods check, that file will be updated on Phase_02

tarcieri commented 11 years ago

This looks good to me