From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-1.5 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, MSGID_RANDY,URIBL_BLOCKED shortcircuit=no autolearn=no version=3.3.2 X-Original-To: spew@80x24.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id D885C1F8A1; Mon, 22 Dec 2014 21:53:55 +0000 (UTC) Date: Mon, 22 Dec 2014 21:53:55 +0000 From: Eric Wong To: spew@80x24.org Subject: [PATCH] uri optimizations, too ugly Message-ID: MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable List-Id: Unfortunately, these make evaluating effects of any future compile.c optimizations more difficult. [misc #10628] https://bugs.ruby-lang.org/issues/10628 * use opt_str_freeze to reduce duplicates * avoid regenerating hash in parser.regexp * reduced bytecode size of conditionals benchmark results: target 0: 2.1.5 (ruby 2.1.5p273 (2014-11-13 revision 48405) [x86_64-linux])= at "/home/ew/ruby-2.1/bin/ruby" target 1: trunk (ruby 2.2.0dev (2014-12-22 trunk 48922) [x86_64-linux]) at = "/home/ew/rrrr/b/i/bin/ruby" target 2: built (ruby 2.2.0dev (2014-12-22 trunk 48922) [x86_64-linux]) at = "/home/ew/ruby/b/i/bin/ruby" ----------------------------------------------------------- raw data: [["app_uri", [[0.4858027193695307, 0.48909279331564903, 0.4869739431887865, 0.4856073558330536, 0.49060618318617344, 0.49414661154150963, 0.48784281872212887, 0.4851597473025322, 0.48379900865256786, 0.48618787340819836], [0.589355481788516, 0.6005589235574007, 0.6023986879736185, 0.586976544931531, 0.6007280834019184, 0.5901837293058634, 0.5893201008439064, 0.5839062985032797, 0.5905469041317701, 0.6007170639932156], [0.5123783405870199, 0.5250121373683214, 0.5028673857450485, 0.49962601624429226, 0.5074941627681255, 0.5039216671139002, 0.5183564182370901, 
0.5083295572549105, 0.5006583165377378, 0.5104942582547665]]]] Elapsed time: 15.90131543 (sec) ----------------------------------------------------------- benchmark results: minimum results in each 10 measurements. Execution time (sec) name 2.1.5 trunk built app_uri 0.484 0.584 0.500 Speedup ratio: compare with the result of `2.1.5' (greater is better) name trunk built app_uri 0.829 0.968 --- lib/uri/generic.rb | 23 +++++------- lib/uri/rfc3986_parser.rb | 93 +++++++++++++++++++++++++++----------------= ---- 2 files changed, 63 insertions(+), 53 deletions(-) diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb index c0b94a8..6559cd8 100644 --- a/lib/uri/generic.rb +++ b/lib/uri/generic.rb @@ -543,7 +543,7 @@ module URI # if properly formatted as 'user:password' def split_userinfo(ui) return nil, nil unless ui - user, password =3D ui.split(/:/, 2) + user, password =3D ui.split(':'.freeze, 2) =20 return user, password end @@ -695,13 +695,7 @@ module URI # see also URI::Generic.port=3D # def set_port(v) - unless !v || v.kind_of?(Fixnum) - if v.empty? - v =3D nil - else - v =3D v.to_i - end - end + v =3D v.empty? ? nil : v.to_i unless !v || v.kind_of?(Fixnum) @port =3D v end protected :set_port @@ -768,13 +762,14 @@ module URI =20 # If scheme is ftp, path may be relative. # See RFC 1738 section 3.2.2, and RFC 2396. - if @scheme && @scheme !=3D "ftp" - if v && v !=3D '' && parser.regexp[:ABS_PATH] !~ v + if @scheme && @scheme !=3D "ftp".freeze + if v && v !=3D ''.freeze && parser.regexp[:ABS_PATH] !~ v raise InvalidComponentError, "bad component(expected absolute path component): #{v}" end else - if v && v !=3D '' && parser.regexp[:ABS_PATH] !~ v && parser.regex= p[:REL_PATH] !~ v + if v && v !=3D ''.freeze && parser.regexp[:ABS_PATH] !~ v && + parser.regexp[:REL_PATH] !~ v raise InvalidComponentError, "bad component(expected relative path component): #{v}" end @@ -849,7 +844,7 @@ module URI x =3D v.to_str v =3D x.dup if x.equal? 
v v.encode!(Encoding::UTF_8) rescue nil - v.delete!("\t\r\n") + v.delete!("\t\r\n".freeze) v.force_encoding(Encoding::ASCII_8BIT) v.gsub!(/(?!%\h\h|[!$-&(-;=3D?-Z_a-~])./n.freeze){'%%%02X'.freeze % = $&.ord} v.force_encoding(Encoding::US_ASCII) @@ -939,9 +934,9 @@ module URI x =3D v.to_str v =3D x.dup if x.equal? v v.encode!(Encoding::UTF_8) rescue nil - v.delete!("\t\r\n") + v.delete!("\t\r\n".freeze) v.force_encoding(Encoding::ASCII_8BIT) - v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X' % $&.ord} + v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X'.freeze % $&.ord} v.force_encoding(Encoding::US_ASCII) @fragment =3D v end diff --git a/lib/uri/rfc3986_parser.rb b/lib/uri/rfc3986_parser.rb index 946f374..3923b06 100644 --- a/lib/uri/rfc3986_parser.rb +++ b/lib/uri/rfc3986_parser.rb @@ -4,6 +4,11 @@ module URI # this regexp is modified not to host is not empty string RFC3986_URI =3D /\A(?(?[A-Za-z][+\-.0-9A-Za-z]*):(?\/\/(?(?:(?(?:%\h\h|[!$&-.0-;=3DA-Z_a-z~])*)@)?(?= (?\[(?:(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1= ,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{1,4}?:= :(?:\h{1,4}:){4}\g|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g= |(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\= h{1,4})?::\h{1,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{= 1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v= \h+\.[!$&-.0-;=3DA-Z_a-z~]+))\])|\g|(?(?:%\h\h|[!$&-= =2E0-9;=3DA-Z_a-z~])+))?(?::(?\d*))?)(?(?:\/(?= (?:%\h\h|[!$&-.0-;=3D@-Z_a-z~])*))*)|(?\/(?:(?(?= :%\h\h|[!$&-.0-;=3D@-Z_a-z~])+)(?:\/\g)*)?)|(?\g(?:\/\g)*)|(?))(?:\?(?[^#]*))?(?:\#(?<= fragment>(?:%\h\h|[!$&-.0-;=3D@-Z_a-z~\/?])*))?)\z/ RFC3986_relative_ref =3D /\A(?(?\/\/(?(?:(?(?:%\h\h|[!$&-.0-;=3DA-Z_a-z~])*)@)?(?(?\[(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{1,4}?::(?:\h{1,4}:){4}\= g|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g|(?:(?:\h{1,4}= 
:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1= ,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{1,4}:){,5}\h{1= ,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v\h+\.[!$&-.0-;= =3DA-Z_a-z~]+)\])|\g|(?(?:%\h\h|[!$&-.0-9;=3DA-Z_a-z= ~])+))?(?::(?\d*))?)(?(?:\/(?(?:%\h\h|[!$&-.0-= ;=3D@-Z_a-z~])*))*)|(?\/(?:(?(?:%\h\h|[!$&-.0-;= =3D@-Z_a-z~])+)(?:\/\g)*)?)|(?(?(?:%= \h\h|[!$&-.0-9;=3D@-Z_a-z~])+)(?:\/\g)*)|(?))(?:\?(?[^#]*))?(?:\#(?(?:%\h\h|[!$&-.0-;=3D@-Z_a-z~\/?])*))?)\z/ + attr_reader :regexp + + def initialize + @regexp =3D default_regexp.each_value(&:freeze).freeze + end =20 def split(uri) #:nodoc: begin @@ -11,42 +16,52 @@ module URI rescue NoMethodError raise InvalidURIError, "bad URI(is not URI?): #{uri}" end - unless uri.ascii_only? + uri.ascii_only? or raise InvalidURIError, "URI must be ascii only #{uri.dump}" - end if m =3D RFC3986_URI.match(uri) - ary =3D [] - ary << m["scheme"] - if m["path-rootless"] # opaque - ary << nil # userinfo - ary << nil # host - ary << nil # port - ary << nil # registry - ary << nil # path - ary << m["path-rootless"] - ary[-1] << '?' 
<< m["query"] if m["query"] - ary << nil # query - ary << m["fragment"] + query =3D m["query".freeze] + scheme =3D m["scheme".freeze] + opaque =3D m["path-rootless".freeze] + if opaque + opaque << "?#{query}" if query + [ scheme, + nil, # userinfo + nil, # host + nil, # port + nil, # registry + nil, # path + opaque, + nil, # query + m["fragment".freeze] + ] else # normal - ary << m["userinfo"] - ary << m["host"] - ary << m["port"] - ary << nil # registry - ary << (m["path-abempty"] || m["path-absolute"] || m["path-empty= "]) - ary << nil # opaque - ary << m["query"] - ary << m["fragment"] + [ scheme, + m["userinfo".freeze], + m["host".freeze], + m["port".freeze], + nil, # registry + (m["path-abempty".freeze] || + m["path-absolute".freeze] || + m["path-empty".freeze]), + nil, # opaque + query, + m["fragment".freeze] + ] end elsif m =3D RFC3986_relative_ref.match(uri) - ary =3D [nil] - ary << m["userinfo"] - ary << m["host"] - ary << m["port"] - ary << nil # registry - ary << (m["path-abempty"] || m["path-absolute"] || m["path-noschem= e"] || m["path-empty"]) - ary << nil # opaque - ary << m["query"] - ary << m["fragment"] + [ nil, # scheme + m["userinfo".freeze], + m["host".freeze], + m["port".freeze], + nil, # registry, + (m["path-abempty".freeze] || + m["path-absolute".freeze] || + m["path-noscheme".freeze] || + m["path-empty".freeze]), + nil, # opaque + m["query".freeze], + m["fragment".freeze] + ] else raise InvalidURIError, "bad URI(is not URI?): #{uri}" end @@ -55,11 +70,11 @@ module URI def parse(uri) # :nodoc: scheme, userinfo, host, port, registry, path, opaque, query, fragment =3D self.split(uri) - - if scheme && URI.scheme_list.include?(scheme.upcase) - URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port, - registry, path, opaque, query, - fragment, self) + scheme_list =3D URI.scheme_list + if scheme && scheme_list.include?(uc =3D scheme.upcase) + scheme_list[uc].new(scheme, userinfo, host, port, + registry, path, opaque, query, + fragment, 
self) else Generic.new(scheme, userinfo, host, port, registry, path, opaque, query, @@ -78,7 +93,9 @@ module URI @@to_s.bind(self).call end =20 - def regexp + private + + def default_regexp # :nodoc: { SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/, USERINFO: /\A(?:%\h\h|[!$&-.0-;=3DA-Z_a-z~])*\z/, @@ -92,8 +109,6 @@ module URI } end =20 - private - def convert_to_uri(uri) if uri.is_a?(URI::Generic) uri --=20 EW