From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: spew@80x24.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 0D7F51F8A1; Mon, 22 Dec 2014 21:03:35 +0000 (UTC) Date: Mon, 22 Dec 2014 21:03:34 +0000 From: Eric Wong To: spew@80x24.org Subject: [PATCH] uri optimizations, too ugly Message-ID: <20141222210334.GA25843@dcvr.yhbt.net> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable List-Id: --- lib/uri/generic.rb | 32 +++++++--------- lib/uri/rfc3986_parser.rb | 93 +++++++++++++++++++++++++++----------------= ---- 2 files changed, 67 insertions(+), 58 deletions(-) diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb index c0b94a8..c72103f 100644 --- a/lib/uri/generic.rb +++ b/lib/uri/generic.rb @@ -695,13 +695,7 @@ module URI # see also URI::Generic.port=3D # def set_port(v) - unless !v || v.kind_of?(Fixnum) - if v.empty? - v =3D nil - else - v =3D v.to_i - end - end + v =3D v.empty ? nil : v.to_i unless !v || v.kind_of?(Fixnum) @port =3D v end protected :set_port @@ -768,13 +762,13 @@ module URI =20 # If scheme is ftp, path may be relative. # See RFC 1738 section 3.2.2, and RFC 2396. - if @scheme && @scheme !=3D "ftp" - if v && v !=3D '' && parser.regexp[:ABS_PATH] !~ v + if @scheme && @scheme !=3D "ftp".freeze + if v && v !=3D ''.freeze && parser.regexp[:ABS_PATH] !~ v raise InvalidComponentError, "bad component(expected absolute path component): #{v}" end else - if v && v !=3D '' && parser.regexp[:ABS_PATH] !~ v && parser.regex= p[:REL_PATH] !~ v + if v && v !=3D ''.freeze && parser.regexp[:ABS_PATH] !~ v && parse= r.regexp[:REL_PATH] !~ v raise InvalidComponentError, "bad component(expected relative path component): #{v}" end @@ -849,7 +843,7 @@ module URI x =3D v.to_str v =3D x.dup if x.equal? v v.encode!(Encoding::UTF_8) rescue nil - v.delete!("\t\r\n") + v.delete!("\t\r\n".freeze) v.force_encoding(Encoding::ASCII_8BIT) v.gsub!(/(?!%\h\h|[!$-&(-;=3D?-Z_a-~])./n.freeze){'%%%02X'.freeze % = $&.ord} v.force_encoding(Encoding::US_ASCII) @@ -939,9 +933,9 @@ module URI x =3D v.to_str v =3D x.dup if x.equal? v v.encode!(Encoding::UTF_8) rescue nil - v.delete!("\t\r\n") + v.delete!("\t\r\n".freeze) v.force_encoding(Encoding::ASCII_8BIT) - v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X' % $&.ord} + v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X'.freeze % $&.ord} v.force_encoding(Encoding::US_ASCII) @fragment =3D v end @@ -996,8 +990,8 @@ module URI rel_path =3D split_path(rel) =20 # RFC2396, Section 5.2, 6), a) - base_path << '' if base_path.last =3D=3D '..' - while i =3D base_path.index('..') + base_path << ''.freeze if base_path.last =3D=3D '..'.freeze + while i =3D base_path.index('..'.freeze) base_path.slice!(i - 1, 2) end =20 @@ -1008,7 +1002,7 @@ module URI =20 # RFC2396, Section 5.2, 6), c) # RFC2396, Section 5.2, 6), d) - rel_path.push('') if rel_path.last =3D=3D '.' || rel_path.last =3D= =3D '..' + rel_path << ''.freeze if rel_path.last =3D=3D '.'.freeze || rel_path= =2Elast =3D=3D '..'.freeze rel_path.delete('.') =20 # RFC2396, Section 5.2, 6), e) @@ -1029,7 +1023,7 @@ module URI base_path.pop end while x =3D tmp.shift - if x =3D=3D '..' + if x =3D=3D '..'.freeze # RFC2396, Section 4 # a .. or . in an absolute path has no special meaning base_path.pop if base_path.size > 1 @@ -1045,9 +1039,9 @@ module URI break end end - base_path.push('') if add_trailer_slash + base_path << ''.freeze if add_trailer_slash =20 - return base_path.join('/') + return base_path.join('/'.freeze) end private :merge_path =20 diff --git a/lib/uri/rfc3986_parser.rb b/lib/uri/rfc3986_parser.rb index 946f374..c143939 100644 --- a/lib/uri/rfc3986_parser.rb +++ b/lib/uri/rfc3986_parser.rb @@ -4,6 +4,11 @@ module URI # this regexp is modified not to host is not empty string RFC3986_URI =3D /\A(?(?[A-Za-z][+\-.0-9A-Za-z]*):(?\/\/(?(?:(?(?:%\h\h|[!$&-.0-;=3DA-Z_a-z~])*)@)?(?= (?\[(?:(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1= ,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{1,4}?:= :(?:\h{1,4}:){4}\g|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g= |(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\= h{1,4})?::\h{1,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{= 1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v= \h+\.[!$&-.0-;=3DA-Z_a-z~]+))\])|\g|(?(?:%\h\h|[!$&-= =2E0-9;=3DA-Z_a-z~])+))?(?::(?\d*))?)(?(?:\/(?= (?:%\h\h|[!$&-.0-;=3D@-Z_a-z~])*))*)|(?\/(?:(?(?= :%\h\h|[!$&-.0-;=3D@-Z_a-z~])+)(?:\/\g)*)?)|(?\g(?:\/\g)*)|(?))(?:\?(?[^#]*))?(?:\#(?<= fragment>(?:%\h\h|[!$&-.0-;=3D@-Z_a-z~\/?])*))?)\z/ RFC3986_relative_ref =3D /\A(?(?\/\/(?(?:(?(?:%\h\h|[!$&-.0-;=3DA-Z_a-z~])*)@)?(?(?\[(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{1,4}?::(?:\h{1,4}:){4}\= g|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g|(?:(?:\h{1,4}= :){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1= ,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{1,4}:){,5}\h{1= ,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v\h+\.[!$&-.0-;= =3DA-Z_a-z~]+)\])|\g|(?(?:%\h\h|[!$&-.0-9;=3DA-Z_a-z= ~])+))?(?::(?\d*))?)(?(?:\/(?(?:%\h\h|[!$&-.0-= ;=3D@-Z_a-z~])*))*)|(?\/(?:(?(?:%\h\h|[!$&-.0-;= =3D@-Z_a-z~])+)(?:\/\g)*)?)|(?(?(?:%= \h\h|[!$&-.0-9;=3D@-Z_a-z~])+)(?:\/\g)*)|(?))(?:\?(?[^#]*))?(?:\#(?(?:%\h\h|[!$&-.0-;=3D@-Z_a-z~\/?])*))?)\z/ + attr_reader :regexp + + def initialize + @regexp =3D default_regexp.each_value(&:freeze).freeze + end =20 def split(uri) #:nodoc: begin @@ -11,42 +16,52 @@ module URI rescue NoMethodError raise InvalidURIError, "bad URI(is not URI?): #{uri}" end - unless uri.ascii_only? + uri.ascii_only? or raise InvalidURIError, "URI must be ascii only #{uri.dump}" - end if m =3D RFC3986_URI.match(uri) - ary =3D [] - ary << m["scheme"] - if m["path-rootless"] # opaque - ary << nil # userinfo - ary << nil # host - ary << nil # port - ary << nil # registry - ary << nil # path - ary << m["path-rootless"] - ary[-1] << '?' << m["query"] if m["query"] - ary << nil # query - ary << m["fragment"] + scheme =3D m["scheme".freeze] + opaque =3D m["path-rootless".freeze] + if opaque + query =3D m["query".freeze] + opaque << "?#{query}" if query + [ scheme, + nil, # userinfo + nil, # host + nil, # port + nil, # registry + nil, # path + opaque, + nil, + m["fragment".freeze] + ] else # normal - ary << m["userinfo"] - ary << m["host"] - ary << m["port"] - ary << nil # registry - ary << (m["path-abempty"] || m["path-absolute"] || m["path-empty= "]) - ary << nil # opaque - ary << m["query"] - ary << m["fragment"] + [ scheme, + m["userinfo".freeze], + m["host".freeze], + m["port".freeze], + nil, # registry + (m["path-abempty".freeze] || + m["path-absolute".freeze] || + m["path-empty".freeze]), + nil, # opaque + m["query".freeze], + m["fragment".freeze] + ] end elsif m =3D RFC3986_relative_ref.match(uri) - ary =3D [nil] - ary << m["userinfo"] - ary << m["host"] - ary << m["port"] - ary << nil # registry - ary << (m["path-abempty"] || m["path-absolute"] || m["path-noschem= e"] || m["path-empty"]) - ary << nil # opaque - ary << m["query"] - ary << m["fragment"] + [ nil, # scheme + m["userinfo".freeze], + m["host".freeze], + m["port".freeze], + nil, # registry, + (m["path-abempty".freeze] || + m["path-absolute".freeze] || + m["path-noscheme".freeze] || + m["path-empty".freeze]), + nil, # opaque + m["query".freeze], + m["fragment".freeze] + ] else raise InvalidURIError, "bad URI(is not URI?): #{uri}" end @@ -55,11 +70,11 @@ module URI def parse(uri) # :nodoc: scheme, userinfo, host, port, registry, path, opaque, query, fragment =3D self.split(uri) - - if scheme && URI.scheme_list.include?(scheme.upcase) - URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port, - registry, path, opaque, query, - fragment, self) + scheme_list =3D URI.scheme_list + if scheme && scheme_list.include?(uc =3D scheme.upcase) + scheme_list[uc].new(scheme, userinfo, host, port, + registry, path, opaque, query, + fragment, self) else Generic.new(scheme, userinfo, host, port, registry, path, opaque, query, @@ -78,7 +93,9 @@ module URI @@to_s.bind(self).call end =20 - def regexp + private + + def default_regexp # :nodoc: { SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/, USERINFO: /\A(?:%\h\h|[!$&-.0-;=3DA-Z_a-z~])*\z/, @@ -92,8 +109,6 @@ module URI } end =20 - private - def convert_to_uri(uri) if uri.is_a?(URI::Generic) uri --=20 EW