Module URI
In: lib/open-uri.rb
lib/uri/common.rb
lib/uri/ftp.rb
lib/uri/generic.rb
lib/uri/http.rb
lib/uri/https.rb
lib/uri/ldap.rb
lib/uri/mailto.rb
lib/uri.rb

URI support for Ruby

Author:Akira Yamada <akira@ruby-lang.org>
Documentation:Akira Yamada <akira@ruby-lang.org>, Dmitry V. Sabanin <sdmitry@lrn.ru>
License:Copyright (c) 2001 akira yamada <akira@ruby-lang.org> You can redistribute it and/or modify it under the same terms as Ruby.
Revision:$Id: uri.rb,v 1.8.2.2 2004/07/17 13:07:46 akira Exp $

See URI for documentation

Methods

extract   join   parse   regexp   split  

Included Modules

REGEXP

Classes and Modules

Module URI::Escape
Module URI::REGEXP
Class URI::BadURIError
Class URI::Error
Class URI::FTP
Class URI::Generic
Class URI::HTTP
Class URI::HTTPS
Class URI::InvalidComponentError
Class URI::InvalidURIError
Class URI::LDAP
Class URI::MailTo

Public Class methods

Synopsis

  URI::extract(str[, schemes][,&blk])

Args

str:String to extract URIs from.
schemes:Limit URI matching to specific schemes.

Description

Extracts URIs from a string. If a block is given, yields each matched URI to the block and returns nil; otherwise returns an array of the matches.

Usage

  require "uri"

  URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
  # => ["http://foo.example.org/bla", "mailto:test@example.com"]

[Source]

     # File lib/uri/common.rb, line 547
547:   def self.extract(str, schemes = nil, &block)
548:     if block_given?
549:       str.scan(regexp(schemes)) { yield $& }
550:       nil
551:     else
552:       result = []
553:       str.scan(regexp(schemes)) { result.push $& }
554:       result
555:     end
556:   end

Synopsis

  URI::join(str[, str, ...])

Args

str:Strings to join. The first is parsed as a URI and each following string is merged into it in order.

Description

Joins URIs.

Usage

  require 'uri'

  p URI.join("http://localhost/","main.rbx")
  # => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>

[Source]

     # File lib/uri/common.rb, line 515
515:   def self.join(*str)
516:     u = self.parse(str[0])
517:     str[1 .. -1].each do |x|
518:       u = u.merge(x)
519:     end
520:     u
521:   end

Synopsis

  URI::parse(uri_str)

Args

uri_str:String with URI.

Description

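Creates an instance of one of URI's subclasses from the string, chosen by the URI's scheme. If the scheme is missing or not registered, a URI::Generic instance is created instead.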

Raises

URI::InvalidURIError

  Raised if the given URI is not valid.

Usage

  require 'uri'

  uri = URI.parse("http://www.ruby-lang.org/")
  p uri
  # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
  p uri.scheme
  # => "http"
  p uri.host
  # => "www.ruby-lang.org"

[Source]

     # File lib/uri/common.rb, line 479
479:   def self.parse(uri)
480:     scheme, userinfo, host, port, 
481:       registry, path, opaque, query, fragment = self.split(uri)
482: 
483:     if scheme && @@schemes.include?(scheme.upcase)
484:       @@schemes[scheme.upcase].new(scheme, userinfo, host, port, 
485:                                    registry, path, opaque, query, 
486:                                    fragment)
487:     else
488:       Generic.new(scheme, userinfo, host, port, 
489:                   registry, path, opaque, query, 
490:                   fragment)
491:     end
492:   end

Synopsis

  URI::regexp([match_schemes])

Args

match_schemes:Array of schemes. If given, the resulting regexp matches only URIs whose scheme is one of match_schemes.

Description

Returns a Regexp object which matches URI-like strings. The Regexp object returned by this method includes an arbitrary number of capture groups (parentheses). Never rely on their number.

Usage

  require 'uri'

  # extract first URI from html_string
  html_string.slice(URI.regexp)

  # remove ftp URIs
  html_string.sub(URI.regexp(['ftp']), '')

  # You should not rely on the number of parentheses
  html_string.scan(URI.regexp) do |*matches|
    p $&
  end

[Source]

     # File lib/uri/common.rb, line 589
589:   def self.regexp(schemes = nil)
590:     unless schemes
591:       ABS_URI_REF
592:     else
593:       /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
594:     end
595:   end

Synopsis

  URI::split(uri)

Args

uri:String with URI.

Description

Splits the string into the following parts and returns an array with the result:

  * Scheme
  * Userinfo
  * Host
  * Port
  * Registry
  * Path
  * Opaque
  * Query
  * Fragment

Usage

  require 'uri'

  p URI.split("http://www.ruby-lang.org/")
  # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]

[Source]

     # File lib/uri/common.rb, line 376
376:   def self.split(uri)
377:     case uri
378:     when ''
379:       # null uri
380: 
381:     when ABS_URI
382:       scheme, opaque, userinfo, host, port, 
383:         registry, path, query, fragment = $~[1..-1]
384: 
385:       # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
386: 
387:       # absoluteURI   = scheme ":" ( hier_part | opaque_part )
388:       # hier_part     = ( net_path | abs_path ) [ "?" query ]
389:       # opaque_part   = uric_no_slash *uric
390: 
391:       # abs_path      = "/"  path_segments
392:       # net_path      = "//" authority [ abs_path ]
393: 
394:       # authority     = server | reg_name
395:       # server        = [ [ userinfo "@" ] hostport ]
396: 
397:       if !scheme
398:         raise InvalidURIError, 
399:           "bad URI(absolute but no scheme): #{uri}"
400:       end
401:       if !opaque && (!path && (!host && !registry))
402:         raise InvalidURIError,
403:           "bad URI(absolute but no path): #{uri}" 
404:       end
405: 
406:     when REL_URI
407:       scheme = nil
408:       opaque = nil
409: 
410:       userinfo, host, port, registry, 
411:         rel_segment, abs_path, query, fragment = $~[1..-1]
412:       if rel_segment && abs_path
413:         path = rel_segment + abs_path
414:       elsif rel_segment
415:         path = rel_segment
416:       elsif abs_path
417:         path = abs_path
418:       end
419: 
420:       # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
421: 
422:       # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
423: 
424:       # net_path      = "//" authority [ abs_path ]
425:       # abs_path      = "/"  path_segments
426:       # rel_path      = rel_segment [ abs_path ]
427: 
428:       # authority     = server | reg_name
429:       # server        = [ [ userinfo "@" ] hostport ]
430: 
431:     else
432:       raise InvalidURIError, "bad URI(is not URI?): #{uri}"
433:     end
434: 
435:     path = '' if !path && !opaque # (see RFC2396 Section 5.2)
436:     ret = [
437:       scheme, 
438:       userinfo, host, port,         # X
439:       registry,                        # X
440:       path,                         # Y
441:       opaque,                        # Y
442:       query,
443:       fragment
444:     ]
445:     return ret
446:   end

[Validate]