61b2e92001-01-11Johan Schön //! This class implements URI parsing and resolving of relative references to //! absolute form, as defined in RFC 2396 // Implemented by Johan Sundström and Johan Schön. // Copyright (c) Roxen Internet Software 2001
d1916c2001-01-13Henrik Grubbström (Grubba) // $Id: URI.pike,v 1.4 2001/01/13 00:09:52 grubba Exp $
33593f2001-01-13Henrik Grubbström (Grubba)  #pragma strict_types
//! Scheme component of the URI.
string scheme;

//! Authority component of the URI (called net_loc before RFC 2396).
string authority;

//! Path component of the URI. May be empty, but not undefined.
string path;

//! Query component of the URI. 0 when the URI has no query.
string query;

//! Fragment part of the URI. 0 when the URI has no fragment.
string fragment;

//! Host part of the authority. Certain classes of URI (e.g. URL) may
//! have this defined.
string host;

//! User name part of the authority. Certain classes of URI (e.g. URL)
//! may have this defined.
string user;

//! Password part of the authority. Certain classes of URI (e.g. URL)
//! may have this defined.
string password;

//! Port number. When the URI carries no explicit port but its scheme has
//! a default port number, that default is stored here.
int port;

//! The base URI object, if present.
object(this_program) base_uri;
// The URI string exactly as supplied to create(); kept so that the URI
// can be parsed again by reparse_uri() when the base URI changes.
string raw_uri;

// Throw an error with the message S and the current backtrace.
#define error(S) throw( ({ (S), backtrace() }) )

// Debug output helpers; they expand to nothing unless the file is
// compiled with STANDARDS_URI_DEBUG defined.
//   debug(X, Y)   -- format X with the single argument Y.
//   debug_n(X, Y) -- format X with the array of arguments Y.
#ifdef STANDARDS_URI_DEBUG
#define debug(X, Y) werror("Standards.URI: "+X+"\n", Y)
#define debug_n(X, Y) werror("Standards.URI: "+X+"\n", @Y)
#else
#define debug(X, Y)
#define debug_n(X, Y)
#endif

// Parse authority component (according to RFC 1738, § 3.1)
//
// Splits the authority variable into user, password, host and port.
// When a "user[:password]@" prefix is present it is removed from
// authority, which afterwards only holds the "host[:port]" part.
static void parse_authority()
{
  // Forget credentials from an earlier parse; otherwise an authority
  // without a userinfo part (or with a user but no password) would
  // silently keep the previous user and/or password.
  user = password = 0;

  if(sscanf(authority, "%[^@]@%s", string auth, authority) == 2)
  {
    sscanf(auth, "%[^:]:%s", user, password); // auth info present
    debug_n("parse_authority(): user=%O, password=%O", ({ user, password }));
  }

  // Start from the default port of the scheme (a la RFC 1700); an explicit
  // ":port" in the authority overrides it below.
  if(scheme)
    port = [int]Protocols.Ports.tcp[scheme];

  sscanf(authority, "%[^:]%*[:]%d", host, port);
  debug_n("parse_authority(): host=%O, port=%O", ({ host, port }));
}

// Inherit all properties except raw_uri and base_uri from the URI uri. :-)
// Copies every parsed component of uri into this object; raw_uri and
// base_uri are left untouched.
static void inherit_properties(object(this_program) uri)
{
  authority = uri->authority;
  scheme = uri->scheme;
  user = uri->user;
  password = uri->password;
  host = uri->host;
  query = uri->query;
  port = uri->port;
  path = uri->path;
  fragment = uri->fragment;
}

//! Compare this URI to something, in a canonical way.
//! @param something
//!   Compare the URI to this
//! @returns
//!   Non-zero when @[something] formats to the same type name ("%t") and
//!   the same case-normalized form ("%x") as this URI.
int `==(mixed something)
{
  return
    _sprintf('t') == sprintf("%t", something) &&
    _sprintf('x') == sprintf("%x", something);
}

//! Merge a relative path with a base path, following the buffer method
//! of RFC 2396, section 5.2, step 6.
//! @param base
//!   Path component of the base URI.
//! @param rel
//!   Relative path to resolve against @[base].
//! @returns
//!   The merged path with "." segments and "<segment>/.." pairs removed.
//!   Leading ".." segments that can not be resolved are retained.
string combine_uri_path(string base, string rel)
{
  // RFC 2396, §5.2.6:
  // a) All but the last segment of the base URI's path component is
  //    copied to the buffer.  In other words, any characters after the
  //    last (right-most) slash character, if any, are excluded.
  //    A base without any slash consists of its last segment only, so
  //    nothing of it is copied.
  array(string) base_segments = base / "/";
  string buf = "";
  if(sizeof(base_segments) > 1)
    buf = base_segments[..sizeof(base_segments)-2] * "/" + "/";

  // b) The reference's path component is appended to the buffer string.
  buf += rel;
  array(string) segments = buf / "/";

  // c) All occurrences of "./", where "." is a complete path segment,
  //    are removed from the buffer string.  The last segment is not
  //    followed by a slash and is handled by step d.
  if(sizeof(segments) > 1)
    segments = (segments[..sizeof(segments)-2] - ({ "." })) +
      segments[sizeof(segments)-1..];

  // d) If the buffer string ends with "." as a complete path segment,
  //    that "." is removed.
  if(sizeof(segments) && segments[-1] == ".")
    segments = segments[..sizeof(segments)-2] + ({ "" });

  // e) All occurrences of "<segment>/../", where <segment> is a
  //    complete path segment not equal to "..", are removed from the
  //    buffer string.  Removal of these path segments is performed
  //    iteratively, removing the leftmost matching pattern on each
  //    iteration, until no matching pattern remains.
  //    Empty segments are not treated as removable, so a ".." directly
  //    below the root is kept.
  int found_pattern;
  do
  {
    found_pattern = 0;
    for(int i = 0; i < sizeof(segments)-2; i++)
      if(segments[i] != ".." && segments[i] != "" && segments[i+1] == "..")
      {
	segments = segments[..i-1] + segments[i+2..];
	found_pattern = 1;
	break; // the array changed; rescan from the left
      }
  } while(found_pattern);

  // f) If the buffer string ends with "<segment>/..", where <segment>
  //    is a complete path segment not equal to "..", that
  //    "<segment>/.." is removed.
  if(sizeof(segments) >= 2 && segments[-2] != ".." && segments[-1] == "..")
    segments = segments[..sizeof(segments)-3] + ({ "" });

  // g) If the resulting buffer string still begins with one or more
  //    complete path segments of "..", then the reference is
  //    considered to be in error.  Implementations may handle this
  //    error by retaining these components in the resolved path (i.e.,
  //    treating them as part of the final URI), by removing them from
  //    the resolved path (i.e., discarding relative levels above the
  //    root), or by avoiding traversal of the reference.
  //    Nothing is done here, i.e. such segments are retained.
  return segments * "/";
}

//! @decl void reparse_uri()
//! @decl void reparse_uri(URI base_uri)
//! @decl void reparse_uri(string base_uri)
//! Reparse the URI with respect to a new base URI. If
//! no base_uri was supplied, the old base_uri is thrown away.
//! The resolving is performed according to the guidelines
//! outlined by RFC 2396, Uniform Resource Identifiers (URI): Generic Syntax.
//! @param base_uri
//!   Set the new base URI to this.
//! @throws
//!   An error is thrown when the URI lacks a scheme (i.e. it is
//!   relative) and no base URI is available.
void reparse_uri(object(this_program)|string|void base_uri)
{
  string uri = raw_uri;

  if(stringp(base_uri))
  {
    debug("cloning base URI %O", base_uri);
    local::base_uri = object_program(this_object())(base_uri); // create a new URI object
  }
  else
    local::base_uri = [object(this_program)]base_uri;

  // RFC 2396, §5.2:
  // 1) The URI reference is parsed into the potential four components and
  //    fragment identifier, as described in Section 4.3.

  // 2) If the path component is empty and the scheme, authority, and
  //    query components are undefined, then it is a reference to the
  //    current document and we are done.  Otherwise, the reference URI's
  //    query and fragment components are defined as found (or not found)
  //    within the URI reference and not inherited from the base URI.
  //    (Doing this at once saves us some useless parsing efforts.)
  if(!raw_uri || raw_uri == "")
  {
    // Everything comes from the base URI, so one is required.
    if(!local::base_uri)
      error("Standards.URI: got a relative URI (no scheme) lacking a base_uri!\n");
    debug("Path is empty -- Inherit entire base URI as per RFC 2396, §5.2 step 2. Done!", 0);
    inherit_properties(local::base_uri);
    return;
  }

  if(raw_uri[0] == '#')
  {
    // Everything but the fragment comes from the base URI.
    if(!local::base_uri)
      error("Standards.URI: got a relative URI (no scheme) lacking a base_uri!\n");
    debug("Fragment only. Using entire base URI, except fragment.", 0);
    inherit_properties(local::base_uri);
    fragment = raw_uri[1..];
    return;
  }

  // The authority is only assigned below when the reference contains
  // one.  Clear what an earlier parse (or inherit_properties()) left
  // behind; otherwise a relative reference would keep the old authority
  // and skip both the inheritance from the base URI and the path merging
  // in steps 4-6.
  authority = user = password = host = 0;
  port = 0;

  // Parse fragment identifier
  sscanf(uri, "%s#%s", uri, fragment);
  debug("Found fragment %O", fragment);

  // Parse scheme
  if(sscanf(uri, "%[A-Za-z0-9+.-]:%s", scheme, uri) < 2)
  {
    scheme = 0;
    if(!local::base_uri)
      error("Standards.URI: got a relative URI (no scheme) lacking a base_uri!\n");
  }
  debug("Found scheme %O", scheme);

  // Parse authority/login
  if(sscanf(uri, "//%[^/]%s", authority, uri))
  {
    debug("Found authority %O", authority);
  }

  // Parse query information
  sscanf(uri, "%s?%s", uri, query);
  if(query && !sizeof(query))
    query = 0; // an empty query is treated as no query
  debug("Found query %O", query);

  // Parse path: whatever remains of the reference.
  path = uri;
  debug("Found path %O", path);

  // 3) If the scheme component is defined, indicating that the reference
  //    starts with a scheme name, then the reference is interpreted as an
  //    absolute URI and we are done.  Otherwise, the reference URI's
  //    scheme is inherited from the base URI's scheme component.
  if(scheme)
  {
    if(authority)
      parse_authority();
    debug("Scheme found! RFC 2396, §5.2, step 3 says we're absolute. Done!", 0);
    return;
  }
  scheme = local::base_uri->scheme;
  debug("Inherited scheme %O from base URI", scheme);

  if(authority)
    parse_authority();

  // 4) If the authority component is defined, then the reference is a
  //    network-path and we skip to step 7.  Otherwise, the reference
  //    URI's authority is inherited from the base URI's authority
  //    component, which will also be undefined if the URI scheme does not
  //    use an authority component.
  if(!authority || !sizeof(authority))
  {
    authority = local::base_uri->authority;
    debug("Inherited authority %O from base URI", authority);
    parse_authority();

    // 5) If the path component begins with a slash character ("/"), then
    //    the reference is an absolute-path and we skip to step 7.
    if(!sscanf(path, "/%*s"))
    {
      // 6) If this step is reached, then we are resolving a relative-path
      //    reference.  The relative path needs to be merged with the base
      //    URI's path.  Although there are many ways to do this, we will
      //    describe a simple method using a separate string buffer.
      string merged = combine_uri_path(local::base_uri->path, path);
      debug_n("Combining base path %O with path %O => %O",
	      ({ local::base_uri->path, path, merged }));
      path = merged;
    }
  }

  // 7) The resulting URI components, including any inherited from the
  //    base URI, are recombined to give the absolute form of the URI reference.
  //    (Reassembly is done at cast-to-string/sprintf() time)
}

//! @decl void create(URI uri)
//! @decl void create(URI uri, URI base_uri)
//! @decl void create(URI uri, string base_uri)
//! @decl void create(string uri)
//! @decl void create(string uri, URI base_uri)
//! @decl void create(string uri, string base_uri)
//! @param base_uri
//!   When supplied, will root the URI at the given location. This is
//!   needed to correctly verify relative URIs, but may be left out otherwise.
//!   If left out, and uri is a relative URI, an error is thrown.
//! @param uri
//!   When uri is another URI object, the created
//!   URI will inherit all properties of the supplied uri
//!   except, of course, for its base_uri.
void create(object(this_program)|string uri,
	    object(this_program)|string|void base_uri)
{
  debug_n("Create(%O, %O) called!", ({ uri, base_uri }));

  if(!stringp(uri))
  {
    // uri is a URI object: take over its raw string and parsed parts.
    raw_uri = uri->raw_uri;
    inherit_properties([object(this_program)]uri);
  }
  else
  {
    // Keep for future runs of reparse_uri after a base_uri change
    raw_uri = [string]uri;
  }

  reparse_uri(base_uri);
}

//! Assign a new value to a property of URI
//! @param property
//!   When any of the following properties are used, properties that
//!   depend on them are recalculated: user, password, host, port,
//!   authority, base_uri.
//! @param value
//!   The value to assign to @[property]
//! @returns
//!   The assigned value; for "base_uri" the resulting base URI object.
mixed `[]=(string property, mixed value)
{
  debug_n("`[]=(%O, %O)", ({ property, value }));

  switch(property)
  {
    case "user":
    case "password":
    case "host":
    case "port":
    {
      ::`[]=(property, value);

      // Rebuild the authority string from its parts.
      string userinfo = "";
      if(user)
      {
	userinfo = user;
	if(password)
	  userinfo += ":" + password;
	userinfo += "@";
      }

      string hostport = host || "";
      // The port is only spelled out when it differs from the default
      // port of the scheme.
      if(port != Protocols.Ports.tcp[scheme])
	hostport += ":" + port;

      authority = userinfo + hostport;
      return value;
    }

    case "authority":
      authority = [string]value;
      parse_authority(); // Set user, password, host and port accordingly
      return value;

    case "base_uri":
      // Resolve the raw URI anew against the new base.
      reparse_uri([object(this_program)|string]value);
      return base_uri;

    default:
      return ::`[]=(property, value); // Set and return the new value
  }
}

//! Works similar to @[`[]=(string property, mixed value)]
mixed `->=(string property, mixed value)
{
  return `[]=(property, value);
}

//! When cast to string, return the URI (in a canonicalized form).
//! When cast to mapping, return a mapping with scheme, authority, user,
//! password, host, port, path, query, fragment, raw_uri, base_uri as
//! documented above.
string|mapping cast(string to)
{
  if(to == "string")
    return _sprintf('s');

  if(to == "mapping")
  {
    array(string) fields = ({
      "scheme", "authority", "user", "password", "host", "port",
      "path", "query", "fragment", "raw_uri", "base_uri",
    });
    return mkmapping(fields, rows(this_object(), fields));
  }

  // Any other target type yields 0.
  return 0;
}

// Formatting hook used by sprintf().
//   't'  -- the type name, "Standards.URI".
//   'x'  -- the URI with scheme and host lower-cased.
//   's'  -- the URI; only the path and query part when the flag_left
//           entry of args is set.
//   'O'  -- the URI wrapped as URI("...").
string _sprintf(int how, mapping|void args)
{
  if(how == 't')
    return "Standards.URI";

  string full_form, request_part;

  if(how == 'x' || how == 's' || how == 'O')
  {
    string shown_scheme = scheme, shown_host = host;
    if(how == 'x')
    {
      // A case-mangling version, especially suited for readable hash values
      if(shown_host)
	shown_host = lower_case(shown_host);
      if(shown_scheme)
	shown_scheme = lower_case(shown_scheme);
    }

    // Path plus optional query.
    request_part = (path || "");
    if(query)
      request_part += "?" + query;

    // "//[user[:password]@]host[:port]", only when an authority exists.
    string net_loc = "";
    if(authority)
    {
      net_loc = "//";
      if(user)
      {
	net_loc += user;
	if(password)
	  net_loc += ":" + password;
	net_loc += "@";
      }
      net_loc += (shown_host || "");
      // The default port of the scheme is left out.
      if(port != Protocols.Ports.tcp[scheme])
	net_loc += ":" + port;
    }

    full_form = shown_scheme + ":" + net_loc + request_part;
    if(fragment)
      full_form += "#" + fragment;
  }

  if(how == 'O')
    return "URI(\"" + full_form + "\")";
  if(args && args->flag_left)
    return request_part;
  return full_form;
}

// Not used yet.
// Percent-encode the characters 0x00-0x1F and 0x80-0x9F, as well as
// space, "%", "'" and the double quote, as "%XX" with upper-case hex
// digits. All other characters are passed through unchanged.
string quote(string s)
{
  // Character codes that have to be escaped.
  array(int) unsafe = ({ ' ', '%', '\'', '"' });
  for(int c = 0; c < 0x20; c++)
    unsafe += ({ c, c + 0x80 });

  // Build the parallel "from" and "to" tables for replace().
  array(string) raw = ({});
  array(string) escaped = ({});
  foreach(unsafe, int c)
  {
    raw += ({ sprintf("%c", c) });
    escaped += ({ sprintf("%%%02X", c) });
  }

  return replace(s, raw, escaped);
}