(*   
     URI management.
     Based on simplified RFC 2396.
     
     Copyright (C) 2012 Antoine Miné
 *)


type uri =
    { uri_relative:  bool;
      uri_scheme:    string;
      uri_server:    string;
      uri_port:      int option;
      uri_path:      string list;
      uri_query:     (string*(string option)) list;
      uri_fragment:  string option;
    }
(* absolute URI  scheme://server:port/path?query#fragment 
   relative URI  path?query#fragment
 *)


(* http://server:port/ *)
let base_uri server port =
  { uri_relative = false;
    uri_scheme   = "http";
    uri_server   = server;
    uri_port     = port;
    uri_path     = [];
    uri_query    = [];
    uri_fragment = None;
  }


(* utilities *)

let hex_digit = 
  [|'0';'1';'2';'3';'4';'5';'6';'7';'8';'9';'A';'B';'C';'D';'E';'F'|]

let parse_hex c =
  if c>='0' && c<='9' then (Char.code c) - (Char.code '0')
  else if c>='a' && c<='f' then (Char.code c) - (Char.code 'a') + 10
  else if c>='A' && c<='F' then (Char.code c) - (Char.code 'A') + 10
  else raise(Invalid_argument("invalid hex digit: "^(String.make 1 c)))


(* escape special characters in an URI component *)
let uri_escape (s:string) : string = 
  let buf = Buffer.create 256 in
  for i=0 to (String.length s)-1 do let c = s.[i] in match c with
  | '\000'..'\031' | '\127'..'\255'                  (* control, non ASCII *)
  | ';' | '/'| '?' | ':' | '@'| '&'                     (* reserved *)
  | '=' | '+' | '$' | ','| '<' | '>' | '#' | '%' | '\"' (* delimiters *)
  | '{' | '}' | '|' | '\\' | '^' | '[' | ']' | '`'      (* unwise *)
  | ' '                                                 (* space *)
    ->
      Printf.bprintf buf "%%%c%c"
        hex_digit.(Char.code c / 16) hex_digit.(Char.code c mod 16)
  | _ -> Buffer.add_char buf c
  done;
  Buffer.contents buf


(* remove . and .. in path / URI *)
let path_canonicalize l =
  let rec canon skip accum = function
    | [] -> gen skip accum
    | ""::r -> canon skip accum r
    | "."::r -> canon skip accum r
    | ".."::r -> canon (skip+1) accum r
    | a::r when skip>0 -> canon (skip-1) accum r
    | a::r -> canon skip (a::accum) r
  and gen skip accum = if skip=0 then accum else gen (skip-1) (".."::accum)
  in
  canon 0 [] (List.rev l)
  

(* concat URIs *)
let uri_concat (base:uri) (rel:uri) =
  { uri_scheme = if rel.uri_scheme="" then base.uri_scheme else rel.uri_scheme;
    uri_server = if rel.uri_server="" then base.uri_server else rel.uri_server;
    uri_port   = if rel.uri_port = None then base.uri_port else rel.uri_port;
    uri_path   = 
    path_canonicalize
      (if rel.uri_relative then base.uri_path @ rel.uri_path else rel.uri_path);
    uri_relative = base.uri_relative && rel.uri_relative;
    uri_query    = rel.uri_query;
    uri_fragment = rel.uri_fragment;
  }


(* URI => string representation *)
let to_string (u:uri) : string =
  let buf = Buffer.create 256 in
  let escape s = Buffer.add_string buf (uri_escape s) in
  let query = function
      (a,None)   -> escape a
    | (a,Some b) -> escape a; Buffer.add_char buf '='; escape b
  in
  if u.uri_relative then () else (
  if u.uri_scheme <> "" then Printf.bprintf buf "%s:" u.uri_scheme;
  if u.uri_server <> "" then Printf.bprintf buf "//%s" u.uri_server;
  (match u.uri_port with None -> () | Some x -> Printf.bprintf buf ":%i" x);
  Buffer.add_char buf '/');
  if u.uri_path <> [] then
    (escape (List.hd u.uri_path);
     List.iter (fun l -> Buffer.add_char buf '/'; escape l) (List.tl u.uri_path)
    );
  if u.uri_query <> [] then
    (Buffer.add_char buf '?';
     query (List.hd u.uri_query);
     List.iter (fun x -> Buffer.add_char buf '&'; query x) (List.tl u.uri_query)
    );
  (match u.uri_fragment with 
  | None -> () | Some x -> Buffer.add_char buf '#'; escape x);
  Buffer.contents buf


(* string => URI *)
let of_string (s:string) : uri =
  let l = String.length s
  and buf = Buffer.create 256 in
  let rec unescape pos =
    if pos>=l then pos else match s.[pos] with
    | ':' | '/' | '?' | '&' | '=' | '#' -> pos
    | '%' when pos+2<l ->
        let c = Char.chr ((parse_hex s.[pos+1]) * 16 + (parse_hex s.[pos+2])) in
        Buffer.add_char buf c;
        unescape (pos+3)
    | x -> Buffer.add_char buf x; unescape (pos+1)
  in
  let unescape p = Buffer.clear buf; unescape p in
  let e = unescape 0 in
  let scheme,o =
    if e<l && s.[e]=':' then 
      let x = Buffer.contents buf in x, e+1
    else "", 0
  in
  let server,port,o =
    if o+1<l && s.[o]='/' && s.[o+1]='/' then
      let ee = unescape (o+2) in
      let x = Buffer.contents buf in
      if ee<l && s.[ee]=':' then
        let eee = unescape (ee+1) in
        let y = int_of_string (Buffer.contents buf) in
        x, Some y, eee
      else x, None, ee
    else "", None, o
  in
  let rel,o = if o<l && s.[o]='/' then false, o+1 else true, o in
  let path,query,fragment = ref [], ref [], ref None in
  let rec toto o =
    if o>=l then () 
    else if s.[o]='#' then 
      let o = unescape (o+1) in 
      fragment := Some (Buffer.contents buf); toto o
    else if s.[o]='?' || s.[o]='&' then
      let o = unescape (o+1) in
      let key = Buffer.contents buf in
      if key = "" then toto o else
      if o<l && s.[o]='=' then
        let o = unescape (o+1) in
        query := (key, Some (Buffer.contents buf))::(!query);
        toto o
      else (query := (key, None)::(!query); toto o)
    else
      let oo = unescape o in
      let oo = if o=oo then unescape (o+1) else oo in
      let b = Buffer.contents buf in
      if b<>"" then path := b::(!path);
      toto oo
  in
  toto o;
  { uri_scheme   = scheme;
    uri_server   = server;
    uri_port     = port;
    uri_path     = path_canonicalize (List.rev !path);
    uri_relative = rel;
    uri_query    = List.rev !query;
    uri_fragment = !fragment;
  }


