(*   
     HTTP protocol.
     
     Copyright (C) 2012 Antoine Miné
 *)


(* configuration *)
(* ------------- *)


let max_download_length = 64*1024*1024
let max_post_length = 64*1024*1024

(* max simultaneous requests *)
let max_connections = 10

(* in seconds, per request *)
let timeout = 8

(* whether to log requests fully, including cookies and post *)
let log_full_request = true


let server_name = "Analyzer"

let log_filename = "/tmp/analysis.log"


(* utilities *)
(* --------- *)


exception Timeout

let log = open_out_gen 
    [Open_wronly; Open_append; Open_creat]
    0o644 log_filename

let space = Str.regexp "[ ]+"

let semi = Str.regexp ";"

let eq = Str.regexp "="

let despace s =
  let l = String.length s in
  let rec left x = if x<l && s.[x]=' '   then left (x+1) else x in
  let rec right x = if x>0 && s.[x-1]=' ' then right (x-1) else x in
  let e,f = left 0, right l in
  if f=0 then "" else String.sub s e (f-e)


(* get a line, but without the trailing \r *)
let input_line_http i =
  let s = input_line i in
  let l = String.length s in
  if l>0 && s.[l-1]='\r' then String.sub s 0 (l-1) else s

(* skip empty lines *)
let rec input_line_http_nonempty i =
  match input_line_http i with
  | "" -> input_line_http_nonempty i
  | l -> l


(* cut header into bits H: V1; ...; Vn *)
let parse_header (s:string) : string (* H *) * string (* V1; ...; Vn *) * (string list (* [V1;...;Vn] *) ) =
  try
    let i = String.index s ':' in
    let h,r = String.sub s 0 i, String.sub s (i+1) (String.length s-i-1) in
    let h,r = String.lowercase (despace h), despace r in
    h, r, List.map despace (Str.split semi r)
  with _ -> s,"",[]


(* parses bindings a=b *)
let parse_bind (l:string list) : (string * string) list =
  let x = 
    List.fold_left
      (fun acc y ->
        match Str.split eq y with
          [a;b] -> (despace a, despace b)::acc | _ -> acc
      )
      [] l
  in
  List.rev x


let inet_addr = function
  | Unix.ADDR_INET (addr,port) -> addr, port
  | _ -> invalid_arg "not an Internet socket"


(* printed at each log line *)
let log_banner f =
  Printf.fprintf f "%t (%i)" Date.pp_cur_local (Unix.getpid ())


(* restart interrupted system calls *)
let rec no_eintr f a =
  try f a with Unix.Unix_error(Unix.EINTR,_,_) -> no_eintr f a




(* HTTP connection *)
(* --------------- *)

type http = {
    http_ver: int;                      (* protocol minor version: 0 or 1 *)
    http_uri: Uri.uri;
    http_post: (string * string) list;                     (* posted data *)
    http_cookies: (string * string) list;           (* cookies in request *)
    http_referer: Uri.uri option;
    http_out: out_channel;                              (* answer channel *)
    http_cgi: bool;                            (* CGI-bin vs. HTTP server *)
    http_addr: Unix.inet_addr;                          (* client address *)
    http_user_agent: string;
  }


(* standard HTTP errors *)
exception HTTP_error of int * string



(* MIME types *)
(* ---------- *)

let mime_html = "text/html; charset=UTF-8"
let mime_text = "text/plain; charset=UTF-8"
let mime_gif  = "image/gif"
let mime_png  = "image/png"
let mime_jpeg = "image/jpeg"
let mime_svg  = "image/svg+xml; charset=UTF-8"
let mime_css  = "text/css; charset=UTF-8"
let mime_js   = "text/javascript; charset=UTF-8" (* obsolete, but well supported *)
let mime_form = "multipart/form-data"


(* guess the mime type from the filename extension *)
let mime_of_filename filename =
  try 
    let p = String.rindex filename '.' in
    let ext = String.sub filename (p+1) (String.length filename - p - 1) in
    match String.lowercase ext with
    | "html" | "htm" -> mime_html
    | "gif" -> mime_gif
    | "png" -> mime_png
    | "jpg" | "jpeg" -> mime_jpeg
    | "svg" -> mime_svg
    | "css" -> mime_css
    | "js" -> mime_js
    | "txt" -> mime_text
    | _ -> raise Not_found
  with _ -> mime_text



(* printing *)
(* -------- *)

let hprintf (h:http) (x:('a,out_channel,unit) format) = 
  Printf.fprintf h.http_out x

let hstring (h:http) (s:string) = 
  output_string h.http_out s

let hchar (h:http) (c:char) =
  output_char h.http_out c

let hflush (h:http) =
  flush h.http_out




(* POST handling *)
(* ------------- *)

(* TODO:
   - be more robust 
   - nested multipart
   - transfert-encoding
 *)
let decode_post (i:in_channel) (len:int) (bound:string) 
    : (string * string) list =
  if len > max_post_length then invalid_arg "POST request too large";
  let posted = ref [] in
  set_binary_mode_in i true;
  let r = String.create len in
  really_input i r 0 len;
  (if bound = "" then
    (* regular post, parsed as URI *)
    let u = (Uri.of_string ("?"^r)).Uri.uri_query in
    posted := List.map (fun (a,b) -> a,match b with Some x -> x | _ -> "") u
  else
    (* multipart post *)
    let b = Str.regexp_string bound in
    let rec eat pos = 
      if r.[pos] = '-' && r.[pos+1] = '-' then () else
      (* eat one part *)
      let var = ref "" in
      let rec eath pos =
        (* eat one header line *)
        let e = String.index_from r pos '\r' in
        let h = String.sub r pos (e-pos) in
        match parse_header h with
        | "","",_ -> e+2
        | "content-disposition", _, ["form-data";n] ->
            var := String.sub n 6 (String.length n - 7);
            eath (e+2)
        | _ -> eath (e+2)
      in
      let pos = eath (pos+2) in
      (* get contents *)
      let next = Str.search_forward b r pos in
      let data = String.sub r pos (next-pos-4) in
      if !var <> "" then posted := (!var,data)::(!posted);
      eat (next + String.length bound)
    in
    eat ((Str.search_forward b r 0) + String.length bound));
  List.rev !posted
 




(* HTTP / CGI answers *)
(* ------------------ *)


(* generic HTTP header *)
let http_header ?(status=(200,"OK")) ?(cookies=[]) ?(mime=mime_html) ?date ?length (h:http) =
  if h.http_cgi
  then hprintf h "Status: %i %s\r\n" (fst status) (snd status)
  else hprintf h "HTTP/1.%i %i %s\r\n" h.http_ver (fst status) (snd status);
  hprintf h "Content-Type: %s\r\n" mime;
  if h.http_cgi then (
    hprintf h "Server: %s\r\n" server_name;
    if h.http_ver > 0 then hstring h "Connection: close\r\n"
   );
  (match length with None->() | Some x -> hprintf h "Content-Length: %i\r\n" x);
  hprintf h "Date: %s\r\n" (Date.to_string (Date.current ()));
  (match date with None -> () | Some d -> hprintf h "Last-modified: %s\r\n" (Date.to_string d));
  List.iter (fun (v,d) -> hprintf h "Set-Cookie: %s=\"%s\"\r\n" v d) cookies;
  hprintf h "X-Powered-By: OCaml/%s\r\n" Sys.ocaml_version;
  hstring h "\r\n";
  hflush h


(* error header and page *)
let http_error status (h:http) =
  http_header ~status ~mime:mime_html h;
  hprintf h 
    "<html><head><title>%i %s</title></head><body><h1>%i %s</h1></body></html>\r\n\r\n" 
    (fst status) (snd status) (fst status) (snd status);
  hflush h


(* redirection *)
let http_redirect ?(status=(301,"Moved Permanently")) (loc:string) (h:http) =
  Printf.fprintf log "%t redirected to %s\n" log_banner loc;
  if h.http_cgi
  then hprintf h "Status: %i %s\r\n" (fst status) (snd status)
  else hprintf h "HTTP/1.%i %i %s\r\n" h.http_ver (fst status) (snd status);
  hprintf h "Location: %s\r\n" loc;
  hstring h "\r\n";
  hflush h
    

(* only allow reaaaaly safe characters *)
let check_filename name =
  String.iter
    (function
      | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' 
      | '+' | '-' | '=' | '_' | '.' | ',' | ';' -> ()
      | _ -> raise (HTTP_error (404,"Not Found")) 
    )
    name
  
  
(* sends the contents of a file *)
let http_send_file ?mime (filename:string) (h:http) =
  let contents =
    try
      let f = open_in_bin filename in
      let st = Unix.fstat (Unix.descr_of_in_channel f) in
      if st.Unix.st_perm land 4 = 0 || st.Unix.st_kind <> Unix.S_REG then
        raise Not_found;
      let length = in_channel_length f in
      if length > max_download_length then raise Not_found;
      let buf = String.create length in
      really_input f buf 0 length;
      close_in f;
      Some (buf,length,st.Unix.st_mtime)
    with _ -> None
  in
  match contents with 
  | Some (buf,length,date) ->
      let mime = match mime with None -> mime_of_filename filename | Some x -> x
      in
      http_header ~mime ~length ~date h;
      output_string h.http_out buf;
      hflush h;
      Printf.fprintf log "%t file %s sent with type %s\n" 
        log_banner filename mime; 
      flush log
  | None -> raise (HTTP_error (404,"Not Found"))
        

  
(* HTTP requests *)
(* ------------- *)


(* parse a request, including POST body if needed *)

let http_parse_request (sock:Unix.file_descr) : http =
  let i,o = Unix.in_channel_of_descr sock, Unix.out_channel_of_descr sock in
  try
    (* get client address *)
    let addr = fst (inet_addr (Unix.getsockname sock)) in
    (* get request *)
    let post, uri, ver = match Str.split space (input_line_http_nonempty i) with
    | ["GET";uri;"HTTP/1.0"] -> false, uri, 0
    | ["GET";uri;"HTTP/1.1"] -> false, uri, 1
    | ["POST";uri;"HTTP/1.0"] -> true, uri, 0
    | ["POST";uri;"HTTP/1.1"] -> true, uri, 1
    | _ -> raise (HTTP_error (501,"Not Implemented"))
    in
    let referer = ref None 
    and cookies = ref []
    and bound = ref ""
    and len = ref 0
    and agent = ref ""
    in
    (* parse headers *)
    let rec eat () = 
      let h = input_line_http i in
      match parse_header h with
      | "","",[] -> ()
      | "referer", r, _ ->  referer := Some (Uri.of_string r); eat ()
      | "expect", _, ["100-continue"] ->
          output_string o "HTTP/1.1 100 Continue\r\n\r\n";
          flush o;
          eat ()
      | "cookie", _, k -> cookies := parse_bind k; eat ()
      | "content-length", l, _ -> len := int_of_string l; eat ()
      | "content-type", _, ("multipart/form-data")::k ->
          bound := List.assoc "boundary" (parse_bind k);
          eat ()
      | "user-agent", l, _ -> agent := l; eat ()
      | _ -> eat ()
    in
    eat ();
    (* post *)
    let posted = if post && !len <> 0 then decode_post i !len !bound else [] 
    in
    { http_ver = ver;
      http_uri = Uri.of_string uri;
      http_post = posted;
      http_cookies = List.rev !cookies;
      http_referer = !referer;
      http_out = o;
      http_cgi = false;
      http_addr = addr;
      http_user_agent = !agent;
    }
  with _ -> raise (HTTP_error (400,"Bad Request"))



(* useful to reply to errors *)
let http_empty_request (sock:Unix.file_descr) : http =
  { http_ver = 1;
    http_uri = Uri.base_uri "" None;
    http_post = [];
    http_cookies = [];
    http_referer = None;
    http_out = Unix.out_channel_of_descr sock ;
    http_cgi = false;
    http_addr = fst (inet_addr (Unix.getsockname sock));
    http_user_agent = "";
  }



(* for debugging *)
let log_http_req o h =
  let log_list msg l =
    if l = [] then () else (
    Printf.fprintf o "  %s:\n" msg;
    List.iter 
      (fun (a,b) ->
        Printf.fprintf o "    %s=" a;
        if String.contains b '\n' then output_string o "\n";
        Printf.fprintf o "%s\n" b
      )
      l
   )
  in
  Printf.fprintf o "%t request:\n" log_banner;    
  Printf.fprintf o "  ver = 1.%i\n" h.http_ver;
  Printf.fprintf o "  client = %s\n" (Unix.string_of_inet_addr h.http_addr);
  Printf.fprintf o "  user-agent = %s\n" h.http_user_agent;
  Printf.fprintf o "  uri = %s\n" (Uri.to_string h.http_uri);
  Printf.fprintf o "  referere = %s\n" (match h.http_referer with None -> "none" | Some u -> Uri.to_string u);
  log_list "cookie" h.http_cookies;
  log_list "post" h.http_post


(* CGI requests *)
(* ------------ *)

let getenv_checked s =
  try Sys.getenv s 
  with Not_found -> 
    Printf.fprintf log "%t undefined environment variable %s\n" log_banner s;
    raise Not_found


(* get request from environment variables, and get a POST body if needed *)
let cgi_parse_request () : http =
  let i,o = stdin, stdout in
  let ver = match getenv_checked "SERVER_PROTOCOL" with
  | "HTTP/1.0" -> 0 | "HTTP/1.1" -> 1
  | _ ->  raise (HTTP_error (501,"Not Implemented"))
  in
  let post = match getenv_checked "REQUEST_METHOD" with
  | "GET" -> false | "POST" -> true
  | _ -> raise (HTTP_error (501,"Not Implemented"))
  in
  let len = try int_of_string (Sys.getenv "CONTENT_LENGTH") with _ -> 0 in
  let mime = try Sys.getenv "CONTENT_TYPE" with _ -> "" in
  let uri = 
    Printf.sprintf "%s%s?%s"
      (getenv_checked "SCRIPT_NAME")
      (try Sys.getenv "PATH_INFO" with Not_found -> "")
      (try Sys.getenv "QUERY_STRING" with Not_found -> "")
  in
  let cookies = 
    try parse_bind (Str.split semi (Sys.getenv "HTTP_COOKIE")) 
    with Not_found -> []
  in
  let posted = match Str.split semi mime with
  | ("multipart/form-data")::k  when post && len > 0 ->
      let bound = List.assoc "boundary" (parse_bind k) in
      decode_post i len bound
  | _ -> 
      if post && len > 0 then decode_post i len "" else []
  in
  { http_ver = ver;
    http_uri = Uri.of_string uri;
    http_post = posted;
    http_cookies = List.rev cookies;
    http_referer = None;
    http_out = o;
    http_cgi = true;
    http_addr = Unix.inet_addr_of_string (getenv_checked "REMOTE_ADDR");
    http_user_agent = (try Sys.getenv "HTTP_USER_AGENT" with _ -> "");
  }


let cgi_empty_request () : http =
  { http_ver = 1;
    http_uri = Uri.base_uri "" None;
    http_post = [];
    http_cookies = [];
    http_referer = None;
    http_out = stdout;
    http_cgi = true;
    http_addr = Unix.inet_addr_loopback;
    http_user_agent = "";
  }


  
(* Server *)
(* ------ *)


(* generic server: listen to port and call handler in a forked process
   for each connection
 *)
let server (port:int) (handler:Unix.file_descr -> unit) =
  Sys.set_signal 
    Sys.sigchld (Sys.Signal_handle (fun _ -> ignore (Unix.wait ())));
  let sock = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
  Unix.setsockopt sock Unix.SO_REUSEADDR true;
  Unix.bind sock (Unix.ADDR_INET(Unix.inet_addr_any, port));
  Unix.listen sock 15;
  Sys.catch_break true;
  Printf.fprintf log "%t starting server, listening to port %i\n" log_banner port;
  flush log;
  try while true do
    let client, addr = no_eintr Unix.accept sock in
    match addr with
    | Unix.ADDR_INET (addr,port) ->
        Printf.fprintf log "%t server accepting connection from %s:%i\n"
          log_banner (Unix.string_of_inet_addr addr) port;
        flush log;
        (match Unix.fork () with
        | 0 -> 
            (try 
              handler client; 
              Unix.shutdown client Unix.SHUTDOWN_ALL;
            with _ -> ());
            exit 0
        | _ -> Unix.close client)
    | _ -> Unix.close client
  done with x -> 
    Unix.shutdown sock Unix.SHUTDOWN_ALL;
    Printf.fprintf log "%t caught %s, shutting down server\n" 
      log_banner (Printexc.to_string x);
    flush log;
    Unix.close sock



let error_net (emptyh:http) (f: 'a -> 'b) (x: 'a) : 'b =
  try f x; hflush emptyh
  with
  | Exit -> ()
  | Timeout ->
      Printf.fprintf log "%t timeout\n" log_banner;
      flush log;
      http_error (408,"Request Timeout") emptyh
  | HTTP_error (stat,msg) -> 
      Printf.fprintf log "%t error %i %s\n" log_banner stat msg;
      flush log;
      http_error (stat,msg) emptyh;
      exit 1
  | x -> 
      Printf.fprintf log "%t exception %s\n" log_banner (Printexc.to_string x); 
      flush log;
      http_error (500,"Internal Server Error") emptyh;
      exit 1



let set_timeout timeout =
  if timeout > 0 then (
    Sys.set_signal Sys.sigalrm (Sys.Signal_handle (fun _ -> raise Timeout));
    ignore (Unix.alarm timeout)
   )
    

(* request helper;
   directly handles static files, server-side cookies (TODO) ;
   pass the rest to handler function;
   handles errors
*)
let request_handler (handler:http -> unit) (h:http) =
  error_net h 
    (fun h -> 
      Printf.fprintf log "%t request %s, %i cookie(s), %i post(s), ref %s\n" 
        log_banner 
        (Uri.to_string h.http_uri)
        (List.length h.http_cookies) (List.length h.http_post)
        (match h.http_referer with None -> "none" | Some x -> Uri.to_string x);
      flush log;

      (* TODO: server-side cookies *)

      match h.http_uri.Uri.uri_path with
      | l when List.exists (fun a -> a = "cgi-bin" || a = "cgi-bin2") l ->
          if log_full_request then (log_http_req log h; flush log);
          handler h
      | [] | ["static"] ->
          http_redirect (Filename.concat "static" "index.html") h
      | ["favicon.ico" as f] ->
          http_redirect (Filename.concat "static" f) h
      | ["static";res] ->
          check_filename res;
          http_send_file (Filename.concat "static" res) h
      | _ -> raise (HTTP_error (404,"Not Found")) 
    ) h



(* entry points *)
(* ------------ *)

(* generic HTTP server *)
let http_server (port:int) (handler:http -> unit) =
  server port
    (fun sock ->
      let start = Sys.time () in
      Printf.fprintf log "%t handling HTTP request\n" log_banner;
      flush log;
      set_timeout timeout;
      error_net
        (http_empty_request sock)
        (fun sock -> request_handler handler (http_parse_request sock)) sock;
      let stop = Sys.time () in
      Printf.fprintf log "%t request ended normally (%f s)\n" log_banner (stop-.start);
      flush log
    )
    

(* generic CGI-bin answerer *)
let cgi_answerer (handler:http -> unit) =
  let start = Sys.time () in
  Printf.fprintf log "%t handling CGI request\n" log_banner;
  flush log;
  set_timeout timeout;
  error_net
    (cgi_empty_request ()) 
    (fun () -> request_handler handler (cgi_parse_request ())) ();
  let stop = Sys.time () in
  Printf.fprintf log "%t request ended normally (%f s)\n" log_banner (stop-.start);
  flush log
