(* Copyright (C) 2008 Mauricio Fernandez http//eigenclass.org * See README.txt and LICENSE for the redistribution and modification terms *) open Printf open ExtList module H = Hashtbl let re = Pcre.regexp "^/ongoing/When/\\d\\d\\dx/\\d\\d\\d\\d/\\d\\d/\\d\\d/[^ .]+$" let self_ref_re = Pcre.regexp "^\"http://www.tbray.org/ongoing/" let (@@) f x = f x let (+!), (/!), (-!) = Int64.add, Int64.div, Int64.sub let u_hits, u_bytes, s404s, clients, refs = H.create 8192, H.create 8192, H.create 8192, H.create 65535, H.create 65535 let incr_count h k = try incr (H.find h k) with Not_found -> H.add h k (ref 1) let add_to_num_count h k n = try let r = H.find h k in r := Int64.add !r n with Not_found -> H.add h k (ref n) let report ?(n = 10) compf print_f label hash = let keep n x ((elms, set) as t) = if elms = 0 then (1, PMap.add x true set) else let min, _ = PMap.min_binding set in if compf x min <= 0 then t else if elms < n then (elms + 1, PMap.add x true set) else (elms, PMap.add x true (PMap.remove_min_binding set)) in let elements (_, set) = List.rev (PMap.foldi (fun k _ l -> k :: l) set []) in printf "Top %s:\n" label; List.iter print_f @@ List.rev @@ elements @@ H.fold (fun k v s -> keep n (!v, k) s) hash (0, PMap.create compf); printf "\n" let fsize filename = (Unix.LargeFile.stat filename).Unix.LargeFile.st_size let chunks n filename = let len = fsize filename in let size = len /! (Int64.of_int n) in let rec loop ic acc start = if start >= len then (close_in ic; acc) else let stop = start +! size in if stop >= len then (close_in ic; (start, len) :: acc) else begin LargeFile.seek_in ic stop; try ignore (input_line ic); let p = LargeFile.pos_in ic in loop ic ((start, p) :: acc) p with End_of_file -> close_in ic; (start, len) :: acc end in List.rev (loop (open_in filename) [] 0L) open Str_util let b = make_word_info 1024 let proc_chunk (start, stop) = let record str b u bytes = if bytes <> 0 then add_to_num_count u_bytes u (Int64.of_int bytes); if Pcre.pmatch ~rex:re u then begin let client = word b str 0 and ref = word b str 10 in incr_count u_hits u; incr_count clients client; if ref <> "\"-\"" && not (Pcre.pmatch ~rex:self_ref_re ref) then incr_count refs (String.sub ref 1 (max 0 (String.length ref - 2))) end in let process_block str len = let rec loop str offset len = if offset < len then begin find_line_words ~offset b str len; if num_words b >= 11 && word b str 5 = "\"GET" then begin match word b str 8 with "200" -> record str b (word b str 6) (try int_of_string (word b str 9) with _ -> 0) | "304" -> record str b (word b str 6) 0 | "404" -> incr_count s404s (word b str 6) | _ -> () end; loop str (line_end_offset b + 1) len end in loop str 0 len in let buflen = min Sys.max_string_length 20000000 in let buf = String.create buflen in let to_l h = H.fold (fun k v l -> (k, !v) :: l) h [] in let fd = Unix.openfile Sys.argv.(1) [Unix.O_RDONLY] 0o644 in ignore (Unix.LargeFile.lseek fd start Unix.SEEK_SET); let rec loop off n = let read = Util.input_bytes fd buf off (Int64.to_int (min n (Int64.of_int (buflen - off)))) in let bytes = off + read in let last_line_start = try String.rindex_from buf (bytes - 1) '\n' + 1 with Not_found -> bytes in let read = Int64.of_int read in process_block buf last_line_start; if read < n then begin String.blit buf last_line_start buf 0 (bytes - last_line_start); loop (bytes - last_line_start) (n -! read) end else (to_l u_hits, to_l u_bytes, to_l s404s, to_l clients, to_l refs) in H.clear u_bytes; List.iter H.clear [u_hits; s404s; clients; refs]; loop 0 (stop -! start) let merge (u_hits', u_bytes', s404s', clients', refs') = let add f newh oldh = List.iter (fun (k, v) -> try let old = Hashtbl.find oldh k in old := f !old v with Not_found -> Hashtbl.add oldh k (ref v)) newh in add (+!) u_bytes' u_bytes; List.iter (fun (a, b) -> add (+) a b) [u_hits', u_hits; s404s', s404s; clients', clients; refs', refs] let () = let compare_pairs compare (a1, b1) (a2, b2) = match compare a1 a2 with 0 -> String.compare b2 b1 (* reverse *) | r -> r in let shrink s = if String.length s > 60 then String.sub s 0 60 ^ "..." else s in let r1 ?n = report ?n (compare_pairs compare) (fun (n, s) -> printf " %10d: %s\n" n (shrink s)) in let mega = 1024. *. 1024. in let r2 ?n = report ?n (compare_pairs Int64.compare) (fun (n, s) -> printf " %9.1fM: %s\n" (Int64.to_float n /. mega) (shrink s)) in let workers = try int_of_string (Array.get Sys.argv 2) with _ -> 1 in let nchunks = try int_of_string (Array.get Sys.argv 3) with _ -> workers in let rec collect acc = function [] -> acc | ((res, isdone) as hd) :: tl -> if isdone () then (merge (res ()); acc @ tl) (* only one merge per iteration *) else collect (hd :: acc) tl in let wait () = ignore (Unix.select [] [] [] 0.001) in let rec loop ws chunks = match (ws, chunks) with [], [] -> () | ws, chunks when List.length ws >= workers -> wait(); loop (collect [] ws) chunks | ws, [] -> wait (); loop (collect [] ws) chunks | ws, (c::tl) -> loop (Parallel.invoke' proc_chunk c :: ws) tl in let c = (chunks nchunks (Array.get Sys.argv 1)) in List.iter (fun (s, e) -> eprintf "%s -- %s\n%!" (Int64.to_string s) (Int64.to_string e)) c; loop [] c; let len = H.length in printf "%d resources, %d 404s, %d clients\n\n" (len u_hits) (len s404s) (len clients); r1 "URIs by hit" u_hits; r2 "URIs by bytes" u_bytes; r1 "404s" s404s; r1 "client addresses" clients; r1 "referrers" refs