(* Copyright (C) 2007 Mauricio Fernandez http//eigenclass.org
 * See README.txt and LICENSE for the redistribution and modification terms *)

(* Search pattern for the request prefix that precedes every URL of
   interest.  NOTE(review): the module name suggests a Boyer-Moore matcher;
   confirm against the Bigstring module. *)
let prefix = Bigstring.BM_search.make "GET /ongoing/When/"

(* Regexp applied to the text that follows [prefix].  Group 1 captures the
   "yyyy/mm/dd/name" part of the path.  The name may contain neither a space
   nor a dot and must be followed immediately by a space. *)
let re =
  Str.regexp
    "[0-9][0-9][0-9]x/\\([0-9][0-9][0-9][0-9]/[0-9][0-9]/[0-9][0-9]/[^ .]+\\) "

(* [copy_until_space src off dst] copies bytes of [src], starting at offset
   [off], into [dst] starting at index 0, up to and including the first
   space.

   The copy is bounded by both [Bigstring.length src] and
   [String.length dst].  When either limit is reached before a space has
   been seen, the partial token is discarded by storing a space in
   [dst.[0]], so [dst] reads as an empty token.  Without these bounds an
   over-long token raised [Invalid_argument] and a token running up to the
   end of the input was read past the end of [src].

   NOTE(review): [dst] is mutated in place, which requires a compiler in
   which strings are mutable (no -safe-string); kept to preserve the
   function's [string] interface.
   NOTE(review): assumes that indices into [Bigstring.unsafe_string src] are
   the same offsets that [Bigstring.length] counts; the callers already
   rely on this -- confirm against the Bigstring module. *)
let copy_until_space src off dst =
  let src_len = Bigstring.length src in
  let dst_len = String.length dst in
  let raw = Bigstring.unsafe_string src in
  let rec loop i j =
    if i >= src_len || j >= dst_len then begin
      (* Out of input or out of room before any space: drop the token. *)
      if dst_len > 0 then dst.[0] <- ' '
    end else begin
      let c = Bigstring.String.unsafe_get raw i in
      dst.[j] <- c;
      if c <> ' ' then loop (i + 1) (j + 1)
    end
  in
  loop off 0

(* [process_block hash bs] scans [bs] for occurrences of [prefix] and, for
   every occurrence followed by text matching [re], increments the counter
   stored in [hash] under the captured path (creating it at 1 when absent).

   The scan stops when the search offset reaches [Bigstring.length bs] or
   when [Not_found] is raised.  NOTE(review): presumably
   [Bigstring.BM_search.find_end] returns the offset just past the next
   occurrence and signals "no more matches" by one of those two means;
   confirm against the Bigstring module. *)
let process_block hash bs =
  try
    (* Scratch buffer reused for every candidate.  A token that does not fit
       is discarded by [copy_until_space] and therefore never matches. *)
    let buf = String.make 2048 ' ' in
    let off = ref 0 in
    let len = Bigstring.length bs in
    while true do
      off := Bigstring.BM_search.find_end prefix bs !off;
      if !off >= len then raise Not_found;
      copy_until_space bs !off buf;
      if Str.string_match re buf 0 then begin
        let k = Str.matched_group 1 buf in
        (* the match is at least this long *)
        off := !off + 17;
        try incr (Hashtbl.find hash k)
        with Not_found -> Hashtbl.add hash k (ref 1)
      end
    done
  with Not_found -> ()

(* [sort_results hash] returns the [(url, hits)] pairs of [hash] sorted by
   decreasing hit count.  [compare] is used rather than subtraction so the
   ordering cannot be corrupted by integer overflow. *)
let sort_results hash =
  let by_hits_desc (_, (a : int)) (_, (b : int)) = compare b a in
  List.sort by_hits_desc
    (Hashtbl.fold (fun url hits acc -> (url, !hits) :: acc) hash [])

(* [print_top_n n l] prints at most the first [n] entries of [l] on standard
   output, one "hits: url" line per entry.  Prints nothing when [n <= 0]. *)
let rec print_top_n n = function
  | [] -> ()
  | (url, hits) :: tl ->
      if n > 0 then begin
        Printf.printf "%d: %s\n" hits url;
        print_top_n (n - 1) tl
      end

(* [line_finder filename] maps [filename], counts the matching requests and
   prints the ten most requested paths.  The file descriptor is closed once
   the contents have been processed, also when processing raises.
   Exceptions from [Unix.openfile] (e.g. [Unix.Unix_error]) propagate to the
   caller. *)
let line_finder filename =
  let fd = Unix.openfile filename [Unix.O_RDONLY] 0o644 in
  let hash = Hashtbl.create 2000 in
  (try
     let bs = Bigstring.map_file fd (-1) in
     process_block hash bs
   with e ->
     Unix.close fd;
     raise e);
  Unix.close fd;
  print_top_n 10 (sort_results hash)

(* Entry point: expects the log file name as the first command-line
   argument.  A missing argument yields a usage message and exit status 2
   instead of an uncaught [Invalid_argument]. *)
let () =
  if Array.length Sys.argv < 2 then begin
    prerr_endline ("usage: " ^ Sys.argv.(0) ^ " LOGFILE");
    exit 2
  end;
  line_finder Sys.argv.(1)