(* Copyright (C) 2007 Mauricio Fernandez http://eigenclass.org
 * See README.txt and LICENSE for the redistribution and modification terms *)

(* Compiled search pattern for the request prefix that every counted hit
   starts with.  BM_search is defined outside this file; it is presumably a
   Boyer-Moore style substring searcher -- confirm against its interface. *)
let prefix = BM_search.make "GET /ongoing/When/"

(* Applied right after the prefix: three digits, "x/", then a
   yyyy/mm/dd/name path (captured as group 1) terminated by a space. *)
let re = "[0-9][0-9][0-9]x/\\([0-9][0-9][0-9][0-9]/[0-9][0-9]/[0-9][0-9]/[^ .]+\\) "
let re = Str.regexp re

(* [process_block hash buf len] counts, in [hash], every article path found
   in the first [len] characters of [buf].

   Only [buf.[0] .. buf.[len - 1]] is valid data; whatever follows may be
   stale content from an earlier read.  A hit is therefore counted only when
   the whole regexp match ends at or before [len].

   NOTE(review): this assumes [BM_search.find_end] returns the offset just
   past the next occurrence of the prefix and raises [Not_found] when there
   is none -- confirm against BM_search. *)
let process_block hash buf len =
  let off = ref 0 in
  try
    while true do
      off := BM_search.find_end prefix buf !off;
      (* The search is not bounded by [len], so stop by hand once the prefix
         was found in the stale region. *)
      if !off >= len then raise Not_found;
      if Str.string_match re buf !off && Str.match_end () <= len then begin
        let k = Str.matched_group 1 buf in
        let seen = try Hashtbl.find hash k with Not_found -> 0 in
        Hashtbl.replace hash k (seen + 1);
        off := !off + 17 (* the match is at least this long *)
      end
    done
  with Not_found -> ()

(* [sort_results hash] returns the [(url, hits)] bindings of [hash] as a
   list ordered by decreasing hit count. *)
let sort_results hash =
  let pairs = Hashtbl.fold (fun url hits acc -> (url, hits) :: acc) hash [] in
  List.sort (fun (_, a) (_, b) -> compare b a) pairs

(* [print_top_n n l] prints at most the first [n] entries of [l] on standard
   output, one "hits: url" line per entry. *)
let rec print_top_n n = function
  | (url, hits) :: tl when n > 0 ->
      Printf.printf "%d: %s\n" hits url;
      print_top_n (n - 1) tl
  | _ -> ()

(* [input_bytes ic buf off n] reads from [ic] into [buf] starting at offset
   [off] until [n] characters have been stored or [input] reports that no
   more data is available.  Returns the offset just past the last character
   stored, i.e. [off + n] unless the input ran out first. *)
let input_bytes ic buf off n =
  let limit = off + n in
  let rec fill pos =
    if pos >= limit then pos
    else
      let got = input ic buf pos (limit - pos) in
      if got > 0 then fill (pos + got) else pos
  in
  fill off

(* [line_finder filename] scans [filename] block by block and prints the ten
   most requested article paths.

   Each block is cut at its last newline: only the complete lines are
   scanned, and the unfinished tail is moved to the front of the buffer so
   that it is completed by the next read.  This way no line is scanned twice
   and no hit is lost at a block boundary.  The channel is closed on every
   exit path.

   Raises [Sys_error] if the file cannot be opened or read. *)
let line_finder filename =
  let ic = open_in filename in
  let hash = Hashtbl.create 2000 in
  (* Capped so that the allocation also succeeds where
     [Sys.max_string_length] is far larger than available memory. *)
  let buf_len = min Sys.max_string_length (16 * 1024 * 1024) in
  let buf = String.create buf_len in
  let carried = ref 0 in
  let at_eof = ref false in
  (try
     while not !at_eof do
       let filled = input_bytes ic buf !carried (buf_len - !carried) in
       if filled < buf_len then begin
         (* A short read means the input is exhausted: scan all that is
            left, including a final line without a newline. *)
         at_eof := true;
         process_block hash buf filled
       end else begin
         (* Offset just past the last newline; the whole block when a single
            line is longer than the buffer, so that progress is guaranteed. *)
         let complete =
           try String.rindex_from buf (filled - 1) '\n' + 1
           with Not_found -> filled
         in
         process_block hash buf complete;
         carried := filled - complete;
         String.blit buf complete buf 0 !carried
       end
     done
   with e -> close_in_noerr ic; raise e);
  close_in ic;
  print_top_n 10 (sort_results hash)

(* Entry point: the log file to analyse is the first command-line argument. *)
let () =
  if Array.length Sys.argv < 2 then begin
    prerr_endline ("usage: " ^ Sys.argv.(0) ^ " LOGFILE");
    exit 2
  end else
    line_finder Sys.argv.(1)