(* Copyright (C) 2007 Mauricio Fernandez http://eigenclass.org
 * See README.txt and LICENSE for the redistribution and modification terms *)

(* Scans the file named by the first command-line argument for requests that
   start with "GET /ongoing/When/", counts each distinct key that follows and
   prints the ten most frequent ones. *)

(** Precompiled search pattern that introduces a candidate request. *)
let prefix = Bigstring.BM_search.make "GET /ongoing/When/"

(** Positions inside a key that must hold decimal digits, and how many such
    positions there are (see [check_match]). *)
let digits = [|0; 1; 2; 3; 5; 6; 8; 9|]
and ndigits = 8

(** Length of the "NNNx/" segment that [check_prefix] validates. *)
let decade_len = 5

(** Shortest key accepted by [sub_until_space]; [check_match] and the hash
    function below index keys up to position 10 and at the last character. *)
let min_key_len = 12

(** [sub_until_space src off] returns a fresh string holding the characters
    of [src] from offset [off] up to (not including) the next space.

    @raise Not_found if a ['.'] is met before a space, if the end of [src] is
    reached before a space, or if fewer than 12 characters precede the
    space. *)
let sub_until_space src off =
  let s = Bigstring.unsafe_string src in
  let len = Bigstring.length src in
  (* The reads below are unchecked, so the scan itself must stop at the end
     of the buffer: a truncated last line has no terminating space. *)
  let rec count i n =
    if i >= len then raise Not_found
    else
      match Bigstring.String.unsafe_get s i with
      | ' ' -> n
      | '.' -> raise Not_found
      | _ -> count (i + 1) (n + 1)
  in
  let nchars = count off 0 in
  if nchars < min_key_len then raise Not_found;
  (* Buffer is used instead of String.create plus in-place writes, which are
     no longer available in current standard libraries. *)
  let buf = Buffer.create nchars in
  for i = off to off + nchars - 1 do
    Buffer.add_char buf (Bigstring.String.unsafe_get s i)
  done;
  Buffer.contents buf

(** [check_prefix bs off] checks that [bs] holds three decimal digits followed
    by "x/" starting at offset [off]. The reads are unchecked: the caller
    must guarantee that offsets [off] to [off + 4] lie inside [bs].

    @raise Not_found if the five characters do not have that shape. *)
let check_prefix bs off =
  let s = Bigstring.unsafe_string bs in
  for i = 0 to 2 do
    match Bigstring.String.unsafe_get s (off + i) with
    | '0' .. '9' -> ()
    | _ -> raise Not_found
  done;
  if Bigstring.String.unsafe_get s (off + 3) <> 'x'
     || Bigstring.String.unsafe_get s (off + 4) <> '/'
  then raise Not_found

(** [check_match s] checks that [s] starts with a "DDDD/DD/DD/" shaped
    segment: digits at the positions listed in [digits] and ['/'] at
    positions 4, 7 and 10. [s] must be at least 11 characters long, which
    [sub_until_space] guarantees for the strings it returns.

    @raise Not_found if [s] does not have that shape. *)
let check_match s =
  for i = 0 to ndigits - 1 do
    match s.[digits.(i)] with
    | '0' .. '9' -> ()
    | _ -> raise Not_found
  done;
  if s.[4] <> '/' || s.[7] <> '/' || s.[10] <> '/' then raise Not_found

(** Hash table keyed by strings already accepted by [check_match]. The hash
    mixes the characters at positions 5, 6, 8 and 9 with the last character,
    so it must only be applied to keys of at least 10 characters. *)
module Hashtbl = Hashtbl.Make (struct
  type t = string

  (* Compare lengths first so that keys of different sizes are rejected
     without comparing their contents. *)
  let equal x y =
    if String.length x <> String.length y then false else x = y

  let hash x =
    (Char.code x.[5] - 48) lsl 4
    + (Char.code x.[6] - 48) lsl 8
    + (Char.code x.[8] - 48) lsl 16
    + (Char.code x.[9] - 48) lsl 24
    + Char.code x.[String.length x - 1]
end)

(** [process_block hash bs] scans [bs] for every occurrence of [prefix] and,
    for each occurrence followed by a well-formed key, increments the counter
    bound to that key in [hash], creating it at 1 on first sight.

    The loop only ends through [Not_found], which is caught here: it is
    raised explicitly when fewer than six bytes remain after a match, and
    presumably by [Bigstring.BM_search.find_end] when no further match
    exists (NOTE(review): confirm against the Bigstring implementation). *)
let process_block hash bs =
  let len = Bigstring.length bs in
  let off = ref 0 in
  try
    while true do
      off := Bigstring.BM_search.find_end prefix bs !off;
      (* check_prefix reads five bytes unchecked and the key starts right
         after them, so stop when they are not all available. *)
      if !off + decade_len >= len then raise Not_found;
      try
        check_prefix bs !off;
        let k = sub_until_space bs (!off + decade_len) in
        check_match k;
        (* Resume exactly at the space that ended the key. A fixed skip
           could jump past text that was never examined, or past the end of
           the buffer on a short final record. *)
        off := !off + decade_len + String.length k;
        (try incr (Hashtbl.find hash k)
         with Not_found -> Hashtbl.add hash k (ref 1))
      with Not_found ->
        (* Rejected candidate: step over the "NNNx/" slot and search on. *)
        off := !off + decade_len
    done
  with Not_found -> ()

(** [sort_results hash] returns the [(key, hits)] pairs of [hash] ordered by
    decreasing hit count. *)
let sort_results hash =
  let pairs = Hashtbl.fold (fun url hits acc -> (url, !hits) :: acc) hash [] in
  (* compare gives the same ordering as a subtraction and cannot overflow. *)
  List.sort (fun (_, a) (_, b) -> compare (b : int) a) pairs

(** [print_top_n n l] prints at most the first [n] entries of [l] on standard
    output, one per line, formatted as "hits: key". *)
let rec print_top_n n = function
  | [] -> ()
  | (url, hits) :: tl ->
      if n > 0 then begin
        Printf.printf "%d: %s\n" hits url;
        print_top_n (n - 1) tl
      end

(** [line_finder filename] maps [filename], counts its keys and prints the
    ten most frequent ones. *)
let line_finder filename =
  (* NOTE(review): the descriptor is never closed here. Whether it may be
     closed once the file is mapped depends on Bigstring.map_file, which is
     not visible from this file; confirm before adding a Unix.close. *)
  let bs =
    Bigstring.map_file (Unix.openfile filename [Unix.O_RDONLY] 0o644) (-1)
  in
  let hash = Hashtbl.create 2000 in
  process_block hash bs;
  print_top_n 10 (sort_results hash)

(* Entry point: the log file is the first command-line argument. *)
let () =
  if Array.length Sys.argv < 2 then begin
    prerr_endline ("usage: " ^ Sys.argv.(0) ^ " <logfile>");
    exit 2
  end;
  line_finder Sys.argv.(1)