(* Copyright (C) 2007 Mauricio Fernandez http//eigenclass.org * See README.txt and LICENSE for the redistribution and modification terms *) open Bigarray type bigstring_t = { bigarr : (char, int8_unsigned_elt, c_layout) Array1.t; data : string; length : int } let map_file fd ?pos ?(shared=false) len = let ba = Array1.map_file fd ?pos char c_layout shared len in let s = (Obj.magic (Obj.field (Obj.repr ba) 1) : string) in { bigarr = ba; data = s; length = Array1.dim ba } let length t = t.length let unsafe_string t = t.data let unsafe_get t n = String.unsafe_get t.data n let get t n = if n < 0 || n >= length t then invalid_arg "Bigstring.get: index out of bounds"; unsafe_get t n let unsafe_set t n c = String.unsafe_set t.data n c let set t n c = if n < 0 || n > length t then invalid_arg "Bigstring.set: index out of bounds"; unsafe_set t n c module BM_search = struct exception Done of int let memcmp a o1 b o2 len = let rec loop a b o1 o2 i = if i < len then if a.[o1] = b.[o2] then loop a b (o1 + 1) (o2 + 1) (i + 1) else false else true in loop a b o1 o2 0 let boyermoore_needlematch needle nlen portion offset = let virtual_begin = ref (nlen - offset - portion) in let ignore = ref 0 in if !virtual_begin < 0 then begin ignore := - !virtual_begin; virtual_begin := 0 end; if !virtual_begin > 0 && needle.[!virtual_begin - 1] == needle.[nlen - portion - 1] then false else memcmp needle (nlen - portion + !ignore) needle !virtual_begin (portion - !ignore) let max_i (a : int) (b : int) = if a > b then a else b type t = { skip : int array; occ : int array; needle : string; nlen : int } let make needle = let nlen = String.length needle in let skip = Array.make nlen 0 in let occ = Array.make 256 (-1) in for a = 0 to nlen - 1 - 1 do occ.(Char.code needle.[a]) <- a; done; for a = 0 to nlen - 1 do let value = ref 0 in while !value < nlen && not (boyermoore_needlematch needle nlen a !value) do incr value done; skip.(nlen - a - 1) <- !value done; { skip = skip; occ = occ; needle = needle; nlen = nlen } let uget = String.unsafe_get let boyermoore_search t haystack start hlen = let skip = t.skip and occ = t.occ and needle = t.needle and nlen = t.nlen in if nlen > hlen || nlen <= 0 then raise Not_found; try let hpos = ref start in let lim = hlen - nlen in while !hpos <= lim do let npos = ref (nlen - 1) in let c = ref (uget haystack (!npos + !hpos)) in while needle.[!npos] = !c do if !npos <> 0 then begin decr npos; c := uget haystack (!npos + !hpos) end else raise (Done !hpos) done; hpos := !hpos + max_i skip.(!npos) (!npos - occ.(Char.code !c)) done; raise Not_found with Done m -> m let find t haystack start = boyermoore_search t (unsafe_string haystack) start (length haystack) let find_end t haystack start = find t haystack start + t.nlen end module String = struct type t = string let unsafe_get t n = String.unsafe_get t n let unsafe_set t n c = String.unsafe_set t n c end