1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
|
(***********************************************************************)
(* *)
(* SpamOracle -- a Bayesian spam filter *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 2002 Institut National de Recherche en Informatique et *)
(* en Automatique. This file is distributed under the terms of the *)
(* GNU Public License version 2, http://www.gnu.org/licenses/gpl.txt *)
(* *)
(***********************************************************************)
(* $Id$ *)
(** Approximate HTML scanner.  Extracts words and certain parameters
    of certain tags (e.g. URLs) from HTML text. *)

(** [extract_text s] takes a string [s] and returns a string.

    Going by the module description, [s] is expected to be HTML text and
    the result carries the words and selected tag parameters (e.g. URLs)
    extracted from it.

    NOTE(review): the layout of the returned string (how extracted items
    are separated, whether markup is dropped or replaced) and the handling
    of malformed HTML are decided by the implementation, which is not
    visible from this interface -- confirm before relying on them. *)
val extract_text: string -> string
|