File: spider.ml

package info (click to toggle)
sks 1.1.5-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,076 kB
  • ctags: 3,243
  • sloc: ml: 15,262; ansic: 1,069; makefile: 346; sh: 284
file content (152 lines) | stat: -rw-r--r-- 4,957 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
(***********************************************************************)
(* spider.ml - start with a SKS server and spider the entire network   *)
(*             by recursively crawling peers from stats pages          *)
(*                                                                     *)
(* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, *)
(*               2011, 2012, 2013  Yaron Minsky and Contributors       *)
(*                                                                     *)
(* This file is part of SKS.  SKS is free software; you can            *)
(* redistribute it and/or modify it under the terms of the GNU General *)
(* Public License as published by the Free Software Foundation; either *)
(* version 2 of the License, or (at your option) any later version.    *)
(*                                                                     *)
(* This program is distributed in the hope that it will be useful, but *)
(* WITHOUT ANY WARRANTY; without even the implied warranty of          *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   *)
(* General Public License for more details.                            *)
(*                                                                     *)
(* You should have received a copy of the GNU General Public License   *)
(* along with this program; if not, write to the Free Software         *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA or see <http://www.gnu.org/licenses/>.                          *)
(***********************************************************************)

open StdLabels
open MoreLabels
open Printf
open Pstyle
open Common
module Set = PSet.Set
module Unix = UnixLabels

let stats_timeout = 10

(** Argument parsing *)
let root =
  if Array.length Sys.argv = 2 then
    (Sys.argv.(1),11370)
  else
    ("pool.sks-keyservers.net",11370)

let input_lines cin =
  let rec loop lines =
    match (try Some (input_line cin)
           with End_of_file -> None)
    with
      None -> List.rev lines
    | Some l -> loop (l::lines)
  in
  loop []

let get_ip_opt hostname =
  if hostname = "localhost" then None
  else
    try
      let he = Unix.gethostbyname hostname in
      Some he.Unix.h_addr_list
    with
      Invalid_argument _ | Not_found -> None

let fetch_url url =
  let cin = Unix.open_process_in (sprintf "curl -s -m %d \"%s\"" stats_timeout url) in
  let lines = input_lines cin in
  match Unix.close_process_in cin with
  | Unix.WEXITED 0 -> Some lines
  | _ -> None

let start_line = Str.regexp "<h2>Gossip Peers.*"
let whitespace = Str.regexp "[ \t<]+"
let end_td = Str.regexp "</td></tr>$"

let get_peer line =
  if line </> (0,8) = "<tr><td>" then
    match Str.split whitespace (Str.global_replace end_td "" line </> (8,0)) with
    | host::port::_ ->
        let port = int_of_string port in
        Some (host,port)
    | _ -> None
  else
    None

let build_url (host,port) =
  sprintf "http://%s:%d/pks/lookup?op=stats" host port

let lines_to_peers lines =
  let rec skip_to_start = function
    | line::((_::rest) as tl) ->
        if Str.string_match start_line line 0 then
          rest
        else skip_to_start tl
    | _ -> []
  in
  let lines = skip_to_start lines in
  let rec get_peers = function
    | [] -> []
    | hd::tl -> match get_peer hd with
      | Some peer -> peer :: get_peers tl
      | None -> []
  in
  get_peers lines

let multi_fetch (host,port) =
  let ports = [port+1] in
  (*let ports = if port <> 11370 then 11371::ports else ports in
  let ports = List.rev (80::ports) in *)
  let get_peers (host,port) =
    match fetch_url (build_url (host,port)) with
    | None -> None
    | Some x ->
        let peers = lines_to_peers x in
        if peers = [] then None
        else Some peers
  in
  let rec loop ports = match ports with
      [] -> None
    | port::tl ->
        match get_peers (host,port) with
        | Some x -> Some x
        | None -> loop tl
  in
  loop ports

let find_all peer =
  let visited = ref (Set.singleton None) in
  let rec dfs peer =
    let ip = get_ip_opt (fst peer) in
    if Set.mem ip !visited then
      []
    else
      begin
        visited := Set.add ip !visited;
        match multi_fetch peer with
        | None -> (* retrieval failed *)
            eprintf "(%s,%d) FAILED\n%!" (fst peer) (snd peer);
            []
        | Some peers ->
            try
              eprintf "(%s,%d)\n%!" (fst peer) (snd peer);
              let others = List.concat (List.map ~f:dfs peers) in
              peer :: others
            with e ->
              eprintf "(%s,%d) FAILED with %s\n%!" (fst peer) (snd peer)
                (Printexc.to_string e);
              []
      end
  in
  dfs peer


let () = if not !Sys.interactive then
  let servers = find_all root in
  printf "%d servers found\n" (List.length servers);
  List.iter ~f:(fun (host,port) -> printf "%s %d\n" host port) servers