1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
|
package hellfire // import "pathspider.net/hellfire"
import (
"archive/zip"
"log"
)
// AlexaTopsitesList is a test list backed by the Alexa Topsites CSV data.
// It embeds TestList and optionally records the name of a local file to read
// the list from; when no filename is set, FeedJobs downloads the list from
// AlexaTopsitesURL instead.
type AlexaTopsitesList struct {
TestList
// filename is the path of a local CSV file to use; empty means "download".
filename string
}
// AlexaTopsitesURL is the URL from which the latest Alexa Topsites list
// (a zip archive) is downloaded.
const AlexaTopsitesURL string = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
// SetFilename records the path of a local file to read the list from.
// FeedJobs reads from this file when the stored filename is non-empty and
// downloads from AlexaTopsitesURL when it is empty.
func (l *AlexaTopsitesList) SetFilename(filename string) {
l.filename = filename
}
// FeedJobs loads the Alexa Topsites list and feeds its entries into jobs.
//
// If a filename has been set, the list is read from that file. Otherwise the
// zip archive at AlexaTopsitesURL is fetched and its "top-1m.csv" member is
// used. In both cases the CSV header is set to "rank", "domain" before the
// work is delegated to the underlying CSVList's FeedJobs.
//
// Failing to fetch the URL, to read the archive, or to open the archive
// member terminates the process via log.Fatalf; an archive that lacks
// "top-1m.csv" causes a panic.
func (l *AlexaTopsitesList) FeedJobs(jobs chan map[string]interface{}) {
	var topsites *CSVList

	if l.filename == "" {
		urlReader, err := getReaderFromUrl(AlexaTopsitesURL)
		if err != nil {
			log.Fatalf("Unable to get <%s>: %s", AlexaTopsitesURL, err)
		}

		zr, err := zip.NewReader(urlReader, int64(urlReader.Len()))
		if err != nil {
			log.Fatalf("Unable to read zip: %s", err)
		}

		for _, zf := range zr.File {
			if zf.Name != "top-1m.csv" {
				continue
			}
			// Previously the error from Open was discarded, which would hand
			// a nil reader to CSVListFromReader on a damaged archive.
			f, err := zf.Open()
			if err != nil {
				log.Fatalf("Unable to open %s in zip: %s", zf.Name, err)
			}
			// NOTE(review): f is not closed here because it is not visible
			// from this file when CSVListFromReader consumes the reader —
			// confirm and close once reading is known to be complete.
			topsites = CSVListFromReader(f)
			break
		}

		if topsites == nil {
			panic("Did not find top-1m.csv in the zip archive")
		}
	} else {
		topsites = CSVListFromFile(l.filename)
	}

	topsites.SetHeader([]string{"rank", "domain"})
	topsites.FeedJobs(jobs)
}
|