File: alexa.go

package info (click to toggle)
hellfire 0.0~git20180708.bf3c390-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, buster, sid, trixie
  • size: 96 kB
  • sloc: makefile: 2
file content (51 lines) | stat: -rw-r--r-- 1,121 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
package hellfire // import "pathspider.net/hellfire"

import (
	"archive/zip"
	"log"
)

// AlexaTopsitesList is a test list whose entries come from the Alexa Topsites
// CSV. It embeds TestList and remembers an optional local filename; when the
// filename is empty, FeedJobs downloads the list from AlexaTopsitesURL instead.
type AlexaTopsitesList struct {
	TestList
	// filename is the path of a local CSV to read; "" means download the list.
	filename string
}

// AlexaTopsitesURL is the URL from which the latest Alexa Topsites list is
// downloaded. The resource is a zip archive; FeedJobs reads its "top-1m.csv"
// member.
const AlexaTopsitesURL string = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"

// SetFilename records the path of a local file for FeedJobs to read the list
// from. Leaving it unset (or setting it to "") makes FeedJobs download the
// list from AlexaTopsitesURL.
func (l *AlexaTopsitesList) SetFilename(filename string) {
	l.filename = filename
}

// FeedJobs loads the Alexa Topsites list and hands it to the jobs channel.
//
// When no filename has been set, the list is fetched from AlexaTopsitesURL and
// the "top-1m.csv" member of the downloaded zip archive is used. Otherwise the
// list is read from the configured local file. In both cases the CSV header is
// set to "rank", "domain" before the resulting CSV list's FeedJobs is called
// with the same channel.
//
// Failing to download the archive, to parse it, or to open the CSV member
// terminates the program via log.Fatalf. An archive that does not contain
// "top-1m.csv" causes a panic.
func (l *AlexaTopsitesList) FeedJobs(jobs chan map[string]interface{}) {
	var topsites *CSVList

	if l.filename == "" {
		urlReader, err := getReaderFromUrl(AlexaTopsitesURL)
		if err != nil {
			log.Fatalf("Unable to get <%s>: %s", AlexaTopsitesURL, err)
		}

		zr, err := zip.NewReader(urlReader, int64(urlReader.Len()))
		if err != nil {
			log.Fatalf("Unable to read zip: %s", err)
		}

		for _, zf := range zr.File {
			if zf.Name != "top-1m.csv" {
				continue
			}

			// Check the error instead of discarding it: a failed Open would
			// otherwise pass a nil reader on and crash later with no
			// indication of the real cause.
			f, err := zf.Open()
			if err != nil {
				log.Fatalf("Unable to open %s in zip: %s", zf.Name, err)
			}

			// NOTE(review): f is never closed. It is left open because it is
			// not visible here whether CSVListFromReader consumes the reader
			// immediately or lazily during FeedJobs — confirm before adding
			// a Close.
			topsites = CSVListFromReader(f)
			break
		}

		if topsites == nil {
			panic("Did not find top-1m.csv in the zip archive")
		}
	} else {
		topsites = CSVListFromFile(l.filename)
	}

	topsites.SetHeader([]string{"rank", "domain"})
	topsites.FeedJobs(jobs)
}