spider/cmd/crawl/main.go

44 lines
773 B
Go
Raw Permalink Normal View History

2016-07-28 20:13:20 -07:00
package main
import (
"fmt"
2016-11-25 23:48:44 -08:00
"io"
"io/ioutil"
2016-07-28 20:13:20 -07:00
"net/http"
"os"
"mcquay.me/spider"
)
// usage is the one-line synopsis printed to stderr when no URL argument is
// supplied on the command line.
const usage = "crawl <url>"
func main() {
if len(os.Args) < 2 {
fmt.Fprintf(os.Stderr, "%s\n", usage)
os.Exit(1)
}
failures := []spider.Link{}
for p := range spider.Pages(os.Args[1]) {
resp, err := http.Get(p.To)
if err != nil {
2016-11-25 23:48:44 -08:00
p.Err = err
2016-07-28 20:13:20 -07:00
failures = append(failures, p)
2016-11-25 23:48:44 -08:00
continue
2016-07-28 20:13:20 -07:00
}
2016-11-25 23:48:44 -08:00
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
2016-07-28 20:13:20 -07:00
if resp.StatusCode != http.StatusOK {
2016-11-25 23:48:44 -08:00
p.Err = fmt.Errorf("http status; got %s, want %s", http.StatusText(resp.StatusCode), http.StatusText(http.StatusOK))
2016-07-28 20:13:20 -07:00
failures = append(failures, p)
}
}
if len(failures) > 0 {
for _, f := range failures {
fmt.Fprintf(os.Stderr, "%+v\n", f)
}
os.Exit(1)
}
}