2016-07-28 20:13:20 -07:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2016-11-25 23:48:44 -08:00
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
2016-07-28 20:13:20 -07:00
|
|
|
"net/http"
|
|
|
|
"os"
|
|
|
|
|
|
|
|
"mcquay.me/spider"
|
|
|
|
)
|
|
|
|
|
|
|
|
const usage = "crawl <url>"
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
if len(os.Args) < 2 {
|
|
|
|
fmt.Fprintf(os.Stderr, "%s\n", usage)
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
|
|
|
|
failures := []spider.Link{}
|
|
|
|
for p := range spider.Pages(os.Args[1]) {
|
|
|
|
resp, err := http.Get(p.To)
|
|
|
|
if err != nil {
|
2016-11-25 23:48:44 -08:00
|
|
|
p.Err = err
|
2016-07-28 20:13:20 -07:00
|
|
|
failures = append(failures, p)
|
2016-11-25 23:48:44 -08:00
|
|
|
continue
|
2016-07-28 20:13:20 -07:00
|
|
|
}
|
2016-11-25 23:48:44 -08:00
|
|
|
io.Copy(ioutil.Discard, resp.Body)
|
|
|
|
resp.Body.Close()
|
2016-07-28 20:13:20 -07:00
|
|
|
if resp.StatusCode != http.StatusOK {
|
2016-11-25 23:48:44 -08:00
|
|
|
p.Err = fmt.Errorf("http status; got %s, want %s", http.StatusText(resp.StatusCode), http.StatusText(http.StatusOK))
|
2016-07-28 20:13:20 -07:00
|
|
|
failures = append(failures, p)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(failures) > 0 {
|
|
|
|
for _, f := range failures {
|
|
|
|
fmt.Fprintf(os.Stderr, "%+v\n", f)
|
|
|
|
}
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
}
|