parallel hashing

Stephen McQuay 2016-11-15 20:50:04 -08:00
parent f82decc6dc
commit 5e10c8f583
No known key found for this signature in database
GPG Key ID: 1ABF428F71BAFC3D
2 changed files with 112 additions and 56 deletions

check.go (124 lines changed)

@@ -6,13 +6,13 @@ import (
 	"crypto/sha1"
 	"crypto/sha256"
 	"crypto/sha512"
-	"errors"
 	"fmt"
 	"hash"
 	"io"
 	"log"
 	"os"
 	"strings"
+	"sync"
 )
 
 type checksum struct {
@@ -43,61 +43,113 @@ func parseCS(line string) (checksum, error) {
 	return checksum{filename: f, hash: hsh, checksum: cs}, nil
 }
 
-func check(files []string) error {
-	streams := []io.ReadCloser{}
-	defer func() {
-		for _, stream := range streams {
-			stream.Close()
-		}
-	}()
-	for _, name := range files {
-		f, err := os.Open(name)
-		if err != nil {
-			return err
-		}
-		streams = append(streams, f)
-	}
-	if len(files) == 0 {
-		streams = append(streams, os.Stdin)
-	}
-
-	jobs := []checksum{}
-	for _, stream := range streams {
-		s := bufio.NewScanner(stream)
-		for s.Scan() {
-			cs, err := parseCS(s.Text())
-			if err != nil {
-				return err
-			}
-			jobs = append(jobs, cs)
-		}
-		if s.Err() != nil {
-			return s.Err()
-		}
-	}
-
-	errs := 0
-	for _, job := range jobs {
-		f, err := os.Open(job.filename)
-		if err != nil {
-			return fmt.Errorf("open: %v", err)
-		}
-		if _, err := io.Copy(job.hash, f); err != nil {
-			log.Printf("%+v", err)
-		}
-		f.Close()
-		if fmt.Sprintf("%x", job.hash.Sum(nil)) == job.checksum {
-			fmt.Printf("%s: OK\n", job.filename)
-		} else {
-			errs++
-			fmt.Fprintf(os.Stderr, "%s: bad\n", job.filename)
-		}
-	}
-
-	var err error
-	if errs != 0 {
-		err = errors.New("bad files found")
-	}
-	return err
-}
+type input struct {
+	f   io.ReadCloser
+	err error
+}
+
+type work struct {
+	cs  checksum
+	err error
+}
+
+func streams(files []string) chan input {
+	r := make(chan input)
+	go func() {
+		for _, name := range files {
+			f, err := os.Open(name)
+			r <- input{f, err}
+		}
+		if len(files) == 0 {
+			r <- input{f: os.Stdin}
+		}
+		close(r)
+	}()
+	return r
+}
+
+func check(files []string) chan error {
+	jobs := make(chan work)
+	go func() {
+		for stream := range streams(files) {
+			if stream.err != nil {
+				jobs <- work{err: stream.err}
+				break
+			}
+			s := bufio.NewScanner(stream.f)
+			for s.Scan() {
+				cs, err := parseCS(s.Text())
+				jobs <- work{cs, err}
+			}
+			stream.f.Close()
+			if s.Err() != nil {
+				jobs <- work{err: s.Err()}
+			}
+		}
+		close(jobs)
+	}()
+
+	results := []<-chan error{}
+	workers := 8
+	for w := 0; w < workers; w++ {
+		results = append(results, compute(jobs))
+	}
+
+	return merge(results)
+}
+
+func merge(cs []<-chan error) chan error {
+	out := make(chan error)
+	var wg sync.WaitGroup
+	output := func(c <-chan error) {
+		for n := range c {
+			out <- n
+		}
+		wg.Done()
+	}
+	wg.Add(len(cs))
+	for _, c := range cs {
+		go output(c)
+	}
+	go func() {
+		wg.Wait()
+		close(out)
+	}()
+	return out
+}
+
+func compute(jobs chan work) chan error {
+	r := make(chan error)
+	go func() {
+		for job := range jobs {
+			if job.err != nil {
+				log.Printf("%+v", job.err)
+				continue
+			}
+			f, err := os.Open(job.cs.filename)
+			if err != nil {
+				r <- fmt.Errorf("open: %v", err)
+				continue
+			}
+			if _, err := io.Copy(job.cs.hash, f); err != nil {
+				log.Printf("%+v", err)
+			}
+			f.Close()
+			if fmt.Sprintf("%x", job.cs.hash.Sum(nil)) == job.cs.checksum {
+				fmt.Printf("%s: OK\n", job.cs.filename)
+			} else {
+				r <- fmt.Errorf("%s: bad", job.cs.filename)
+			}
+		}
+		close(r)
+	}()
+	return r
+}
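
The new check pipeline is a fan-out/fan-in: a fixed pool of compute workers drains a shared jobs channel, and merge collapses the workers' error channels into a single stream for the caller. A minimal, self-contained sketch of that pattern follows; the names (worker, a plain int job) are illustrative and not from this repository.

// Fan-out/fan-in sketch: several workers drain one jobs channel,
// and merge funnels their error channels into a single output.
package main

import (
	"fmt"
	"sync"
)

// worker consumes jobs and reports an error for every odd job,
// standing in for the per-file hashing done by a real worker.
func worker(jobs <-chan int) <-chan error {
	out := make(chan error)
	go func() {
		for j := range jobs {
			if j%2 == 1 {
				out <- fmt.Errorf("job %d: bad", j)
			}
		}
		close(out)
	}()
	return out
}

// merge fans several error channels back into one channel and
// closes it once every input channel has been drained.
func merge(cs []<-chan error) <-chan error {
	out := make(chan error)
	var wg sync.WaitGroup
	wg.Add(len(cs))
	for _, c := range cs {
		go func(c <-chan error) {
			defer wg.Done()
			for err := range c {
				out <- err
			}
		}(c)
	}
	go func() {
		wg.Wait()
		close(out)
	}()
	return out
}

func main() {
	jobs := make(chan int)
	go func() {
		for i := 0; i < 10; i++ {
			jobs <- i
		}
		close(jobs)
	}()

	// Fan out to a fixed pool, then fan the errors back in.
	workers := make([]<-chan error, 0, 4)
	for w := 0; w < 4; w++ {
		workers = append(workers, worker(jobs))
	}
	for err := range merge(workers) {
		fmt.Println(err)
	}
}

Bounding the pool (workers := 8 in the commit) keeps the number of files hashed concurrently fixed no matter how many checksum lines are read.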

(second changed file)

@@ -19,8 +19,12 @@ func main() {
 	files := flag.Args()
 	switch *mode {
 	case true:
-		if err := check(files); err != nil {
+		c := 0
+		for err := range check(files) {
+			c++
 			fmt.Fprintf(os.Stderr, "%v\n", err)
+		}
+		if c > 0 {
 			os.Exit(1)
 		}
 	case false: