dm/cs
1
0
forked from sm/cs

Compare commits

..

16 Commits

5 changed files with 272 additions and 145 deletions

176
check.go
View File

@ -15,18 +15,78 @@ import (
"sync" "sync"
) )
// input is one source of checksum lines produced by toInput and consumed by
// check: either an opened stream, or the error that prevented opening it.
type input struct {
// f is the stream that check scans line by line and then closes.
f io.ReadCloser
// err is the result of opening the stream; check stops at the first input
// whose err is non-nil.
err error
}
// checksum contains the path to a file, a way to hash it, and the results of
// the hash
type checksum struct { type checksum struct {
filename string filename string
hash hash.Hash hash hash.Hash
checksum string checksum string
err error
} }
func parseCS(line string) (checksum, error) { // check is the entry point for -c operation.
elems := strings.Fields(line) func check(args []string, verbose bool) chan error {
if len(elems) != 2 { jobs := make(chan checksum)
return checksum{}, fmt.Errorf("unexpected content: %d != 2", len(elems))
go func() {
for i := range toInput(args) {
if i.err != nil {
jobs <- checksum{err: i.err}
break
} }
cs, f := elems[0], elems[1] s := bufio.NewScanner(i.f)
for s.Scan() {
jobs <- parseCS(s.Text())
}
i.f.Close()
if s.Err() != nil {
jobs <- checksum{err: s.Err()}
}
}
close(jobs)
}()
results := []<-chan error{}
for w := 0; w < *ngo; w++ {
results = append(results, verify(jobs, verbose))
}
return merge(results)
}
// toInput turns the command-line arguments into a channel of inputs.
//
// Each name in args is opened in order and sent together with whatever error
// os.Open reported. When args is empty a single input wrapping os.Stdin is
// sent instead. The channel is closed once everything has been delivered.
func toInput(args []string) chan input {
	out := make(chan input)
	go func() {
		defer close(out)
		if len(args) == 0 {
			// No files named: read the checksum list from standard input.
			out <- input{f: os.Stdin}
			return
		}
		for _, path := range args {
			fh, openErr := os.Open(path)
			out <- input{fh, openErr}
		}
	}()
	return out
}
// parseCS picks apart a line from a checksum file and returns everything
// needed to perform a checksum.
func parseCS(line string) checksum {
elems := strings.Fields(line)
if len(elems) < 1 {
return checksum{err: fmt.Errorf("couldn't find checksum in %q", line)}
}
cs := elems[0]
var hsh hash.Hash var hsh hash.Hash
switch len(cs) { switch len(cs) {
case 32: case 32:
@ -38,70 +98,43 @@ func parseCS(line string) (checksum, error) {
case 128: case 128:
hsh = sha512.New() hsh = sha512.New()
default: default:
return checksum{}, fmt.Errorf("unknown format: %q", line) return checksum{err: fmt.Errorf("unknown format: %q", line)}
} }
return checksum{filename: f, hash: hsh, checksum: cs}, nil
return checksum{filename: strings.TrimSpace(line[len(cs):]), hash: hsh, checksum: cs}
} }
type input struct { // verify does grunt work of verifying a stream of jobs (filenames).
f io.ReadCloser func verify(jobs chan checksum, verbose bool) chan error {
err error r := make(chan error)
}
// work is one unit handed to the compute workers: a parsed checksum job, or
// an error encountered while opening, scanning or parsing the input.
type work struct {
// cs is the job to verify.
cs checksum
// err, when non-nil, makes the worker log the error and skip the job.
err error
}
func streams(files []string) chan input {
r := make(chan input)
go func() { go func() {
for _, name := range files { for job := range jobs {
f, err := os.Open(name) if job.err != nil {
r <- input{f, err} log.Printf("%+v", job.err)
continue
}
f, err := os.Open(job.filename)
if err != nil {
r <- err
continue
}
if _, err := io.Copy(job.hash, f); err != nil {
r <- err
continue
}
f.Close()
if fmt.Sprintf("%x", job.hash.Sum(nil)) != job.checksum {
r <- fmt.Errorf("%s: bad", job.filename)
} else if verbose {
fmt.Fprintf(os.Stderr, "ok: %v\n", job.filename)
} }
if len(files) == 0 {
r <- input{f: os.Stdin}
} }
close(r) close(r)
}() }()
return r return r
} }
func check(files []string) chan error { // merge is simple error fan-in
jobs := make(chan work)
go func() {
for stream := range streams(files) {
if stream.err != nil {
jobs <- work{err: stream.err}
break
}
s := bufio.NewScanner(stream.f)
for s.Scan() {
cs, err := parseCS(s.Text())
jobs <- work{cs, err}
}
stream.f.Close()
if s.Err() != nil {
jobs <- work{err: s.Err()}
}
}
close(jobs)
}()
results := []<-chan error{}
workers := 32
for w := 0; w < workers; w++ {
results = append(results, compute(jobs))
}
return merge(results)
}
func merge(cs []<-chan error) chan error { func merge(cs []<-chan error) chan error {
out := make(chan error) out := make(chan error)
@ -125,30 +158,3 @@ func merge(cs []<-chan error) chan error {
}() }()
return out return out
} }
// compute is a verification worker. It drains jobs, hashes the file named by
// each job with the job's hash, and compares the hex digest to the expected
// checksum.
//
// Open failures, read failures and checksum mismatches are sent on the
// returned channel, which is closed once jobs is exhausted. Jobs that already
// carry an error are logged and skipped.
func compute(jobs chan work) chan error {
	r := make(chan error)
	go func() {
		defer close(r)
		for job := range jobs {
			if job.err != nil {
				log.Printf("%+v", job.err)
				continue
			}
			f, err := os.Open(job.cs.filename)
			if err != nil {
				r <- fmt.Errorf("open: %v", err)
				continue
			}
			// Close before inspecting the copy error so that a failed read
			// does not leak the file descriptor.
			_, err = io.Copy(job.cs.hash, f)
			f.Close()
			if err != nil {
				r <- err
				continue
			}
			if fmt.Sprintf("%x", job.cs.hash.Sum(nil)) != job.cs.checksum {
				r <- fmt.Errorf("%s: bad", job.cs.filename)
			}
		}
	}()
	return r
}

149
hash.go Normal file
View File

@ -0,0 +1,149 @@
package main
import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"fmt"
"hash"
"io"
"os"
"sort"
"sync"
)
// result is what a hashing worker reports for one file: either the file name
// with its hex checksum, or an error.
type result struct {
	f, cs string // file name and hex-encoded checksum
	err   error  // set instead of f/cs when hashing failed
}
// results is a slice of result that implements sort.Interface, ordering
// entries by file name.
type results []result

// Len reports the number of entries.
func (rs results) Len() int { return len(rs) }

// Swap exchanges the entries at positions a and b.
func (rs results) Swap(a, b int) { rs[a], rs[b] = rs[b], rs[a] }

// Less reports whether the file name at position a sorts before the one at b.
func (rs results) Less(a, b int) bool { return rs[b].f > rs[a].f }
// hashr is a constructor for hash.Hash values, such as sha256.New. hsh picks
// one based on the selected algorithm, and each compute worker calls it to
// obtain its own hash state.
type hashr func() hash.Hash
// hsh selects the hash algorithm named by the -a flag and computes checksums
// for files, returning the results on a channel that is closed when all work
// is done.
//
// With no files, standard input is hashed and reported under the name "-".
// Otherwise the files are spread across *ngo workers (at least one) and the
// collected results are emitted sorted by file name. Every failure, including
// an unsupported algorithm or a read error on standard input, is delivered as
// a result with err set rather than terminating the process here.
func hsh(files []string, verbose bool) chan result {
	var h hashr
	switch *algo {
	case "sha1", "1":
		h = sha1.New
	case "sha256", "256":
		h = sha256.New
	case "sha512", "512":
		h = sha512.New
	case "md5":
		h = md5.New
	default:
		r := make(chan result)
		go func() {
			r <- result{err: fmt.Errorf("unsupported algorithm: %v (supported: md5, sha1, sha256, sha512)", *algo)}
			close(r)
		}()
		return r
	}

	if len(files) == 0 {
		r := make(chan result)
		go func() {
			defer close(r)
			sum := h()
			if _, err := io.Copy(sum, os.Stdin); err != nil {
				// Report through the channel like every other failure so the
				// caller decides how to exit.
				r <- result{err: err}
				return
			}
			r <- result{cs: fmt.Sprintf("%x", sum.Sum(nil)), f: "-"}
		}()
		return r
	}

	jobs := make(chan checksum)
	go func() {
		for _, name := range files {
			jobs <- checksum{filename: name}
		}
		close(jobs)
	}()

	// Without at least one worker nothing would drain jobs: the feeder above
	// would block forever and no file would be hashed.
	workers := *ngo
	if workers < 1 {
		workers = 1
	}
	res := make([]<-chan result, 0, workers)
	for w := 0; w < workers; w++ {
		res = append(res, compute(h, jobs, verbose))
	}

	o := make(chan result)
	go func() {
		// Collect everything first so output order is by file name, not by
		// which worker happened to finish first.
		rs := results{}
		for r := range rmerge(res) {
			rs = append(rs, r)
		}
		sort.Sort(rs)
		for _, r := range rs {
			o <- r
		}
		close(o)
	}()
	return o
}
// compute starts one hashing worker and returns the channel it reports on.
//
// The worker creates a single hash from h and reuses it, resetting it before
// each file. For every job it opens the named file, streams it through the
// hash and sends a result holding the file name and hex digest. Open and read
// failures are sent as results with only err set. When verbose is true the
// name of each successfully hashed file is written to stderr. The returned
// channel is closed once jobs is drained.
func compute(h hashr, jobs chan checksum, verbose bool) chan result {
	digest := h()
	out := make(chan result)
	go func() {
		defer close(out)
		for job := range jobs {
			name := job.filename
			file, openErr := os.Open(name)
			if openErr != nil {
				out <- result{err: openErr}
				continue
			}
			digest.Reset()
			_, copyErr := io.Copy(digest, file)
			file.Close()
			if copyErr != nil {
				out <- result{err: copyErr}
				continue
			}
			if verbose {
				fmt.Fprintf(os.Stderr, "%v\n", name)
			}
			out <- result{f: name, cs: fmt.Sprintf("%x", digest.Sum(nil))}
		}
	}()
	return out
}
// rmerge fans in several result channels: every value received from any
// channel in cs is forwarded to the returned channel, which is closed after
// all of the inputs have been closed and drained.
func rmerge(cs []<-chan result) chan result {
	merged := make(chan result)

	var pending sync.WaitGroup
	pending.Add(len(cs))
	for _, src := range cs {
		go func(in <-chan result) {
			defer pending.Done()
			for item := range in {
				merged <- item
			}
		}(src)
	}

	// Close the merged channel only after every forwarder has finished.
	go func() {
		pending.Wait()
		close(merged)
	}()
	return merged
}

View File

@ -1,5 +1,5 @@
MIT License MIT License
Copyright (c) 2015 smcquay Copyright (c) 2016 derek mcquay, stephen mcquay
Permission is hereby granted, free of charge, to any person obtaining a copy of Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in this software and associated documentation files (the "Software"), to deal in

72
main.go
View File

@ -1,78 +1,42 @@
package main package main
import ( import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"flag" "flag"
"fmt" "fmt"
"io"
"os" "os"
"runtime"
) )
var algo = flag.String("a", "sha1", "algorithm to use") var algo = flag.String("a", "sha256", "algorithm to use")
var mode = flag.Bool("c", false, "check") var mode = flag.Bool("c", false, "check")
var ngo = flag.Int("n", runtime.NumCPU(), "number of goroutines")
// verbose is the -v flag; it is passed to check and hsh to enable per-file
// progress output.
var verbose = flag.Bool("v", false, "verbose")
func main() { func main() {
flag.Parse() flag.Parse()
files := flag.Args() files := flag.Args()
switch *mode { switch *mode {
case true: case true:
c := 0 ec := 0
for err := range check(files) { for err := range check(files, *verbose) {
c++ ec++
fmt.Fprintf(os.Stderr, "%v\n", err) fmt.Fprintf(os.Stderr, "%v\n", err)
} }
if c > 0 { if ec > 0 {
os.Exit(1) os.Exit(1)
} }
case false: case false:
if err := hsh(files); err != nil { ec := 0
fmt.Fprintf(os.Stderr, "%v\n", err) for res := range hsh(files, *verbose) {
os.Exit(1) if res.err != nil {
} ec++
} fmt.Fprintf(os.Stderr, "%v\n", res.err)
}
func hsh(files []string) error {
h := sha256.New()
switch *algo {
case "sha1", "1":
h = sha1.New()
case "sha256", "256":
h = sha256.New()
case "sha512", "512":
h = sha512.New()
case "md5":
h = md5.New()
default:
return fmt.Errorf("unsupported algorithm: %v", *algo)
}
if len(files) == 0 {
_, err := io.Copy(h, os.Stdin)
if err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}
fmt.Printf("%x -\n", h.Sum(nil))
} else { } else {
for _, name := range files { fmt.Printf("%v %v\n", res.cs, res.f)
f, err := os.Open(name) }
if err != nil { }
fmt.Fprintf(os.Stderr, "%v\n", err) if ec > 0 {
continue os.Exit(1)
}
h.Reset()
_, err = io.Copy(h, f)
f.Close()
if err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
continue
}
fmt.Printf("%x %s\n", h.Sum(nil), name)
} }
} }
return nil
} }

View File

@ -1,12 +1,20 @@
# cs # cs
calculate checksums concurrently calculate/verify checksums (cs)
It's a simpler version of shasum + md5sum, but only for md5, sha1, sha256, and It's a simpler version of shasum + md5sum, but concurrently and only with
sha512. support for md5, sha1, sha256, and sha512.
## usage ## usage
# create checksums
cs -a 256 < foo.txt cs -a 256 < foo.txt
cs foo.txt cs foo.txt
cs -a sha1 foo.txt foo.txt foo.txt cs -a sha1 foo.txt foo.txt foo.txt > checksums.sha1
# verify
cat checksums.sha1 | cs -c
cs -c checksums.sha1
# both
cs $(find ~/src/mcquay.me | grep '\.go$') | cs -c