// Provenance: main.go as introduced by commit
// c12c5f9df91fbd5962cbfae8dfef76bd73af2cde ("init"), authored by
// stephen mcquay on Sun, 15 May 2016 22:51:52 -0700.

// Command picmv hard-links every file found under an input directory into an
// output directory, naming each link after the MD5 digest of the file's
// contents so that files with identical contents collapse onto one entry.
//
// A file whose digest is d and whose lower-cased extension is e ends up at
// <out>/<d[:2]>/<d[2:]><e>.
package main

import (
	"crypto/md5"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
)

// usage is printed to stderr when the argument count is wrong.
const usage = "picmv <in> <out>"

// workers is the number of concurrent hashing goroutines started by main.
const workers = 16

// input describes one successfully hashed file.
type input struct {
	path string // location of the file as reported by the directory walk
	hash string // lowercase hex MD5 digest of the file's contents
	ext  string // lower-cased extension of path, including the leading dot
}

// stats accumulates the counters reported when the run finishes.
type stats struct {
	total int // files that were hashed and considered for linking
	moved int // files that were newly linked into the output tree
}

// main wires the pipeline together: walk the input tree, hash files on a
// fixed pool of workers, then link each result into the output tree from a
// single goroutine so the counters need no locking.
func main() {
	if len(os.Args) != 3 {
		fmt.Fprintf(os.Stderr, "%s\n", usage)
		os.Exit(1)
	}
	in, out := os.Args[1], os.Args[2]
	log.Printf("%+v", in)

	if err := makeBuckets(out); err != nil {
		log.Fatalf("problem creating output directories: %+v", err)
	}

	st := stats{}

	work := source(in)
	res := make([]<-chan input, 0, workers)
	for w := 0; w < workers; w++ {
		res = append(res, compute(work))
	}

	for f := range merge(res) {
		if f.ext != ".jpg" {
			log.Printf("%+v", f)
		}
		st.total++

		linked, err := place(out, f)
		switch {
		case err != nil:
			// A failed link must not be counted as moved.
			log.Printf("%+v", err)
		case !linked:
			log.Printf("dup detected: %+v", f)
		default:
			st.moved++
		}
	}
	log.Printf("total files moved : %d", st.moved)
	log.Printf("total files processed : %d", st.total)
}

// makeBuckets creates the 256 two-hex-digit subdirectories ("00" through
// "ff") under out, returning the first error encountered.
func makeBuckets(out string) error {
	for i := 0; i <= 0xff; i++ {
		dirname := filepath.Join(out, fmt.Sprintf("%02x", i))
		if err := os.MkdirAll(dirname, 0755); err != nil {
			return err
		}
	}
	return nil
}

// place hard-links in.path to its content-addressed location under out.
//
// It returns true when a new link was created and false with a nil error
// when the destination already exists (a duplicate). Any other failure is
// returned as an error. The existence check is performed by os.Link itself
// rather than by a separate stat, so it cannot be fooled by stat errors and
// has no window between the check and the link.
func place(out string, in input) (bool, error) {
	dest := filepath.Join(out, in.hash[:2], in.hash[2:]+in.ext)
	err := os.Link(in.path, dest)
	switch {
	case err == nil:
		return true, nil
	case os.IsExist(err):
		return false, nil
	default:
		return false, err
	}
}

// source walks the tree rooted at root in a new goroutine and sends the path
// of every non-directory entry on the returned channel, which is closed once
// the walk ends. Entries that cannot be visited are logged and skipped.
func source(root string) <-chan string {
	out := make(chan string)
	go func() {
		defer close(out)
		err := filepath.Walk(
			root,
			func(path string, info os.FileInfo, err error) error {
				if err != nil {
					// info may be nil when err is set (for example when
					// root does not exist), so it must not be touched.
					log.Printf("problem visiting %q: %+v", path, err)
					return nil
				}
				if info.IsDir() {
					return nil
				}
				out <- path
				return nil
			},
		)
		if err != nil {
			log.Printf("problem from crawling root %q: %+v", root, err)
		}
	}()
	return out
}

// compute starts one worker goroutine that hashes every path received from
// work and sends the result on the returned channel. Paths that cannot be
// hashed are logged and dropped. The returned channel is closed once work is
// closed and drained.
func compute(work <-chan string) <-chan input {
	out := make(chan input)
	go func() {
		defer close(out)
		for path := range work {
			h, err := _hash(path)
			if err != nil {
				log.Printf("problem hashing: %+v", err)
				continue
			}
			out <- h
		}
	}()
	return out
}

// merge fans every channel in cs into a single channel, which is closed
// after all of the inputs have been closed and drained.
func merge(cs []<-chan input) <-chan input {
	out := make(chan input)
	var wg sync.WaitGroup

	// The counter has to be raised before any forwarder starts; otherwise a
	// forwarder whose channel is already closed could call Done first and
	// drive the counter negative, which panics.
	wg.Add(len(cs))
	for _, c := range cs {
		go func(c <-chan input) {
			defer wg.Done()
			for n := range c {
				out <- n
			}
		}(c)
	}

	go func() {
		wg.Wait()
		close(out)
	}()
	return out
}

// _hash reads the file at path and returns its description: the path itself,
// the hex-encoded MD5 digest of its contents and its lower-cased extension.
// A zero input and a non-nil error are returned when the file cannot be
// opened or read.
func _hash(path string) (input, error) {
	f, err := os.Open(path)
	if err != nil {
		return input{}, fmt.Errorf("problem opening file: %v", err)
	}
	defer f.Close()
	hash := md5.New()
	if _, err := io.Copy(hash, f); err != nil {
		return input{}, fmt.Errorf("problem calculating hash for %q: %+v", path, err)
	}
	r := input{
		path: path,
		hash: fmt.Sprintf("%x", hash.Sum(nil)),
		ext:  strings.ToLower(filepath.Ext(path)),
	}
	return r, nil
}