// Command picmv hard-links every file found under an input directory into an
// output directory, naming each link after the MD5 digest of the file's
// content so that files with identical content collapse to a single entry.
package main

import (
	"crypto/md5"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
)

// usage is printed to stderr when the program is not given exactly two
// arguments.
// NOTE(review): the text ends in a trailing space and names no arguments,
// although main expects an input and an output directory — argument
// placeholders may have been lost; confirm the intended wording.
const usage = "picmv "

// workers is the number of concurrent hashing goroutines started by main.
const workers = 16

// input describes one successfully hashed file.
type input struct {
	path string // path of the file as reported by the directory walk
	hash string // lowercase hex MD5 digest of the file content
	ext  string // lowercased file extension, including the leading dot
}

// stats counts the files seen and the links created during a run.
type stats struct {
	total int // files that were hashed and considered
	moved int // files for which a new hard link was created
}

// main walks os.Args[1], hashes every file with a pool of workers and
// hard-links each one into os.Args[2]/<first two hex digits>/<remaining
// digits><ext>. Files whose destination already exists are reported as
// duplicates and left alone.
func main() {
	if len(os.Args) != 3 {
		fmt.Fprintf(os.Stderr, "%s\n", usage)
		os.Exit(1)
	}
	in, out := os.Args[1], os.Args[2]
	log.Printf("%+v", in)

	// Pre-create the 256 bucket directories (00..ff). Without them every
	// later link would fail, so a failure here is fatal.
	for i := 0; i <= 0xff; i++ {
		dirname := filepath.Join(out, fmt.Sprintf("%02x", i))
		if err := os.MkdirAll(dirname, 0755); err != nil {
			log.Fatalf("problem creating bucket %q: %+v", dirname, err)
		}
	}

	// Fan the walked paths out to the hashing workers, then fan the results
	// back in so that linking happens on this goroutine only.
	work := source(in)
	res := make([]<-chan input, 0, workers)
	for w := 0; w < workers; w++ {
		res = append(res, compute(work))
	}

	var st stats
	for f := range merge(res) {
		if f.ext != ".jpg" {
			log.Printf("%+v", f)
		}
		st.total++

		moved, err := place(out, f)
		switch {
		case err != nil:
			log.Printf("%+v", err)
		case !moved:
			log.Printf("dup detected: %+v", f)
		default:
			// Only count links that were actually created.
			st.moved++
		}
	}
	log.Printf("total files moved : %d", st.moved)
	log.Printf("total files processed : %d", st.total)
}

// place hard-links in.path to its content-addressed location under out.
// It returns true when a new link was created, (false, nil) when the
// destination already exists, and a non-nil error when the destination could
// not be inspected or the link could not be created.
func place(out string, in input) (bool, error) {
	dest := filepath.Join(out, in.hash[:2], in.hash[2:]+in.ext)

	_, err := os.Stat(dest)
	if err == nil {
		return false, nil
	}
	if !os.IsNotExist(err) {
		// A stat failure other than "not found" is not proof of a duplicate.
		return false, err
	}
	if err := os.Link(in.path, dest); err != nil {
		return false, err
	}
	return true, nil
}

// source walks the tree rooted at root in a new goroutine and sends the path
// of every non-directory entry on the returned channel. The channel is closed
// when the walk ends. Entries that cannot be visited are logged and skipped.
func source(root string) <-chan string {
	out := make(chan string)
	go func() {
		defer close(out)
		err := filepath.Walk(
			root,
			func(path string, info os.FileInfo, err error) error {
				if err != nil {
					// info may be nil here (for example when root does not
					// exist), so it must not be touched. Returning nil skips
					// this entry and lets the walk continue.
					log.Printf("problem visiting %q: %+v", path, err)
					return nil
				}
				if info.IsDir() {
					return nil
				}
				out <- path
				return nil
			},
		)
		if err != nil {
			log.Printf("problem from crawling root %q: %+v", root, err)
		}
	}()
	return out
}

// compute starts one worker goroutine that hashes every path received from
// work and sends the result on the returned channel. Paths that fail to hash
// are logged and dropped. The returned channel is closed once work is closed
// and drained.
func compute(work <-chan string) <-chan input {
	out := make(chan input)
	go func() {
		defer close(out)
		for path := range work {
			h, err := _hash(path)
			if err != nil {
				log.Printf("problem hashing: %+v", err)
				continue
			}
			out <- h
		}
	}()
	return out
}

// merge forwards every value from the channels in cs onto a single returned
// channel, which is closed after all of them have been closed and drained.
func merge(cs []<-chan input) <-chan input {
	out := make(chan input)

	var wg sync.WaitGroup
	// Register all forwarders before any of them starts: a goroutine calling
	// Done ahead of the matching Add would drive the counter negative.
	wg.Add(len(cs))
	for _, c := range cs {
		go func(c <-chan input) {
			defer wg.Done()
			for n := range c {
				out <- n
			}
		}(c)
	}

	go func() {
		wg.Wait()
		close(out)
	}()
	return out
}

// _hash reads the file at path and returns its description: the path itself,
// the hex MD5 digest of its content and its lowercased extension. On failure
// it returns the zero input and an error wrapping the underlying cause.
func _hash(path string) (input, error) {
	f, err := os.Open(path)
	if err != nil {
		return input{}, fmt.Errorf("problem opening file: %w", err)
	}
	defer f.Close()

	hash := md5.New()
	if _, err := io.Copy(hash, f); err != nil {
		return input{}, fmt.Errorf("problem calculating hash for %q: %w", path, err)
	}

	r := input{
		path: path,
		hash: fmt.Sprintf("%x", hash.Sum(nil)),
		ext:  strings.ToLower(filepath.Ext(path)),
	}
	return r, nil
}