sm
/
picmv
1
0
Fork 0
A simple program that moves a directory containing thousands of files into 256 subdirectories, each named after the first byte of the file's MD5 hash.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

144 lines
2.5 KiB

package main
import (
"crypto/md5"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"
"sync"
)
// usage is the one-line synopsis printed to stderr when the program is not
// invoked with exactly two arguments.
const usage = "picmv <indir> <outdir>"
// input describes one hashed file as produced by _hash.
type input struct {
// path is the file path that was opened and hashed.
path string
// hash is the lower-case hex encoding of the file's MD5 digest.
hash string
// ext is the extension of path (including the leading dot), lower-cased.
ext string
}
// stats holds the counters that main logs when it finishes.
type stats struct {
// total counts every hashed file that main received.
total int
// moved counts the files main tallied as moved into the output directory.
moved int
}
// main places every non-directory entry found under <indir> into
// <outdir>/<xx>/, where <xx> is the first two hex digits of the file's MD5
// digest and the file name is the rest of the digest plus the lower-cased
// extension.
//
// Files are hard-linked with os.Link, so the originals are left in <indir>.
// A file whose destination already exists is reported as a duplicate and
// skipped. Only successfully linked files are counted as moved.
func main() {
	if len(os.Args) != 3 {
		fmt.Fprintf(os.Stderr, "%s\n", usage)
		os.Exit(1)
	}
	in, out := os.Args[1], os.Args[2]
	log.Printf("%+v", in)

	// Pre-create all 256 bucket directories (00..ff). Every link into a
	// missing bucket would fail, so stop right away if one cannot be made.
	for i := 0; i <= 0xff; i++ {
		dirname := filepath.Join(out, fmt.Sprintf("%02x", i))
		if err := os.MkdirAll(dirname, 0755); err != nil {
			log.Fatalf("problem creating bucket directory %q: %+v", dirname, err)
		}
	}

	// Fan the walked paths out to a fixed number of hashing workers, then
	// merge their results back into this goroutine, which does the linking.
	const workers = 16
	work := source(in)
	res := make([]<-chan input, 0, workers)
	for w := 0; w < workers; w++ {
		res = append(res, compute(work))
	}

	st := stats{}
	for item := range merge(res) {
		// Anything that is not a .jpg is logged so it can be reviewed.
		if item.ext != ".jpg" {
			log.Printf("%+v", item)
		}
		st.total++

		finalDest := filepath.Join(out, item.hash[:2], item.hash[2:]+item.ext)
		_, err := os.Stat(finalDest)
		switch {
		case err == nil:
			// Same digest and extension already present in the bucket.
			log.Printf("dup detected: %+v", item)
			continue
		case !os.IsNotExist(err):
			// The destination could not be inspected; do not mislabel
			// this as a duplicate, but do not attempt the link either.
			log.Printf("problem checking %q: %+v", finalDest, err)
			continue
		}

		if err := os.Link(item.path, finalDest); err != nil {
			log.Printf("%+v", err)
			continue // a failed link must not be counted as moved
		}
		st.moved++
	}
	log.Printf("total files moved : %d", st.moved)
	log.Printf("total files processed : %d", st.total)
}
// source walks the file tree rooted at root in a background goroutine and
// sends the path of every non-directory entry on the returned channel. The
// channel is closed when the walk ends.
//
// If root itself cannot be accessed the walk is abandoned and the problem is
// logged. Any other entry that cannot be accessed is logged and skipped so
// the rest of the tree is still processed.
func source(root string) <-chan string {
	out := make(chan string)
	go func() {
		defer close(out)
		err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
			if err != nil {
				// Walk may report an error together with a nil info, so
				// info must not be touched on this path.
				if path == root {
					return err
				}
				log.Printf("problem accessing %q: %+v", path, err)
				return nil
			}
			if info.IsDir() {
				return nil
			}
			out <- path
			return nil
		})
		if err != nil {
			log.Printf("problem from crawling root %q: %+v", root, err)
		}
	}()
	return out
}
// compute starts a single worker goroutine that reads paths from work, hashes
// each one with _hash and forwards the successful results on the returned
// channel. Paths that fail to hash are logged and dropped. The returned
// channel is closed once work has been drained.
func compute(work <-chan string) <-chan input {
	results := make(chan input)
	go func() {
		defer close(results)
		for {
			p, ok := <-work
			if !ok {
				return
			}
			item, err := _hash(p)
			if err == nil {
				results <- item
				continue
			}
			log.Printf("problem hashing: %+v", err)
		}
	}()
	return results
}
// merge fans in the channels in cs: every value received from any of them is
// forwarded on the returned channel, which is closed after all inputs have
// been closed and drained. No ordering between different inputs is promised.
func merge(cs []<-chan input) <-chan input {
	out := make(chan input)
	var wg sync.WaitGroup

	// Register all forwarders before starting any of them. If Add ran after
	// the goroutines were launched, a forwarder whose input is already closed
	// could call Done first and drive the counter negative (a panic).
	wg.Add(len(cs))
	for _, c := range cs {
		go func(c <-chan input) {
			defer wg.Done()
			for n := range c {
				out <- n
			}
		}(c)
	}

	// Close out only after every forwarder has finished sending.
	go func() {
		wg.Wait()
		close(out)
	}()
	return out
}
// _hash opens the file at path, streams its contents through MD5 and returns
// an input holding the path, the lower-case hex digest and the lower-cased
// file extension.
//
// On failure it returns the zero input and an error that wraps the underlying
// cause, so callers can inspect it with errors.Is / errors.As.
func _hash(path string) (input, error) {
	f, err := os.Open(path)
	if err != nil {
		return input{}, fmt.Errorf("problem opening file: %w", err)
	}
	defer f.Close()

	// io.Copy streams the file into the hash, so large files are never held
	// in memory in full.
	hash := md5.New()
	if _, err := io.Copy(hash, f); err != nil {
		return input{}, fmt.Errorf("problem calculating hash for %q: %w", path, err)
	}

	return input{
		path: path,
		hash: fmt.Sprintf("%x", hash.Sum(nil)),
		ext:  strings.ToLower(filepath.Ext(path)),
	}, nil
}