From 598a57468592a60847dc01f28faecc406dc22492 Mon Sep 17 00:00:00 2001 From: "Stephen McQuay (smcquay)" Date: Tue, 17 May 2016 23:03:37 -0700 Subject: [PATCH] refactor --- arrange.go | 169 +++++++++++++++++++++++++ cmd/am/main.go | 70 +++++++++++ errors.go | 19 +++ image.go | 92 ++++++++++++++ main.go | 330 ------------------------------------------------- 5 files changed, 350 insertions(+), 330 deletions(-) create mode 100644 arrange.go create mode 100644 cmd/am/main.go create mode 100644 errors.go create mode 100644 image.go delete mode 100644 main.go diff --git a/arrange.go b/arrange.go new file mode 100644 index 0000000..aba4f60 --- /dev/null +++ b/arrange.go @@ -0,0 +1,169 @@ +package arrange + +import ( + "crypto/md5" + "fmt" + "image/jpeg" + "io" + "log" + "os" + "path/filepath" + "strings" + "sync" + "time" +) + +type File interface { + Move(root string) error +} + +func PrepOutput(root string) error { + for i := 0; i <= 0xff; i++ { + dirname := filepath.Join(root, "content", fmt.Sprintf("%02x", i)) + if err := os.MkdirAll(dirname, 0755); err != nil { + return err + } + } + if err := os.MkdirAll(filepath.Join(root, "date"), 0755); err != nil { + return err + } + return nil +} + +func Source(root string, exts map[string]bool) <-chan string { + out := make(chan string) + go func() { + err := filepath.Walk( + root, + func(path string, info os.FileInfo, err error) error { + if info.IsDir() { + return nil + } + ext := strings.ToLower(filepath.Ext(path)) + if _, ok := exts[ext]; ok { + out <- path + } else { + log.Printf("ignoring: %q", path) + } + return nil + }, + ) + if err != nil { + log.Printf("problem during crawl: %+v", err) + } + close(out) + }() + return out +} + +func Parse(in <-chan string) <-chan File { + out := make(chan File) + go func() { + for path := range in { + f, err := _parse(path) + if err != nil { + switch err.(type) { + case NotMedia: + log.Printf("%+v", err) + default: + log.Printf("parse error: %+v", err) + } + continue + } else { + out <- f + } + } + close(out) + }() + + return out +} + +func Move(in <-chan File, root string) <-chan error { + out := make(chan error) + go func() { + for i := range in { + out <- i.Move(root) + } + close(out) + }() + return out +} + +func _parse(path string) (File, error) { + ext := strings.ToLower(filepath.Ext(path)) + var r File + switch ext { + default: + return nil, NotMedia{path} + case ".jpg", ".jpeg": + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("problem opening file: %v", err) + } + defer f.Close() + + if _, err := jpeg.DecodeConfig(f); err != nil { + return nil, NotMedia{path} + } + if _, err := f.Seek(0, 0); err != nil { + return nil, fmt.Errorf("couldn't seek back in file: %v", err) + } + + // try a few things for a time value + var t time.Time + { + success := false + if t, err = parseExif(f); err == nil { + success = true + } + if !success { + log.Printf("no exif for %q: %+v", path, err) + t, err = mtime(path) + } + if err != nil { + return nil, fmt.Errorf("unable to calculate reasonble time for jpg %q: %v", path, err) + } + } + + if _, err := f.Seek(0, 0); err != nil { + return nil, fmt.Errorf("couldn't seek back in file: %v", err) + } + hash := md5.New() + if _, err := io.Copy(hash, f); err != nil { + return nil, fmt.Errorf("problem calculating checksum on %q: %v", path, err) + } + r = Image{ + Path: path, + Hash: fmt.Sprintf("%x", hash.Sum(nil)), + Year: fmt.Sprintf("%04d", t.Year()), + Month: fmt.Sprintf("%02d", t.Month()), + Time: fmt.Sprintf("%d", t.UnixNano()), + } + case ".png": + return nil, fmt.Errorf("NYI: %q", path) + case ".mov", ".mp4", ".m4v": + return nil, fmt.Errorf("NYI: %q", path) + } + return r, nil +} + +func Merge(cs []<-chan File) <-chan File { + out := make(chan File) + var wg sync.WaitGroup + output := func(c <-chan File) { + for n := range c { + out <- n + } + wg.Done() + } + for _, c := range cs { + go output(c) + } + wg.Add(len(cs)) + go func() { + wg.Wait() + close(out) + }() + return out +} diff --git a/cmd/am/main.go b/cmd/am/main.go new file mode 100644 index 0000000..4a3805b --- /dev/null +++ b/cmd/am/main.go @@ -0,0 +1,70 @@ +package main + +import ( + "fmt" + "log" + "os" + + "mcquay.me/arrange" +) + +const usage = "aj " + +type stats struct { + total int + dupes int + moved int +} + +func main() { + log.SetFlags(log.Lshortfile) + if len(os.Args) != 3 { + fmt.Fprintf(os.Stderr, "%s\n", usage) + os.Exit(1) + } + in, out := os.Args[1], os.Args[2] + + if err := arrange.PrepOutput(out); err != nil { + fmt.Fprintf(os.Stderr, "problem creating directory structure: %v", err) + os.Exit(1) + } + + exts := map[string]bool{ + // images + ".jpg": true, + ".jpeg": true, + ".png": true, + ".gif": true, + + // videos + ".mov": true, + ".mp4": true, + ".m4v": true, + } + + work := arrange.Source(in, exts) + streams := []<-chan arrange.File{} + + for w := 0; w < 16; w++ { + streams = append(streams, arrange.Parse(work)) + } + + st := stats{} + for err := range arrange.Move(arrange.Merge(streams), out) { + st.total++ + if err != nil { + switch err.(type) { + case arrange.Dup: + st.dupes++ + default: + log.Printf("%+v", err) + } + } else { + st.moved++ + } + } + + log.Printf("dupes: %+v", st.dupes) + log.Printf("moved: %+v", st.moved) + log.Printf("total: %+v", st.total) +} diff --git a/errors.go b/errors.go new file mode 100644 index 0000000..3465d2c --- /dev/null +++ b/errors.go @@ -0,0 +1,19 @@ +package arrange + +import "fmt" + +type NotMedia struct { + Path string +} + +func (nm NotMedia) Error() string { + return fmt.Sprintf("not media: %q", nm.Path) +} + +type Dup struct { + Path string +} + +func (d Dup) Error() string { + return fmt.Sprintf("dup: %q", d.Path) +} diff --git a/image.go b/image.go new file mode 100644 index 0000000..968dc39 --- /dev/null +++ b/image.go @@ -0,0 +1,92 @@ +package arrange + +import ( + "errors" + "fmt" + "io" + "os" + "path/filepath" + "time" + + "github.com/rwcarlsen/goexif/exif" +) + +type Media struct { + Path string +} + +func (m Media) Move(root string) error { + return errors.New("NYI") +} + +type Image struct { + Path string + Hash string + Year string + Month string + Time string +} + +func (im Image) Move(root string) error { + f, err := os.Open(im.Path) + if err != nil { + return fmt.Errorf("problem opening jpg file: %v", err) + } + defer f.Close() + + content := filepath.Join(root, "content", im.Hash[:2], im.Hash[2:]+".jpg") + + if _, err := os.Stat(content); !os.IsNotExist(err) { + return Dup{content} + } + + out, err := os.Create(content) + if err != nil { + return fmt.Errorf("could not create output file: %v", err) + } + defer out.Close() + + if _, err := io.Copy(out, f); err != nil { + return fmt.Errorf("trouble copying file: %v", err) + } + if err := os.MkdirAll(filepath.Join(root, "date", im.Year, im.Month), 0755); err != nil { + return fmt.Errorf("problem creating date directory: %v", err) + } + + date := filepath.Join(root, "date", im.Year, im.Month, im.Time) + name := date + ".jpg" + for i := 0; i < 10000; i++ { + if _, err := os.Stat(name); os.IsNotExist(err) { + break + } + name = fmt.Sprintf("%s_%04d.jpg", date, i) + } + + // TODO: or maybe symlinking? (issue #2) + // rel := filepath.Join("..", "..", "..", "content", j.hash[:2], j.hash[2:]+".jpg") + // return os.Symlink(rel, name) + return os.Link(content, name) +} +func parseExif(f io.Reader) (time.Time, error) { + ti := time.Time{} + x, err := exif.Decode(f) + if err != nil { + if exif.IsCriticalError(err) { + return ti, err + } + } + tm, err := x.DateTime() + if err != nil { + return ti, fmt.Errorf("no datetime in an ostensibly valid exif %v", err) + } + return tm, nil +} + +func mtime(path string) (time.Time, error) { + ti := time.Time{} + s, err := os.Stat(path) + if err != nil { + return ti, fmt.Errorf("failure to collect times from stat: %v", err) + } + return s.ModTime(), nil +} diff --git a/main.go b/main.go deleted file mode 100644 index 0504f86..0000000 --- a/main.go +++ /dev/null @@ -1,330 +0,0 @@ -package main - -import ( - "crypto/md5" - "errors" - "fmt" - "image/jpeg" - "io" - "log" - "os" - "path/filepath" - "strings" - "sync" - "time" - - "github.com/rwcarlsen/goexif/exif" -) - -const usage = "aj " - -type file interface { - move(root string) error -} - -type jpg struct { - path string - hash string - year string - month string - time string -} - -func (j jpg) move(root string) error { - f, err := os.Open(j.path) - if err != nil { - return fmt.Errorf("problem opening jpg file: %v", err) - } - defer f.Close() - - content := filepath.Join(root, "content", j.hash[:2], j.hash[2:]+".jpg") - - if _, err := os.Stat(content); !os.IsNotExist(err) { - return dup{content} - } - - out, err := os.Create(content) - if err != nil { - return fmt.Errorf("could not create output file: %v", err) - } - defer out.Close() - - if _, err := io.Copy(out, f); err != nil { - return fmt.Errorf("trouble copying file: %v", err) - } - if err := os.MkdirAll(filepath.Join(root, "date", j.year, j.month), 0755); err != nil { - return fmt.Errorf("problem creating date directory: %v", err) - } - - date := filepath.Join(root, "date", j.year, j.month, j.time) - name := date + ".jpg" - for i := 0; i < 10000; i++ { - if _, err := os.Stat(name); os.IsNotExist(err) { - break - } - name = fmt.Sprintf("%s_%04d.jpg", date, i) - } - - // TODO: or maybe symlinking? (issue #2) - // rel := filepath.Join("..", "..", "..", "content", j.hash[:2], j.hash[2:]+".jpg") - // return os.Symlink(rel, name) - return os.Link(content, name) -} - -type media struct { - path string -} - -func (m media) move(root string) error { - return errors.New("NYI") -} - -type stats struct { - total int - dupes int - moved int -} - -func main() { - log.SetFlags(log.Lshortfile) - if len(os.Args) != 3 { - fmt.Fprintf(os.Stderr, "%s\n", usage) - os.Exit(1) - } - in, out := os.Args[1], os.Args[2] - - if err := prepOutput(out); err != nil { - fmt.Fprintf(os.Stderr, "problem creating directory structure: %v", err) - os.Exit(1) - } - - exts := map[string]bool{ - // images - ".jpg": true, - ".jpeg": true, - ".png": true, - ".gif": true, - - // videos - ".mov": true, - ".mp4": true, - ".m4v": true, - } - - work := source(in, exts) - streams := []<-chan file{} - - for w := 0; w < 16; w++ { - streams = append(streams, parse(work)) - } - - st := stats{} - for err := range move(merge(streams), out) { - st.total++ - if err != nil { - switch err.(type) { - case dup: - st.dupes++ - default: - log.Printf("%+v", err) - } - } else { - st.moved++ - } - } - - log.Printf("dupes: %+v", st.dupes) - log.Printf("moved: %+v", st.moved) - log.Printf("total: %+v", st.total) -} - -func prepOutput(root string) error { - for i := 0; i <= 0xff; i++ { - dirname := filepath.Join(root, "content", fmt.Sprintf("%02x", i)) - if err := os.MkdirAll(dirname, 0755); err != nil { - return err - } - } - if err := os.MkdirAll(filepath.Join(root, "date"), 0755); err != nil { - return err - } - return nil -} - -func source(root string, exts map[string]bool) <-chan string { - out := make(chan string) - go func() { - err := filepath.Walk( - root, - func(path string, info os.FileInfo, err error) error { - if info.IsDir() { - return nil - } - ext := strings.ToLower(filepath.Ext(path)) - if _, ok := exts[ext]; ok { - out <- path - } else { - log.Printf("ignoring: %q", path) - } - return nil - }, - ) - if err != nil { - log.Printf("problem during crawl: %+v", err) - } - close(out) - }() - return out -} - -func parse(in <-chan string) <-chan file { - out := make(chan file) - go func() { - for path := range in { - f, err := _parse(path) - if err != nil { - switch err.(type) { - case notMedia: - log.Printf("%+v", err) - default: - log.Printf("parse error: %+v", err) - } - continue - } else { - out <- f - } - } - close(out) - }() - - return out -} - -func move(in <-chan file, root string) <-chan error { - out := make(chan error) - go func() { - for i := range in { - out <- i.move(root) - } - close(out) - }() - return out -} - -func _parse(path string) (file, error) { - ext := strings.ToLower(filepath.Ext(path)) - var r file - switch ext { - default: - return nil, notMedia{path} - case ".jpg", ".jpeg": - f, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("problem opening file: %v", err) - } - defer f.Close() - - if _, err := jpeg.DecodeConfig(f); err != nil { - return nil, notMedia{path} - } - if _, err := f.Seek(0, 0); err != nil { - return nil, fmt.Errorf("couldn't seek back in file: %v", err) - } - - // try a few things for a time value - var t time.Time - { - success := false - if t, err = parseExif(f); err == nil { - success = true - } - if !success { - log.Printf("no exif for %q: %+v", path, err) - t, err = mtime(path) - } - if err != nil { - return nil, fmt.Errorf("unable to calculate reasonble time for jpg %q: %v", path, err) - } - } - - if _, err := f.Seek(0, 0); err != nil { - return nil, fmt.Errorf("couldn't seek back in file: %v", err) - } - hash := md5.New() - if _, err := io.Copy(hash, f); err != nil { - return nil, fmt.Errorf("problem calculating checksum on %q: %v", path, err) - } - r = jpg{ - path: path, - hash: fmt.Sprintf("%x", hash.Sum(nil)), - year: fmt.Sprintf("%04d", t.Year()), - month: fmt.Sprintf("%02d", t.Month()), - time: fmt.Sprintf("%d", t.UnixNano()), - } - case ".png": - return nil, fmt.Errorf("NYI: %q", path) - case ".mov", ".mp4", ".m4v": - return nil, fmt.Errorf("NYI: %q", path) - } - return r, nil -} - -func merge(cs []<-chan file) <-chan file { - out := make(chan file) - var wg sync.WaitGroup - output := func(c <-chan file) { - for n := range c { - out <- n - } - wg.Done() - } - for _, c := range cs { - go output(c) - } - wg.Add(len(cs)) - go func() { - wg.Wait() - close(out) - }() - return out -} - -type notMedia struct { - path string -} - -func (nm notMedia) Error() string { - return fmt.Sprintf("not media: %q", nm.path) -} - -type dup struct { - path string -} - -func (d dup) Error() string { - return fmt.Sprintf("dup: %q", d.path) -} - -func parseExif(f io.Reader) (time.Time, error) { - ti := time.Time{} - x, err := exif.Decode(f) - if err != nil { - if exif.IsCriticalError(err) { - return ti, err - } - } - tm, err := x.DateTime() - if err != nil { - return ti, fmt.Errorf("no datetime in an ostensibly valid exif %v", err) - } - return tm, nil -} - -func mtime(path string) (time.Time, error) { - ti := time.Time{} - s, err := os.Stat(path) - if err != nil { - return ti, fmt.Errorf("failure to collect times from stat: %v", err) - } - return s.ModTime(), nil -}