arranging jpegs mostly right.

This commit is contained in:
Stephen McQuay 2016-05-17 01:18:53 -07:00
parent ed0cd04250
commit 2bb3b1a318
No known key found for this signature in database
GPG Key ID: 1ABF428F71BAFC3D
1 changed files with 249 additions and 33 deletions

282
main.go
View File

@ -1,20 +1,86 @@
package main
import (
"crypto/md5"
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"
"sync"
"github.com/rwcarlsen/goexif/exif"
)
const usage = "picmv <indir> <outdir>"
const usage = "aj <indir> <outdir>"
type input struct {
// file is anything that can place itself into the output tree.
type file interface {
	// move installs the receiver under the output directory root and
	// reports any problem encountered while doing so.
	move(root string) error
}
// jpg describes a JPEG from the input tree together with the metadata needed
// to place it in the output tree.
type jpg struct {
	path  string // location of the source file
	hash  string // content hash; hash[:2] picks the content subdirectory, hash[2:] the file name
	year  string // first level of the date directory
	month string // second level of the date directory
	time  string // base name of the entry inside the date directory
}

// move copies the file at j.path to <root>/content/<hash[:2]>/<hash[2:]>.jpg
// and then hard-links that copy to <root>/date/<year>/<month>/<time>.jpg,
// falling back to <time>_NNNN.jpg when that name is already taken.
//
// It returns a dup error when the content file already exists, and a plain
// error for every other failure. The content subdirectory is expected to
// exist already; the date directories are created on demand.
func (j jpg) move(root string) error {
	// Guard the slicing below: a short hash would otherwise panic.
	if len(j.hash) < 3 {
		return fmt.Errorf("invalid hash %q for %q", j.hash, j.path)
	}

	f, err := os.Open(j.path)
	if err != nil {
		return fmt.Errorf("problem opening jpg file: %v", err)
	}
	defer f.Close()

	content := filepath.Join(root, "content", j.hash[:2], j.hash[2:]+".jpg")
	switch _, err := os.Stat(content); {
	case err == nil:
		return dup{content}
	case !os.IsNotExist(err):
		// Anything other than "not there" (permissions, I/O trouble, ...) is
		// a real failure and must not be counted as a duplicate.
		return fmt.Errorf("problem checking for existing content: %v", err)
	}

	out, err := os.Create(content)
	if err != nil {
		return fmt.Errorf("could not create output file: %v", err)
	}
	_, err = io.Copy(out, f)
	// Close before linking so a failed flush is noticed as a copy failure.
	if cerr := out.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		// Best-effort cleanup: a truncated content file left behind would be
		// reported as a dup on the next run and the picture never archived.
		os.Remove(content)
		return fmt.Errorf("trouble copying file: %v", err)
	}

	if err := os.MkdirAll(filepath.Join(root, "date", j.year, j.month), 0755); err != nil {
		return fmt.Errorf("problem creating date directory: %v", err)
	}

	date := filepath.Join(root, "date", j.year, j.month, j.time)
	name := date + ".jpg"
	// Probe for a free name; if every candidate is taken the final os.Link
	// fails and reports the collision.
	for i := 0; i < 10000; i++ {
		if _, err := os.Stat(name); os.IsNotExist(err) {
			break
		}
		name = fmt.Sprintf("%s_%04d.jpg", date, i)
	}

	// TODO: or maybe symlinking? (issue #2)
	// rel := filepath.Join("..", "..", "..", "content", j.hash[:2], j.hash[2:]+".jpg")
	// return os.Symlink(rel, name)
	return os.Link(content, name)
}
// media carries the path of a file whose handling has not been written yet.
type media struct {
	path string
}

// move is a stub: it does no work and always reports "NYI" (not yet
// implemented), regardless of root.
func (media) move(root string) error {
	nyi := errors.New("NYI")
	return nyi
}
// stats holds the three counters reported at the end of a run: the number of
// results seen in total, how many of them were duplicates, and how many files
// were moved.
type stats struct {
	total, dupes, moved int
}
func main() {
@ -23,51 +89,201 @@ func main() {
os.Exit(1)
}
in, out := os.Args[1], os.Args[2]
log.Printf("%+v", in)
log.Printf("%+v", out)
count := 0
files := make(chan input, 20)
// seen := map[string]bool{}
if err := prepOutput(out); err != nil {
fmt.Fprintf(os.Stderr, "problem creating directory structure: %v", err)
os.Exit(1)
}
exts := map[string]bool{
// images
".jpg": true,
".jpeg": true,
".png": true,
".gif": true,
// videos
".mov": true,
".mp4": true,
".m4v": true,
}
work := source(in, exts)
streams := []<-chan file{}
for w := 0; w < 16; w++ {
streams = append(streams, parse(work))
}
st := stats{}
for err := range move(merge(streams), out) {
st.total++
if err != nil {
switch err.(type) {
case dup:
st.dupes++
default:
log.Printf("%+v", err)
}
} else {
st.moved++
}
}
log.Printf("dupes: %+v", st.dupes)
log.Printf("moved: %+v", st.moved)
log.Printf("total: %+v", st.total)
}
// prepOutput builds the skeleton of the output tree under root: one
// directory per two-digit lowercase hex prefix (00 through ff) inside
// "content", plus an empty "date" directory. Directories that already exist
// are left alone. The first failure aborts the setup and is returned as is.
func prepOutput(root string) error {
	contentDir := filepath.Join(root, "content")
	for prefix := 0; prefix < 256; prefix++ {
		shard := filepath.Join(contentDir, fmt.Sprintf("%02x", prefix))
		if err := os.MkdirAll(shard, 0755); err != nil {
			return err
		}
	}
	return os.MkdirAll(filepath.Join(root, "date"), 0755)
}
// source walks the directory tree rooted at root in a background goroutine
// and sends the path of every non-directory entry whose lowercased extension
// is a key of exts. Other files are logged as ignored. The returned channel
// is closed once the walk has finished.
func source(root string, exts map[string]bool) <-chan string {
	out := make(chan string)
	go func() {
		defer close(out)
		err := filepath.Walk(
			root,
			func(path string, info os.FileInfo, err error) error {
				if err != nil {
					// Walk passes a nil info when it could not stat the
					// entry (for example a missing root), so info must not
					// be touched here. Log and carry on with the rest.
					log.Printf("problem visiting %q: %v", path, err)
					return nil
				}
				if info.IsDir() {
					return nil
				}
				ext := strings.ToLower(filepath.Ext(path))
				if _, ok := exts[ext]; ok {
					out <- path
				} else {
					log.Printf("ignoring: %q", path)
				}
				return nil
			},
		)
		if err != nil {
			log.Printf("problem during crawl: %+v", err)
		}
	}()
	return out
}
// parse starts a goroutine that runs _parse on every path received from in
// and forwards each successfully parsed file on the returned channel. Paths
// that fail to parse are logged and dropped. The returned channel is closed
// once in has been drained.
func parse(in <-chan string) <-chan file {
	out := make(chan file)
	go func() {
		defer close(out)
		for path := range in {
			f, err := _parse(path)
			if err != nil {
				// Every failure, including "not media", is only logged.
				log.Printf("%+v", err)
				continue
			}
			out <- f
		}
	}()
	return out
}
// move starts a goroutine that calls move(root) on each file received from
// in, one at a time, and sends every result (nil included) on the returned
// channel. That channel is closed after in has been drained.
func move(in <-chan file, root string) <-chan error {
	results := make(chan error)
	go func() {
		defer close(results)
		for f := range in {
			results <- f.move(root)
		}
	}()
	return results
}
// _parse turns path into a file based on its lowercased extension.
//
// Only .jpg/.jpeg is implemented: the EXIF date is read, the whole file is
// MD5-summed, and a jpg carrying the path, hex digest, year, month and
// nanosecond timestamp is returned. PNG and the video extensions yield an
// "NYI" error. Any other extension, and any JPEG whose EXIF data cannot be
// decoded, yields notMedia.
func _parse(path string) (file, error) {
	switch strings.ToLower(filepath.Ext(path)) {
	case ".jpg", ".jpeg":
		// Handled after the switch.
	case ".png":
		return nil, fmt.Errorf("NYI: %q", path)
	case ".mov", ".mp4", ".m4v":
		return nil, fmt.Errorf("NYI: %q", path)
	default:
		return nil, notMedia{path}
	}

	src, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("problem opening file: %v", err)
	}
	defer src.Close()

	meta, err := exif.Decode(src)
	if err != nil {
		// TODO: sometimes valid jpgs have bad exif data (issue #1)
		return nil, notMedia{path}
	}
	taken, err := meta.DateTime()
	if err != nil {
		return nil, fmt.Errorf("problem getting datetime from pic %v: %v", path, err)
	}

	// Decoding consumed part of the file; rewind so the digest covers all of it.
	if _, err := src.Seek(0, 0); err != nil {
		return nil, fmt.Errorf("couldn't seek back in file: %v", err)
	}
	// TODO: multi writer with this to decide if it's valid jpg?
	digest := md5.New()
	if _, err := io.Copy(digest, src); err != nil {
		return nil, fmt.Errorf("problem calculating checksum on %q: %v", path, err)
	}

	return jpg{
		path:  path,
		hash:  fmt.Sprintf("%x", digest.Sum(nil)),
		year:  fmt.Sprintf("%04d", taken.Year()),
		month: fmt.Sprintf("%02d", taken.Month()),
		time:  fmt.Sprintf("%d", taken.UnixNano()),
	}, nil
}
// merge fans the channels in cs into a single channel. One goroutine per
// input forwards its values; the returned channel is closed after every
// input has been drained. No ordering between inputs is guaranteed.
func merge(cs []<-chan file) <-chan file {
	out := make(chan file)
	var wg sync.WaitGroup

	// Register all forwarders before starting any of them. If a forwarder
	// were to finish (its input already closed) before Add ran, Done would
	// drive the counter negative and panic.
	wg.Add(len(cs))
	for _, c := range cs {
		go func(c <-chan file) {
			defer wg.Done()
			for n := range c {
				out <- n
			}
		}(c)
	}

	// Close out only once every forwarder is done sending.
	go func() {
		wg.Wait()
		close(out)
	}()
	return out
}
// notMedia is an error value naming a path that is not treated as media.
type notMedia struct {
	path string
}

// Error implements the error interface; the path is rendered quoted.
func (e notMedia) Error() string {
	const format = "not media: %q"
	return fmt.Sprintf(format, e.path)
}
// dup is an error value naming a path that is considered a duplicate.
type dup struct {
	path string
}

// Error implements the error interface; the path is rendered quoted.
func (e dup) Error() string {
	const format = "dup: %q"
	return fmt.Sprintf(format, e.path)
}