arrange/arrange.go

229 lines
4.6 KiB
Go
Raw Normal View History

2016-05-17 23:03:37 -07:00
package arrange
import (
"crypto/md5"
"fmt"
"image/gif"
2016-05-17 23:03:37 -07:00
"image/jpeg"
"image/png"
2016-05-17 23:03:37 -07:00
"io"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
2016-05-18 00:19:39 -07:00
// exts is the set of file extensions, lower-case and including the leading
// dot, that are treated as media files. It is populated at declaration time
// (rather than in an init function) so it is already usable by any other
// package-level initialiser or init function in the package.
var exts = map[string]bool{
	// images
	".jpg":  true,
	".jpeg": true,
	".png":  true,
	".gif":  true,
	// videos
	".mov": true,
	".mp4": true,
	".m4v": true,
	".avi": true,
}
// mtime returns the modification time reported by os.Stat for path.
//
// If the stat call fails, the zero time.Time is returned together with an
// error describing the failure.
func mtime(path string) (time.Time, error) {
	info, err := os.Stat(path)
	if err != nil {
		return time.Time{}, fmt.Errorf("failure to collect times from stat: %v", err)
	}
	return info.ModTime(), nil
}
2016-05-19 21:29:17 -07:00
// PrepOutput creates the output directory layout beneath root: one directory
// for every two-digit lower-case hex prefix ("00" through "ff") under
// root/content, plus a root/date directory.
//
// Directories are created with os.MkdirAll and mode 0755, so directories that
// already exist are not an error. The first failure is returned unchanged.
func PrepOutput(root string) error {
	for i := 0; i <= 0xff; i++ {
		dirname := filepath.Join(root, "content", fmt.Sprintf("%02x", i))
		if err := os.MkdirAll(dirname, 0755); err != nil {
			return err
		}
	}
	return os.MkdirAll(filepath.Join(root, "date"), 0755)
}
2016-05-19 21:29:17 -07:00
// Source returns sends all files that match known extensions.
2016-05-18 00:19:39 -07:00
func Source(root string) <-chan string {
2016-05-17 23:03:37 -07:00
out := make(chan string)
go func() {
err := filepath.Walk(
root,
func(path string, info os.FileInfo, err error) error {
2016-05-18 21:04:21 -07:00
if err != nil {
return err
}
2016-05-17 23:03:37 -07:00
if info.IsDir() {
return nil
}
ext := strings.ToLower(filepath.Ext(path))
if _, ok := exts[ext]; ok {
out <- path
}
return nil
},
)
if err != nil {
log.Printf("problem during crawl: %+v", err)
}
close(out)
}()
return out
}
2016-05-19 21:29:17 -07:00
// Parse runs the file parser for each file on input chan, and sends results
// down output chan.
//
// Exists so that it can be called many times concurrently.
func Parse(in <-chan string) <-chan Media {
out := make(chan Media)
2016-05-17 23:03:37 -07:00
go func() {
for path := range in {
f, err := _parse(path)
if err != nil {
switch err.(type) {
case NotMedia:
log.Printf("%+v", err)
default:
log.Printf("parse error: %+v", err)
}
continue
} else {
out <- f
}
}
close(out)
}()
return out
}
2016-05-19 21:29:17 -07:00
// Move calls Move on each Media on input chan. It is the first step in the
// pipeline after fan-in.
func Move(in <-chan Media, root string) <-chan error {
2016-05-17 23:03:37 -07:00
out := make(chan error)
go func() {
for i := range in {
out <- i.Move(root)
}
close(out)
}()
return out
}
func _parse(path string) (Media, error) {
2016-05-17 23:03:37 -07:00
ext := strings.ToLower(filepath.Ext(path))
var r Media
hash := md5.New()
var t time.Time
f, err := os.Open(path)
if err != nil {
return r, fmt.Errorf("problem opening file: %v", err)
}
defer f.Close()
2016-05-17 23:03:37 -07:00
switch ext {
default:
return r, NotMedia{path}
2016-05-17 23:03:37 -07:00
case ".jpg", ".jpeg":
if _, err := jpeg.DecodeConfig(f); err != nil {
return r, NotMedia{path}
2016-05-17 23:03:37 -07:00
}
if _, err := f.Seek(0, 0); err != nil {
return r, fmt.Errorf("couldn't seek back in file: %v", err)
2016-05-17 23:03:37 -07:00
}
// try a few things for a time value
{
success := false
if t, err = parseExif(f); err == nil {
success = true
}
if !success {
t, err = mtime(path)
}
if err != nil {
return r, fmt.Errorf("unable to calculate reasonble time for jpg %q: %v", path, err)
2016-05-17 23:03:37 -07:00
}
}
case ".png":
if _, err := png.DecodeConfig(f); err != nil {
return r, NotMedia{path}
}
2016-05-17 23:03:37 -07:00
if _, err := f.Seek(0, 0); err != nil {
return r, fmt.Errorf("couldn't seek back in file: %v", err)
}
t, err = mtime(path)
if err != nil {
return r, fmt.Errorf("unable to calculate reasonble time for media %q: %v", path, err)
2016-05-17 23:03:37 -07:00
}
case ".gif":
if _, err := gif.DecodeConfig(f); err != nil {
return r, NotMedia{path}
2016-05-17 23:03:37 -07:00
}
if _, err := f.Seek(0, 0); err != nil {
return r, fmt.Errorf("couldn't seek back in file: %v", err)
}
t, err = mtime(path)
if err != nil {
return r, fmt.Errorf("unable to calculate reasonble time for media %q: %v", path, err)
2016-05-17 23:03:37 -07:00
}
2016-05-19 21:22:26 -07:00
case ".mov", ".mp4", ".m4v", ".avi":
t, err = mtime(path)
if err != nil {
return r, fmt.Errorf("unable to calculate reasonble time for media %q: %v", path, err)
}
}
if _, err := f.Seek(0, 0); err != nil {
return r, fmt.Errorf("couldn't seek back in file: %v", err)
}
if _, err := io.Copy(hash, f); err != nil {
return r, fmt.Errorf("problem calculating checksum on %q: %v", path, err)
}
r = Media{
Path: path,
Hash: fmt.Sprintf("%x", hash.Sum(nil)),
Extension: ext,
Time: t,
2016-05-17 23:03:37 -07:00
}
return r, nil
}
2016-05-19 21:29:17 -07:00
// Merge implements fan-in.
func Merge(cs []<-chan Media) <-chan Media {
out := make(chan Media)
2016-05-17 23:03:37 -07:00
var wg sync.WaitGroup
output := func(c <-chan Media) {
2016-05-17 23:03:37 -07:00
for n := range c {
out <- n
}
wg.Done()
}
for _, c := range cs {
go output(c)
}
wg.Add(len(cs))
go func() {
wg.Wait()
close(out)
}()
return out
}