From 1abff34b3d516655eb70a008edd85e3c0bc0f324 Mon Sep 17 00:00:00 2001
From: "Stephen McQuay (smcquay)"
Date: Sat, 9 Jul 2016 21:40:14 -0700
Subject: [PATCH] add missing link cleanup subcommand.

---
Review amendments to the original submission:
 - MissingLink no longer emits a Media whose path or content entry could
   not be stat'd (previously such entries were emitted and then removed).
 - clean waits for its error collector before returning, no longer prefixes
   the combined error with "<nil>", reports non-ENOENT stat failures, and
   relinks through a temporary name instead of remove-then-link.
 - eMerge calls wg.Add before starting its forwarders.
The blob ids on the index lines below are those of the original submission
and need regenerating.

 arrange.go      |  33 +++++++++++++++
 cmd/am/clean.go | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++
 cmd/am/main.go  |  14 ++++++-
 media.go        |   8 +++-
 4 files changed, 161 insertions(+), 2 deletions(-)
 create mode 100644 cmd/am/clean.go

diff --git a/arrange.go b/arrange.go
index edfdc17..531dc5b 100644
--- a/arrange.go
+++ b/arrange.go
@@ -111,6 +111,39 @@ func Parse(in <-chan string) <-chan Media {
 	return out
 }
 
+// MissingLink emits every Media from medias whose file at m.Path is not the
+// same file as its entry in the content store under root (per os.SameFile),
+// i.e. a duplicate copy rather than a hardlink.
+//
+// A Media whose path or content entry cannot be stat'd is reported on the
+// error channel and is not emitted: the clean subcommand replaces emitted
+// paths with a link to the content entry, so emitting one that could not be
+// verified risks destroying it. Both channels are closed once medias is drained.
+func MissingLink(medias <-chan Media, root string) (<-chan Media, <-chan error) {
+	out := make(chan Media)
+	errs := make(chan error)
+	go func() {
+		for m := range medias {
+			d, err := os.Stat(m.Path)
+			if err != nil {
+				errs <- err
+				continue
+			}
+			c, err := os.Stat(m.Content(root))
+			if err != nil {
+				errs <- err
+				continue
+			}
+			if !os.SameFile(d, c) {
+				out <- m
+			}
+		}
+		close(errs)
+		close(out)
+	}()
+	return out, errs
+}
+
 // Move calls Move on each Media on input chan. It is the first step in the
 // pipeline after fan-in.
 func Move(in <-chan Media, root string) <-chan error {
diff --git a/cmd/am/clean.go b/cmd/am/clean.go
new file mode 100644
index 0000000..773a4fa
--- /dev/null
+++ b/cmd/am/clean.go
@@ -0,0 +1,108 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"runtime"
+	"sync"
+
+	"mcquay.me/arrange"
+)
+
+// clean feeds arrange.Source(dir/date) through Parse and MissingLink and
+// replaces each Media reported there with a hardlink to its content entry.
+//
+// Errors from the detection pipeline are logged and returned combined into a
+// single error. A failure to relink one file is only logged, so the remaining
+// files are still processed.
+func clean(dir string) error {
+	dateDir := filepath.Join(dir, "date")
+	if _, err := os.Stat(dateDir); err != nil {
+		if os.IsNotExist(err) {
+			return fmt.Errorf("couldn't find 'date' dir in %q", dir)
+		}
+		return err
+	}
+
+	work := arrange.Source(dateDir)
+	streams := []<-chan arrange.Media{}
+	errs := []<-chan error{}
+
+	workers := runtime.NumCPU()
+	if *cores != 0 {
+		workers = *cores
+	}
+
+	for w := 0; w < workers; w++ {
+		s, e := arrange.MissingLink(arrange.Parse(work), dir)
+		streams = append(streams, s)
+		errs = append(errs, e)
+	}
+
+	// Collect errors in the background. done is closed once every error
+	// stream is drained, so reading err after <-done is race-free and sees
+	// every reported error.
+	var err error
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		for e := range eMerge(errs) {
+			log.Printf("%+v", e)
+			if err == nil {
+				err = e
+				continue
+			}
+			err = fmt.Errorf("%v, %v", err, e)
+		}
+	}()
+
+	for m := range arrange.Merge(streams) {
+		content := m.Content(dir)
+		log.Printf("%q > %q", m.Path, content)
+		if err := relink(content, m.Path); err != nil {
+			log.Printf("%+v", err)
+		}
+	}
+
+	<-done
+	return err
+}
+
+// relink replaces path with a hardlink to content. The link is created under
+// a temporary name and renamed over path, so path is not removed unless its
+// replacement already exists.
+func relink(content, path string) error {
+	tmp := path + ".am-tmp"
+	if err := os.Link(content, tmp); err != nil {
+		return err
+	}
+	// After a successful rename tmp no longer exists and this is a no-op; it
+	// only cleans up when the rename failed or left tmp behind.
+	defer os.Remove(tmp)
+	return os.Rename(tmp, path)
+}
+
+// eMerge fans the error channels in cs into one channel, which is closed once
+// every input channel has been closed and drained.
+func eMerge(cs []<-chan error) <-chan error {
+	out := make(chan error)
+	var wg sync.WaitGroup
+	// Add before starting the forwarders: one whose input is already closed
+	// could otherwise call Done before Add and panic the WaitGroup.
+	wg.Add(len(cs))
+	for _, c := range cs {
+		go func(c <-chan error) {
+			defer wg.Done()
+			for e := range c {
+				out <- e
+			}
+		}(c)
+	}
+	go func() {
+		wg.Wait()
+		close(out)
+	}()
+	return out
+}
diff --git a/cmd/am/main.go b/cmd/am/main.go
index 9c204a7..431d8b0 100644
--- a/cmd/am/main.go
+++ b/cmd/am/main.go
@@ -7,8 +7,9 @@ import (
 	"os"
 )
 
-const usage = "am [flags]"
+const usage = "am [flags]"
 const arrUsage = "am arr [-h|-cores=N] "
+const cleanUsage = "am clean [-h|-cores=N] "
 
 type stats struct {
 	total int
@@ -41,6 +42,17 @@ func main() {
 			fmt.Fprintf(os.Stderr, "problem arranging media: %v\n", err)
 			os.Exit(1)
 		}
+	case "c", "cl", "clean":
+		args := flag.Args()
+		if len(args) != 1 {
+			fmt.Fprintf(os.Stderr, "%s\n", cleanUsage)
+			os.Exit(1)
+		}
+		dir := args[0]
+		if err := clean(dir); err != nil {
+			fmt.Fprintf(os.Stderr, "problem cleaning: %v\n", err)
+			os.Exit(1)
+		}
 	default:
 		fmt.Fprintf(os.Stderr, "%s\n", usage)
 		os.Exit(1)
diff --git a/media.go b/media.go
index fae67c4..691552b 100644
--- a/media.go
+++ b/media.go
@@ -25,7 +25,7 @@ func (m Media) Move(root string) error {
 	}
 	defer f.Close()
 
-	content := filepath.Join(root, "content", m.Hash[:2], m.Hash[2:]+m.Extension)
+	content := m.Content(root)
 
 	if _, err := os.Stat(content); !os.IsNotExist(err) {
 		return Dup{content}
@@ -67,3 +67,9 @@ func (m Media) Move(root string) error {
 	// return os.Symlink(rel, name)
 	return os.Link(content, name)
 }
+
+// Content returns the content-address path starting at root. It panics if
+// m.Hash is shorter than two bytes.
+func (m Media) Content(root string) string {
+	return filepath.Join(root, "content", m.Hash[:2], m.Hash[2:]+m.Extension)
+}