A simple tool to de-duplicate and arrange media.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

arrange.go 5.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. package arrange
  2. import (
  3. "crypto/md5"
  4. "fmt"
  5. "image/gif"
  6. "image/jpeg"
  7. "image/png"
  8. "io"
  9. "log"
  10. "os"
  11. "path/filepath"
  12. "strings"
  13. "sync"
  14. "time"
  15. )
  16. var exts map[string]bool
  17. func init() {
  18. exts = map[string]bool{
  19. // images
  20. ".jpg": true,
  21. ".jpeg": true,
  22. ".png": true,
  23. ".gif": true,
  24. // videos
  25. ".mov": true,
  26. ".mp4": true,
  27. ".m4v": true,
  28. ".avi": true,
  29. }
  30. }
  31. func mtime(path string) (time.Time, error) {
  32. ti := time.Time{}
  33. s, err := os.Stat(path)
  34. if err != nil {
  35. return ti, fmt.Errorf("failure to collect times from stat: %v", err)
  36. }
  37. return s.ModTime(), nil
  38. }
  39. // PrepOutput creates all possible content-address prefix directories.
  40. func PrepOutput(root string) error {
  41. for i := 0; i <= 0xff; i++ {
  42. dirname := filepath.Join(root, "content", fmt.Sprintf("%02x", i))
  43. if err := os.MkdirAll(dirname, 0755); err != nil {
  44. return err
  45. }
  46. }
  47. if err := os.MkdirAll(filepath.Join(root, "date"), 0755); err != nil {
  48. return err
  49. }
  50. return nil
  51. }
  52. // Source returns sends all files that match known extensions.
  53. func Source(root string) <-chan string {
  54. out := make(chan string)
  55. go func() {
  56. err := filepath.Walk(
  57. root,
  58. func(path string, info os.FileInfo, err error) error {
  59. if err != nil {
  60. return err
  61. }
  62. if info.IsDir() {
  63. return nil
  64. }
  65. ext := strings.ToLower(filepath.Ext(path))
  66. if _, ok := exts[ext]; ok {
  67. out <- path
  68. }
  69. return nil
  70. },
  71. )
  72. if err != nil {
  73. log.Printf("problem during crawl: %+v", err)
  74. }
  75. close(out)
  76. }()
  77. return out
  78. }
  79. // Parse runs the file parser for each file on input chan, and sends results
  80. // down output chan.
  81. //
  82. // Exists so that it can be called many times concurrently.
  83. func Parse(in <-chan string) <-chan Media {
  84. out := make(chan Media)
  85. go func() {
  86. for path := range in {
  87. f, err := ParseFile(path)
  88. if err != nil {
  89. switch err.(type) {
  90. case NotMedia:
  91. log.Printf("%+v", err)
  92. default:
  93. log.Printf("parse error: %+v", err)
  94. }
  95. continue
  96. } else {
  97. out <- f
  98. }
  99. }
  100. close(out)
  101. }()
  102. return out
  103. }
  104. // MissingLink detects if the values coming from medias is a duplicate file
  105. // rather than a hardlink to the content store.
  106. func MissingLink(medias <-chan Media, root string) (<-chan Media, <-chan error) {
  107. out := make(chan Media)
  108. errs := make(chan error)
  109. go func() {
  110. for m := range medias {
  111. var d, c os.FileInfo
  112. var err error
  113. if d, err = os.Stat(m.Path); err != nil {
  114. errs <- err
  115. }
  116. if c, err = os.Stat(m.Content(root)); err != nil {
  117. errs <- err
  118. }
  119. if !os.SameFile(d, c) {
  120. out <- m
  121. }
  122. }
  123. close(errs)
  124. close(out)
  125. }()
  126. return out, errs
  127. }
  128. // Move calls Move on each Media on input chan. It is the first step in the
  129. // pipeline after fan-in.
  130. func Move(in <-chan Media, root string) <-chan error {
  131. out := make(chan error)
  132. go func() {
  133. for i := range in {
  134. out <- i.Move(root)
  135. }
  136. close(out)
  137. }()
  138. return out
  139. }
  140. // ParseFile extracts metadata from single file.
  141. func ParseFile(path string) (Media, error) {
  142. ext := strings.ToLower(filepath.Ext(path))
  143. var r Media
  144. hash := md5.New()
  145. var t time.Time
  146. f, err := os.Open(path)
  147. if err != nil {
  148. return r, fmt.Errorf("problem opening file: %v", err)
  149. }
  150. defer f.Close()
  151. switch ext {
  152. default:
  153. return r, NotMedia{path}
  154. case ".jpg", ".jpeg":
  155. if _, err := jpeg.DecodeConfig(f); err != nil {
  156. return r, NotMedia{path}
  157. }
  158. if _, err := f.Seek(0, 0); err != nil {
  159. return r, fmt.Errorf("couldn't seek back in file: %v", err)
  160. }
  161. // try a few things for a time value
  162. {
  163. success := false
  164. if t, err = parseExif(f); err == nil {
  165. success = true
  166. }
  167. if !success {
  168. t, err = mtime(path)
  169. }
  170. if err != nil {
  171. return r, fmt.Errorf("unable to calculate reasonble time for jpg %q: %v", path, err)
  172. }
  173. }
  174. case ".png":
  175. if _, err := png.DecodeConfig(f); err != nil {
  176. return r, NotMedia{path}
  177. }
  178. if _, err := f.Seek(0, 0); err != nil {
  179. return r, fmt.Errorf("couldn't seek back in file: %v", err)
  180. }
  181. t, err = mtime(path)
  182. if err != nil {
  183. return r, fmt.Errorf("unable to calculate reasonble time for media %q: %v", path, err)
  184. }
  185. case ".gif":
  186. if _, err := gif.DecodeConfig(f); err != nil {
  187. return r, NotMedia{path}
  188. }
  189. if _, err := f.Seek(0, 0); err != nil {
  190. return r, fmt.Errorf("couldn't seek back in file: %v", err)
  191. }
  192. t, err = mtime(path)
  193. if err != nil {
  194. return r, fmt.Errorf("unable to calculate reasonble time for media %q: %v", path, err)
  195. }
  196. case ".mov", ".mp4", ".m4v", ".avi":
  197. t, err = mtime(path)
  198. if err != nil {
  199. return r, fmt.Errorf("unable to calculate reasonble time for media %q: %v", path, err)
  200. }
  201. }
  202. if _, err := f.Seek(0, 0); err != nil {
  203. return r, fmt.Errorf("couldn't seek back in file: %v", err)
  204. }
  205. if _, err := io.Copy(hash, f); err != nil {
  206. return r, fmt.Errorf("problem calculating checksum on %q: %v", path, err)
  207. }
  208. r = Media{
  209. Path: path,
  210. Hash: fmt.Sprintf("%x", hash.Sum(nil)),
  211. Extension: ext,
  212. Time: t,
  213. }
  214. return r, nil
  215. }
  216. // Merge implements fan-in.
  217. func Merge(cs []<-chan Media) <-chan Media {
  218. out := make(chan Media)
  219. var wg sync.WaitGroup
  220. output := func(c <-chan Media) {
  221. for n := range c {
  222. out <- n
  223. }
  224. wg.Done()
  225. }
  226. for _, c := range cs {
  227. go output(c)
  228. }
  229. wg.Add(len(cs))
  230. go func() {
  231. wg.Wait()
  232. close(out)
  233. }()
  234. return out
  235. }