-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjester.rb
More file actions
125 lines (113 loc) · 3.71 KB
/
jester.rb
File metadata and controls
125 lines (113 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env ruby
# Copyright (C) 2016 MLE Slone
# License: MIT
# TODO: Get a better way to do this.
# Put the lib/ directory that sits next to this script at the front of the
# load path, unless it is already listed there.
lib_path = File.expand_path('lib', File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)

require 'logger'

# Global logger: writes INFO and above to standard output, rendering every
# entry as "jester [<timestamp>]: <message>".
$logger = Logger.new(STDOUT)
$logger.level = Logger::INFO
$logger.formatter = proc { |_severity, datetime, _progname, msg| "jester [#{datetime}]: #{msg}\n" }
# Log +message+ at INFO severity through the global $logger.
#
# @param message [Object] the text to log
# @return [true] the value returned by the logger
def say(message)
  $logger.add(Logger::INFO, message)
end
# Directory containing this script; the lock file lives in its tmp/ child.
dir = File.expand_path File.dirname(__FILE__)
lockfile = File.join dir, 'tmp', 'jester.lock'

# Take an exclusive, non-blocking lock so that only one instance runs at a time.
say "acquiring lock"

# Keep the handle in a variable for the rest of the script. The lock belongs
# to the open file, and an unreferenced File may be garbage-collected and
# closed at any moment, which would silently release the lock mid-run.
lock_handle = File.new(lockfile, 'w')

# With LOCK_NB, flock returns false instead of blocking when the lock is held
# elsewhere, and 0 (truthy in Ruby) on success.
if lock_handle.flock(File::LOCK_NB | File::LOCK_EX)
  say "lock acquired"
else
  say "failed to acquire lock, exiting"
  lock_handle.close
  exit
end
require 'fileutils'
require 'find'
require 'jester'
require 'pairtree'
require 'parallel'
# Working directories, each a direct child of `dir`.
inbox, todo, success, failure, work =
  %w[inbox todo success failure work].map { |name| File.join(dir, name) }

# Move every regular file found anywhere under inbox/ into todo/.
Find.find(inbox) { |entry| FileUtils.mv(entry, todo) if File.file?(entry) }
# Build the job list: the basename of every regular file found under todo/.
jobs = Find.find(todo)
           .select { |entry| File.file?(entry) }
           .map { |entry| File.basename(entry) }
say "queued #{jobs.count} jobs"
# First two command-line arguments: the pairtree root to read from and the
# pairtree root to write into.
dipdir, outdir = ARGV

# Any third argument switches the base URL from ".../dips" to ".../dipstest".
# Disabled alternative host: https://nyx.uky.edu (same /dipstest and /dips paths).
base_url = ARGV.length > 2 ? "https://exploreuk.uky.edu/dipstest" : "https://exploreuk.uky.edu/dips"
puts base_url
#STDERR.puts "howdy #{ARGV.join ' '}"

# The input tree must already exist; the output tree is created on demand.
diptree = Pairtree.at(dipdir, create: false)
outtree = Pairtree.at(outdir, create: true)
# Failures that should fail a single job rather than abort the whole run.
# Deliberately narrower than Exception, so Interrupt/SignalException,
# SystemExit and NoMemoryError propagate instead of being swallowed.
job_errors = [StandardError, ScriptError, SystemStackError]

# Process every queued job id in parallel. For each id:
#   1. locate the object in the input pairtree and read its METS metadata,
#   2. run Jester.idify on the referenced EAD, writing the result into work/,
#   3. index the EAD into the output pairtree, write header.xml and the links,
#   4. move the job file from todo/ to success/ or, on error, to failure/.
Parallel.each(jobs) do |id|
  begin
    say "processing #{id}"
    dip = diptree.get(id)
    catalog_url = "http://exploreuk.uky.edu/catalog/#{id}"
    mets = File.join dip, 'data', 'mets.xml'

    metadata = Jester::MetadataReader.new(mets)
    repository = metadata.repository(mets)
    say "repository for #{id}: #{repository}"

    ead_href = metadata.get_ead(mets)
    ead_url = "#{base_url}/#{id}/#{ead_href}"
    raw_eadfile = File.join(dip, ead_href)
    say "ead_url: #{ead_url}"
    say "ead: #{raw_eadfile}"

    eadfile = File.join(work, "#{id}.xml")
    say "ead: #{raw_eadfile} -> #{eadfile}"
    Jester.idify(raw_eadfile, eadfile)

    todofile = File.join todo, id
    begin
      xml = File.read(eadfile)
      ead = ExploreEad.from_xml(xml)
      mets_reader = Jester::MetsReader.new(id, mets, base_url)
      # Result is unused; the call is kept because it may raise on input it
      # cannot parse. NOTE(review): confirm, and drop the call if it cannot.
      ExploreComponents.from_xml(xml)
      special = ExploreSpecial.new xml

      out = outtree.mk(id)
      indexer = FileIndexer.new({
        flat: out,
        options: {document: ExploreEad, component: ExploreComponents, id: id},
      })

      # The block form closes the EAD handle even when indexing raises. It
      # spans every use of `indexer`, so the handle stays open for as long as
      # the indexer might need it.
      File.open(eadfile, 'r') do |ead_io|
        say "splitting #{id}"
        indexer.create(ead_io)

        say "writing header for #{id}"
        out.open('header.xml', 'w') do |f|
          # XXX consider just passing special?
          template = Haml::Template.new("haml/header.haml", escape_html: false)
          f.write template.render(Object.new, {
            :ead => ead,
            :components => indexer.top_components,
            :catalog_url => catalog_url,
            :ead_url => ead_url,
            :repository => repository,
            :special => special,
            :mets => mets_reader,
          })
        end
      end

      say "reading daos from EAD"
      printer = Jester::LinkPrinter.new(out)
      printer.insert_daos_from(xml)

      say "reading links from METS"
      mets_reader.linksets.each do |linkset|
        printer.insert_linkset(linkset)
      end

      say "printing bucketed links"
      printer.print

      FileUtils.mv todofile, success
    rescue *job_errors => e
      STDERR.puts e.inspect
      FileUtils.mv todofile, failure
    end
  rescue *job_errors => e
    # Reached when setup fails before the inner begin, or when the move to
    # failure/ itself raises. The job file is not moved here, so if it is
    # still in todo/ the next run will queue it again.
    STDERR.puts e.inspect
  end
end