Skip to content

Commit d6f5994

Browse files
Merge pull request #9 from franTarkenton/master
New Schedule module to FMEUtil
2 parents 8116e56 + aa3a4b0 commit d6f5994

File tree

4 files changed

+432
-51
lines changed

4 files changed

+432
-51
lines changed

FMEUtil/FMEUtil/FMEScheduleLib.py

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
'''
2+
Created on Jul 13, 2018
3+
4+
@author: kjnether
5+
'''
6+
import copy
7+
import datetime
8+
import json
9+
import logging
10+
import os.path
11+
import pprint
12+
13+
import FMEUtil.PyFMEServerV2
14+
import deepdiff
15+
16+
17+
class Schedules(object):
    '''
    Provides an api to work with the schedules of a FME Server instance.
    Mostly provides caching functionality.  Caching exists due to extremely
    poor performance of schedule queries against fme server 2015.

    :ivar fme: a fme server object
    :type fme: FMEUtil.PyFMEServerV2.FMEServer

    :ivar scheds: fme schedule object
    :type scheds: FMEUtil.PyFMEServerV2.Schedule

    :ivar schedStruct: a data structure describing the schedules for the
                       fme server instance that was specified.  It is
                       iterated as a list of schedule dictionaries, each of
                       which is looked up by its 'name' key.
    :type schedStruct: list

    :ivar schedStructComparison: a deep copy of schedStruct with the fields
                       described in flds2Ignore (and optionally the
                       published parameters) removed from every schedule.
                       This is the structure used by 'in' and '-'.
    :type schedStructComparison: list

    :ivar flds2Ignore: fields that should be ignored when doing any comparison
                       operations. (in, -, etc)
    :type flds2Ignore: list

    :ivar ignorePP: when True the request's 'publishedParameters' are not
                    considered when comparing schedules
    :type ignorePP: bool

    :ivar cacheFile: full path to the json file the schedules are cached in
    :type cacheFile: str
    '''

    def __init__(self, fmeServUrl, fmeServToken, cacheLabel, cacheDir,
                 refreshCache=False, ignorePP=False):
        '''
        :param fmeServUrl: url to fme server, don't include paths
        :param fmeServToken: token to fme server
        :param cacheLabel: a label that is used to calculate the schedule cache
                           file name
        :param cacheDir: directory where the cache file should be located
        :param refreshCache: when True an existing cache file for today is
                             deleted so the schedules are retrieved from
                             fme server again.  The cache file name includes
                             the current date, so a cache file is only ever
                             picked up on the day it was generated.
        :param ignorePP: when True published parameters are ignored when
                         schedules are compared
        '''
        self.logger = logging.getLogger(__name__)
        self.pp = pprint.PrettyPrinter(indent=4)
        self.logger.debug("test log config")
        self.fme = FMEUtil.PyFMEServerV2.FMEServer(fmeServUrl, fmeServToken)
        self.scheds = self.fme.getSchedules()

        # the date in the file name is what expires the cache: tomorrow a
        # different file name is calculated and the old file is not found.
        dateTimeStamp = datetime.datetime.now().strftime('%Y-%m-%d')
        self.cacheFile = 'scheds_{0}_{1}.json'.format(cacheLabel, dateTimeStamp)
        self.cacheFile = os.path.join(cacheDir, self.cacheFile)
        if refreshCache:
            if os.path.exists(self.cacheFile):
                os.remove(self.cacheFile)
        self.schedStruct = None

        # These are fields in the schedules that should be ignored
        # when doing comparisons between the two structures
        self.flds2Ignore = ['begin', 'enabled']
        # this switch can also be set using the method setIgnoredFields().
        # When set to true published parameters are not considered in the
        # comparison of the schedules.
        self.ignorePP = ignorePP
        # this data struct will get populated with everything in
        # self.schedStruct except the fields described in flds2Ignore
        self.schedStructComparison = []
        self.getSchedules()

    def getPyFMESchedule(self):
        '''
        :return: the pyFMEServer schedule object used in this class
        :rtype: FMEUtil.PyFMEServerV2.Schedule
        '''
        return self.scheds

    def getPyFME(self):
        '''
        :return: a FMEServer object
        :rtype: FMEUtil.PyFMEServerV2.FMEServer
        '''
        return self.fme

    def getScheduleData(self):
        '''
        :return: the data structure that describes the schedules in this
                 object.  This is the object's own structure, not a copy.
        '''
        return self.schedStruct

    def setIgnoredFields(self, flds, ignorePublishedParameters=True):
        '''
        When doing a comparison between schedule objects you can set a
        list of fields to ignore when doing the comparison.  There are
        certain fields that are set by default, this method allows you to
        define your own fields.

        Calling this method rebuilds schedStructComparison from schedStruct.

        :param flds: a list of fields that should be ignored when comparing
                     schedule objects
        :param ignorePublishedParameters: when True the 'publishedParameters'
                     entry of each schedule's 'request' is also removed from
                     the comparison structure
        '''
        self.ignorePP = ignorePublishedParameters
        self.flds2Ignore = flds
        schedStructCleaned = []
        # deep copy so that removing fields (including the nested published
        # parameters) never alters the data in self.schedStruct
        for sched in copy.deepcopy(self.schedStruct or []):
            for fld2Del in self.flds2Ignore:
                sched.pop(fld2Del, None)
            if self.ignorePP:
                # getting rid of published parameters too!  Schedules that
                # have no request / published parameters are left as is.
                request = sched.get('request')
                if isinstance(request, dict):
                    request.pop('publishedParameters', None)
            schedStructCleaned.append(sched)
        self.schedStructComparison = schedStructCleaned

    def isEnabled(self, scheduleName):
        '''
        :param scheduleName: name of the schedule to look up
        :return: indicates if the schedule is enabled or not
        :rtype: boolean
        :raises TypeError: if no schedule with that name exists, as the
                           lookup then returns None
        '''
        schedStruct = self.getScheduleByName(scheduleName)
        return schedStruct['enabled']

    def getSchedules(self):
        '''
        If no cache file exists then gets the scheds from fme server and
        writes them to the cache file, otherwise loads from the cache file.

        Also populates schedStructComparison which is the structure that is
        used for comparisons of data structures.  It has some fields removed
        that should not be used for comparison operations.
        '''
        cacheFile = os.path.basename(self.cacheFile)
        if os.path.exists(self.cacheFile):
            with open(self.cacheFile) as f:
                msg = 'loading the schedules from the cache file {0}'.format(cacheFile)
                self.logger.info(msg)
                schedStruct = json.load(f)
        else:
            self.logger.info("retrieving the schedules from fme server, this "
                             "may take a while")
            schedStruct = self.scheds.getSchedules()
            self.logger.debug("schedStruct: {0}".format(schedStruct))
            with open(self.cacheFile, 'w') as outfile:
                msg = "dumping the schedules to the cache file {0}"
                self.logger.info(msg.format(cacheFile))
                json.dump(schedStruct, outfile)

        self.schedStruct = schedStruct
        self.setIgnoredFields(self.flds2Ignore, self.ignorePP)

    def getScheduleByName(self, scheduleName):
        '''
        searches through the schedules for a schedule with the name
        'scheduleName' and returns it.

        :param scheduleName: name of the schedule to look up
        :return: the first schedule with that name (a reference into the
                 schedule data, not a copy) or None if no schedule is found
        '''
        self.logger.debug("getting the parameters for the schedule: {0}".format(scheduleName))
        for sched in self.schedStruct:
            if sched['name'] == scheduleName:
                return sched
        return None

    def __contains__(self, sched):  # pylint: disable=invalid-name
        '''
        Implements the 'in' operator.  The supplied schedule is cleaned the
        same way as the schedules in schedStructComparison (ignored fields
        and optionally the published parameters removed) and then compared
        against them.  The supplied schedule itself is not modified.

        When no equal schedule exists but one with the same name does, the
        differences between the two are logged.

        :param sched: the schedule structure to look for
        :type sched: dict
        :return: true or false based on whether the sched object exists in
                 this collection of schedules
        :rtype: boolean
        '''
        # clean the submitted schedule, deep copying so that the removal of
        # the nested published parameters can't alter the caller's data
        schedCleaned = {}
        for fld in sched.keys():
            if fld not in self.flds2Ignore:
                schedCleaned[fld] = copy.deepcopy(sched[fld])

        # if the ignore published parameters flag is set then
        # don't look at them if they are defined.  They live inside the
        # 'request' entry, not at the top level of the schedule.
        if self.ignorePP:
            request = schedCleaned.get('request')
            if isinstance(request, dict):
                request.pop('publishedParameters', None)

        if schedCleaned in self.schedStructComparison:
            return True

        # no match, report how the same named schedule(s) differ
        schedName = schedCleaned.get('name')
        if schedName is not None:
            for curSched in self.schedStructComparison:
                if curSched.get('name') == schedName:
                    diffs = deepdiff.DeepDiff(schedCleaned, curSched)
                    self.logger.info("differences for {1}: {0}".format(diffs, curSched['name']))
        return False

    def __sub__(self, schedules):  # pylint: disable=invalid-name
        '''
        identifies schedules that are in self, but not in supplied
        schedules

        :param schedules: the collection to compare against, anything that
                          supports the 'in' operator for schedule dicts
        :type schedules: Schedules
        :return: the cleaned (comparison) versions of the schedules that
                 exist in self but not in 'schedules'
        :rtype: list
        '''
        return [sched for sched in self.schedStructComparison
                if sched not in schedules]
221+
222+
223+
class Parameters(object):
    '''
    published parameters returned by existing schedules often include
    scripted parameters.  When creating a schedule you cannot specify
    scripted parameters as they are... well... SCRIPTED!

    This class provides some methods that allow you to retrieve the
    published parameters associated with a repository/workspace in
    a format that can be used to construct a schedule, ie will not include
    scripted parameters in its reference.

    :ivar schedule: the schedules object the schedule is looked up in
    :ivar scheduleStruct: the structure returned by
                          schedule.getScheduleByName() for the schedule name
                          provided to the constructor.  The methods of this
                          class index into it directly, so they fail if the
                          lookup returned None.
    '''

    def __init__(self, schedule, scheduleName):
        '''
        :param schedule: object providing getScheduleByName() and getPyFME()
        :type schedule: Schedules
        :param scheduleName: name of the schedule whose parameters are to be
                             worked with
        '''
        self.logger = logging.getLogger(__name__)
        self.schedule = schedule
        self.scheduleStruct = self.schedule.getScheduleByName(scheduleName)

    def getPublishedParameters(self):
        '''
        :return: the published parameters associated with the specified
                 schedule.  These are retrieved not from the schedule but
                 from the FMW that the schedule calls.
        '''
        workspcName = self.scheduleStruct['workspace']
        repoName = self.scheduleStruct['repository']

        fme = self.schedule.getPyFME()
        repo = fme.getRepository()
        wrkspcs = repo.getWorkspaces(repoName)
        return wrkspcs.getPublishedParams4Schedule(workspcName)

    def fixSchedulePublishedParameters(self):
        '''
        Replaces the schedule's published parameters with the parameters
        retrieved from the workspace, keeping the values the schedule
        currently defines for parameters that exist in both.  Parameters
        that only exist in the schedule (for example scripted ones) are
        dropped.

        Note that the schedule structure held by this object is updated in
        place, and it is that same structure that is returned.

        :return: a schedule json struct that can be sent to FME Server to
                 define a new schedule.
        '''
        # published parameters retrieved from the workspace on fme server
        pubParams = self.getPublishedParameters()

        # formatting the whole schedule is only worth doing when someone is
        # actually listening at debug level
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug("schedule before fixing published parameters: %s",
                              pprint.pformat(self.scheduleStruct))

        # published parameters associated with the current schedule.
        # includes scripted parameters, which can not be used when redefining
        # a schedule.  A schedule without any is treated as an empty list.
        request = self.scheduleStruct.setdefault('request', {})
        schedulePubParams = request.get('publishedParameters') or []

        # Iterating through the published parameters associated with the
        # workspace and overriding values with values that were retrieved
        # from the schedule.
        params4Schedule = []
        for pubParam in pubParams:
            paramName = pubParam['name']
            for schedParams in schedulePubParams:
                if schedParams['name'] == paramName:
                    # the workspace parameter may not carry a 'value' key
                    # yet, so it is read defensively for the log message
                    msg = 'updating the schedule parameter {0} from {1} to {2}'
                    msg = msg.format(schedParams['name'], pubParam.get('value'),
                                     schedParams['value'])
                    self.logger.info(msg)
                    pubParam['value'] = schedParams['value']
            params4Schedule.append(pubParam)

        request['publishedParameters'] = params4Schedule
        return self.scheduleStruct

0 commit comments

Comments
 (0)