ensembl-hive-python3  2.3
Params.py
Go to the documentation of this file.
1 
2 import sys
3 import numbers
4 import collections
5 
6 __doc__ = """
7 This module is an implementation of eHive's Param module.
8 It defines ParamContainer which is an attribute of BaseRunnable
9 and not its base class as in eHive's class hierarchy.
10 All the specific warnings and exceptions inherit from ParamWarning
11 and ParamException.
12 """
13 
14 
class ParamWarning(Warning):
    """Warning category for parameter-related issues (used by Process.BaseRunnable)."""
18 
19 
class ParamException(Exception):
    """Root of the hierarchy of parameter-related exceptions."""
class ParamNameException(ParamException):
    """Raised when the parameter name is not a string.

    The offending name is expected as the first (and only) positional
    argument, e.g. ``ParamNameException(param_name)``.
    """

    def __str__(self):
        # args[0] is the rejected name; its type is reported to make the
        # cause obvious (e.g. an int passed where a str was expected).
        return '"{0}" (type {1}) is not a valid parameter name'.format(self.args[0], type(self.args[0]).__name__)
class ParamSubstitutionException(ParamException):
    """Raised when ParamContainer is asked to substitute an unsupported structure.

    ParamContainer.param_substitute() accepts None, numbers, strings, lists
    and dictionaries; anything else is reported with this exception. The
    rejected object is expected as the first positional argument.
    """

    def __str__(self):
        # args[0] is the object that could not be substituted
        return 'Cannot substitute elements in objects of type "{0}"'.format(str(type(self.args[0])))
class ParamInfiniteLoopException(ParamException):
    """Raised when parameters depend on each other, forming a loop.

    Expected arguments: the reference on which the loop was detected, and a
    mapping whose keys are the substitutions that were in progress at that
    point (in the order they were started).
    """

    def __str__(self):
        # args[0]: the reference seen twice; args[1]: the substitution stack
        return "Substitution loop has been detected on {0}. Parameter-substitution stack: {1}".format(self.args[0], list(self.args[1].keys()))
35 
36 
class ParamContainer(object):
    """Equivalent of eHive's Param module.

    Holds two dictionaries: the parameters as they were given (possibly
    containing "#name#", "#func:name#" or "#expr( ... )expr#" references) and
    a cache of the values obtained once those references have been
    substituted. Substitution is lazy: it happens the first time a parameter
    is requested.
    """

    def __init__(self, unsubstituted_params, debug=False):
        """Constructor.

        "unsubstituted_params" is a dictionary (it is shallow-copied).
        "debug" turns on the tracing done by debug_print().
        """
        self.unsubstituted_param_hash = unsubstituted_params.copy()
        self.param_hash = {}
        self.debug = debug
        # Stack of the references currently being substituted (used to detect
        # loops). Created here so that param_substitute() can be called
        # without a prior call to get_param().
        self.substitution_in_progress = collections.OrderedDict()


    # Public methods


    def set_param(self, param_name, value):
        """Setter. Stores "value" as the (already substituted) value of "param_name" and returns it.

        Raises ParamNameException if "param_name" is not a non-empty string.
        """
        if not self.validate_parameter_name(param_name):
            raise ParamNameException(param_name)
        self.param_hash[param_name] = value
        return value

    def get_param(self, param_name):
        """Getter. Performs the parameter substitution.

        Raises ParamNameException if "param_name" is not a non-empty string,
        and re-raises KeyError (unknown parameter), SyntaxError (malformed
        reference) and ParamException subclasses coming from the substitution.
        """
        if not self.validate_parameter_name(param_name):
            raise ParamNameException(param_name)
        # Every top-level request starts with a fresh loop-detection stack
        self.substitution_in_progress = collections.OrderedDict()
        try:
            return self.internal_get_param(param_name)
        except (KeyError, SyntaxError, ParamException) as e:
            # To hide the part of the stack that is in ParamContainer
            raise type(e)(*e.args) from None

    def has_param(self, param_name):
        """Returns a boolean. It checks both substituted and unsubstituted parameters.

        Raises ParamNameException if "param_name" is not a non-empty string.
        """
        if not self.validate_parameter_name(param_name):
            raise ParamNameException(param_name)
        return (param_name in self.param_hash) or (param_name in self.unsubstituted_param_hash)


    # Private methods

    def validate_parameter_name(self, param_name):
        """Tells whether "param_name" is a non-empty string"""
        return isinstance(param_name, str) and (param_name != '')

    def debug_print(self, *args, **kwargs):
        """Print debug information if the debug flag is turned on (cf constructor)"""
        if self.debug:
            print(*args, **kwargs)

    def internal_get_param(self, param_name):
        """Equivalent of get_param() that assumes "param_name" is a valid parameter name and hence, doesn't have to raise ParamNameException.
        It is only used internally (including from the code generated for #expr()expr#).
        Raises KeyError if the parameter is neither cached nor seeded."""
        self.debug_print("internal_get_param", param_name)
        if param_name not in self.param_hash:
            x = self.unsubstituted_param_hash[param_name]
            # The substituted value is cached, so the substitution runs once per parameter
            self.param_hash[param_name] = self.param_substitute(x)
        return self.param_hash[param_name]


    def param_substitute(self, structure):
        """
        Take any structure and replace the pairs of hashes with the values of the parameters / expression they represent
        Compatible types: None, numbers, strings, lists, dictionaries (otherwise, ParamSubstitutionException is raised)
        Lists and dictionaries are rebuilt recursively (dictionary keys are substituted too).
        """
        self.debug_print("param_substitute", structure)

        if structure is None:
            return None

        elif isinstance(structure, list):
            return [self.param_substitute(_) for _ in structure]

        elif isinstance(structure, dict):
            # NB: In Python, not everything can be hashed and used as a dictionary key.
            #     Perhaps we should check for such errors ?
            return {self.param_substitute(key): self.param_substitute(value) for (key,value) in structure.items()}

        elif isinstance(structure, numbers.Number):
            return structure

        elif isinstance(structure, str):

            # We handle the substitution differently if there is a single reference as we can avoid forcing the result to be a string

            if structure.startswith('#expr(') and structure.endswith(')expr#') and structure.count('#expr(', 6, -6) == 0 and structure.count(')expr#', 6, -6) == 0:
                return self.subst_one_hashpair(structure[1:-1], True)

            # startswith() / endswith() rather than structure[0] / structure[-1]:
            # indexing raises IndexError on the empty string, which is a legitimate value
            if structure.startswith('#') and structure.endswith('#') and structure.count('#', 1, -1) == 0:
                if len(structure) <= 2:
                    # "#" and "##" are not references: returned verbatim
                    return structure
                return self.subst_one_hashpair(structure[1:-1], False)

            # Fallback to the default parser: all pairs of hashes are substituted
            return self.subst_all_hashpairs(structure, lambda middle_param: self.subst_one_hashpair(middle_param, False) )

        else:
            raise ParamSubstitutionException(structure)


    def subst_all_hashpairs(self, structure, callback):
        """
        Parse "structure" and replace all the pairs of hashes by the result of calling callback() on the pair content
        #expr()expr# are treated differently by calling subst_one_hashpair()
        The result is a string (like structure)
        Raises SyntaxError on an unmatched '#' or '#expr(' token
        """
        self.debug_print("subst_all_hashpairs", structure)
        result = []
        while True:
            # Everything before the next '#' is copied verbatim
            (head, sep, tmp) = structure.partition('#')
            result.append(head)
            if sep != '#':
                # No more hashes: we are done
                return ''.join(result)
            if tmp.startswith('expr('):
                i = tmp.find(')expr#')
                if i == -1:
                    raise SyntaxError("Unmatched '#expr(' token")
                # tmp[:i+5] is "expr( ... )expr", i.e. without the surrounding hashes
                val = self.subst_one_hashpair(tmp[:i+5], True)
                tail = tmp[i+6:]
            else:
                (middle_param, sep, tail) = tmp.partition('#')
                if sep != '#':
                    raise SyntaxError("Unmatched '#' token")
                if middle_param == '':
                    # "##" is kept as is
                    val = '##'
                else:
                    val = callback(middle_param)
            result.append(str(val))
            structure = tail


    def subst_one_hashpair(self, inside_hashes, is_expr):
        """
        Run the parameter substitution for a single pair of hashes.
        Here, we only need to handle #expr()expr#, #func:params# and #param_name#
        as each condition has been parsed in the other methods
        "inside_hashes" is the text between the two hashes (hashes excluded)
        Raises ParamInfiniteLoopException if "inside_hashes" is already being substituted
        """
        self.debug_print("subst_one_hashpair", inside_hashes, is_expr)

        # Keep track of the substitutions we've made to detect loops
        if inside_hashes in self.substitution_in_progress:
            # The exception gets a snapshot of the stack, since the stack itself is unwound below
            raise ParamInfiniteLoopException(inside_hashes, self.substitution_in_progress.copy())
        self.substitution_in_progress[inside_hashes] = 1

        try:
            # We ask the caller to provide the is_expr tag to avoid checking the string again for the presence of the "expr" tokens
            if is_expr:
                # Each #name# of the expression becomes a call to internal_get_param().
                # The name is inserted with repr() so that quotes or backslashes in it cannot break the generated code.
                s = self.subst_all_hashpairs(inside_hashes[5:-5].strip(), lambda middle_param: 'self.internal_get_param({0!r})'.format(middle_param))
                # SECURITY: the expression comes from the parameter values and is evaluated as
                # Python code. Parameters must only come from trusted sources.
                val = eval(s)

            elif ':' in inside_hashes:
                (func_name,_,parameters) = inside_hashes.partition(':')
                try:
                    # SECURITY: same remark as above, func_name is evaluated as Python code
                    f = eval(func_name)
                except Exception:
                    raise SyntaxError("Unknown method: " + func_name)
                if callable(f):
                    if parameters:
                        val = f(self.internal_get_param(parameters))
                    else:
                        val = f()
                else:
                    raise SyntaxError(func_name + " is not callable")

            else:
                val = self.internal_get_param(inside_hashes)

        finally:
            # Always unwind the stack, even on error, so that a failed substitution
            # does not make a later one look like a loop
            self.substitution_in_progress.pop(inside_hashes, None)

        return val
204 
205 
206 
def __main():
    """Demonstration / smoke test of ParamContainer: prints the outcome of various substitutions."""
    seed_params = [
        ('alpha', 2),
        ('beta', 5),
        ('delta', '#expr( #alpha#*#beta# )expr#'),

        ('gamma', [10,20,33,15]),
        ('gamma_prime', '#expr( #gamma# )expr#'),
        ('gamma_second', '#expr( list(#gamma#) )expr#'),

        ('age', { 'Alice' : 17, 'Bob' : 20, 'Chloe' : 21}),
        ('age_prime', '#expr( #age# )expr#'),
        ('age_second', '#expr( dict(#age#) )expr#'),

        ('csv', '[123,456,789]'),
        ('csv_prime', '#expr( #csv# )expr#'),
        ('listref', '#expr( eval(#csv#) )expr#'),

        ('null', None),
        ('ref_null', '#null#'),
        ('ref2_null', '#expr( #null# )expr#'),
        ('ref3_null', '#alpha##null##beta#'),
    ]

    container = ParamContainer(collections.OrderedDict(seed_params), False)

    def print_title(title):
        # Blank line, then the title between stars
        print()
        print("*" + title + "*")

    def print_param_value(x):
        # Value, type and identity (the latter shows whether two parameters share one object)
        print("\t=", x, type(x), "id=0x{0:012x}".format(id(x)))

    def print_substitution(title, param_string):
        print(title)
        print("\t>", param_string)
        print_param_value(container.param_substitute(param_string))

    def expect_exception(exception_class, action):
        # Run action() and report whether it raised the expected exception
        label = exception_class.__name__
        try:
            action()
        except exception_class:
            print(label + " raised")
        else:
            print(label + " NOT raised")
        print()

    print_title("Exceptions")
    expect_exception(KeyError, lambda: container.get_param('ppppppp'))
    # A non-string name should raise ParamNameException
    expect_exception(ParamNameException, lambda: container.get_param(0))
    expect_exception(ParamInfiniteLoopException, lambda: ParamContainer({'a': '#b#', 'b': '#a#'}, True).get_param('a'))

    print_title('All the parameters')
    for (key, value) in seed_params:
        print("\t>", key, "is seeded as:", value, type(value))
        print_param_value(container.get_param(key))
        print()

    # Each section: its title, then the (label, string to substitute) pairs, in display order
    sections = [
        ("Numbers", [
            ("Scalar substitutions", "#alpha# and another: #beta# and again one: #alpha# and the other: #beta# . Their product: #delta#"),
        ]),
        ("Lists", [
            ("default stringification of gamma", "#gamma#"),
            ("expr-stringification of gamma", "#expr( #gamma# )expr#"),
            ("complex join of gamma", "#expr( '~'.join([str(_) for _ in sorted(#gamma#)]) )expr#"),
            ("complex join of gamma_prime", "#expr( '~'.join([str(_) for _ in sorted(#gamma_prime#)]) )expr#"),
        ]),
        ("Global methods", [
            ("sum(gamma)", "#expr( sum(#gamma#) )expr#"),
            ("min(gamma)", "#expr( min(#gamma#) )expr#"),
            ("max(gamma)", "#expr( max(#gamma#) )expr#"),
        ]),
        ("Dictionaries", [
            ("default stringification of age", "#age#"),
            ("expr-stringification of age", "#expr( #age# )expr#"),
            ("complex fold of age", '#expr( "\t".join(["{0} is {1} years old".format(p,a) for (p,a) in #age#.items()]) )expr#'),
            ("complex fold of age_prime", '#expr( "\t".join(["{0} is {1} years old".format(p,a) for (p,a) in #age_prime#.items()]) )expr#'),
        ]),
        ("With indexes", [
            ("adding indexed values", '#expr( #age#["Alice"]+max(#gamma#)+#listref#[0] )expr#'),
        ]),
    ]
    for (section_title, cases) in sections:
        print_title(section_title)
        for (label, param_string) in cases:
            print_substitution(label, param_string)

    # Mutating the list returned for "gamma" shows which derived parameters share it
    print_title("Modifications of gamma")
    container.get_param('gamma').append("val0")
    for name in ('gamma', 'gamma_prime', 'gamma_second'):
        print("\t" + name, container.get_param(name))


if __name__ == '__main__':
    __main()
310 
Base class for parameters-related exceptions.
Definition: Params.py:23
Raised when ParamContainer tried to substitute an unexpected structure (only dictionaries and lists a...
Definition: Params.py:32
def validate_parameter_name(self, param_name)
Tells whether "param_name" is a non-empty string.
Definition: Params.py:89
def subst_all_hashpairs(self, structure, callback)
Parse "structure" and replace all the pairs of hashes by the result of calling callback() on the pair...
Definition: Params.py:156
def get_param(self, param_name)
Getter.
Definition: Params.py:67
def debug_print(self, *args, **kwargs)
Print debug information if the debug flag is turned on (cf constructor)
Definition: Params.py:94
def internal_get_param(self, param_name)
Equivalent of get_param() that assumes "param_name" is a valid parameter name and hence...
Definition: Params.py:101
Equivalent of eHive's Param module.
Definition: Params.py:44
def has_param(self, param_name)
Returns a boolean.
Definition: Params.py:79
def subst_one_hashpair(self, inside_hashes, is_expr)
Run the parameter substitution for a single pair of hashes.
Definition: Params.py:188
def set_param(self, param_name, value)
Setter.
Definition: Params.py:59
Used by Process.BaseRunnable.
Definition: Params.py:17
Raised when the parameter name is not a string.
Definition: Params.py:27
def param_substitute(self, structure)
Take any structure and replace the pairs of hashes with the values of the parameters / expression the...
Definition: Params.py:114
def __main()
Definition: Params.py:223
Raised when parameters depend on each other, forming a loop.
Definition: Params.py:37