You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

template.py 17KB

11 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. Template module for Kai's Text Tools.
  5. (c) 2013 Ivan "Kai SD" Korystin
  6. License: GPLv3
  7. '''
  8. import re
  9. class TemplateV3(object):
  10. '''
  11. Class for reading ATGv3 templates.
  12. '''
  13. pass
  14. class TemplateV2(object):
  15. '''
  16. Class for reading ATGv2 templates.
  17. '''
  18. def __init__(self, filename=None, encoding='utf-8', text=''):
  19. '''
  20. Constructor
  21. '''
  22. if filename:
  23. with open(filename, 'r') as templateFile:
  24. topline = templateFile.readline().decode(encoding)
  25. if not topline.startswith('ATGV2'):
  26. raise BaseException('%s is not an ATGv2 template' % (filename))
  27. key = templateFile.readline().decode(encoding)
  28. if key[:2] == '[$' and key[-3:-1] == '$]':
  29. keyInfo = key[2:-2].split('$')
  30. if len(keyInfo) < 4:
  31. raise BaseException('%s has bad ATGv2 key' % (filename))
  32. self.keyField = keyInfo[0]
  33. self.extension = keyInfo[1]
  34. self.prefix = keyInfo[2]
  35. self.encoding = keyInfo[3]
  36. if 'oneFile' in keyInfo[4:]:
  37. self.oneFile = True
  38. else:
  39. self.oneFile = False
  40. if 'transpose' in keyInfo[4:]:
  41. self.transpose = True
  42. else:
  43. self.transpose = False
  44. self.text = u''
  45. else:
  46. raise BaseException('%s has bad ATGv2 key' % (filename))
  47. for i in templateFile.readlines():
  48. self.text += i.decode(encoding)
  49. else:
  50. self.text = text
  51. self.key = u''
  52. self.footer = u''
  53. self.replacement = {}
  54. self._data = None
  55. self._multiWords = None
  56. def parse(text):
  57. topParts = []
  58. matches = {}
  59. openers = re.finditer('\[\$.*?\$', text)
  60. closers = re.finditer('\$\]', text)
  61. ops = []
  62. try:
  63. cl = closers.next()
  64. while not cl is None:
  65. try:
  66. op = openers.next()
  67. if op.start() < cl.start():
  68. ops.append(op)
  69. else:
  70. idx = -1
  71. try:
  72. while ops[idx].start() > cl.start():
  73. idx -= 1
  74. except:
  75. raise BaseException('Template parsing error: can not find the opener for '+str(cl.start()))
  76. matches[ops[idx]] = cl
  77. if len(ops) == 1 or idx == -len(ops):
  78. topParts.append(ops[idx])
  79. del ops[idx]
  80. ops.append(op)
  81. try:
  82. cl = closers.next()
  83. except StopIteration:
  84. cl = None
  85. except StopIteration:
  86. idx = -1
  87. try:
  88. while ops[idx].start() > cl.start():
  89. idx -= 1
  90. except:
  91. raise BaseException('Template parsing error: can not find the opener for '+str(cl.start()))
  92. matches[ops[idx]] = cl
  93. if len(ops) == 1 or idx == -len(ops):
  94. topParts.append(ops[idx])
  95. del ops[idx]
  96. try:
  97. cl = closers.next()
  98. except StopIteration:
  99. cl = None
  100. except StopIteration:
  101. pass
  102. parts = []
  103. for i in topParts:
  104. startPoint = i.end()
  105. endPoint = matches[i].start()
  106. p = (i.group()[2:-1], text[startPoint:endPoint])
  107. if p[0].startswith('ATG'):
  108. parts.insert(0, p)
  109. else:
  110. parts.append(p)
  111. return parts
  112. partCommands = {}
  113. def plain(index, flow, keytag):
  114. if not keytag in self._data.keys:
  115. self.warning('WARNING: keyword not found in table - %s' % (keytag))
  116. return flow
  117. return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag, index]))
  118. partCommands['_ATGPLAIN'] = plain
  119. def nPlain(index, flow, keytag, number):
  120. if not keytag+str(number) in self._data.keys:
  121. self.warning('WARNING: keyword not found in table - %s' % (keytag+str(number)))
  122. return flow
  123. return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag+str(number), index]))
  124. def lIndex(index, flow, keytag, number):
  125. return flow.replace('[$ATGLINDEX$]', str(number))
  126. def addkey(index, flow, text):
  127. if self.key.find(text) < 0:
  128. self.key += text
  129. key = '[$ATGkey$' + text + '$]'
  130. return flow.replace(key,'')
  131. partCommands['ATGkey'] = addkey
  132. def addFooter(index, flow, text):
  133. if self.footer.find(text) < 0:
  134. self.footer += text
  135. key = '[$ATGFOOTER$' + text + '$]'
  136. return flow.replace(key,'')
  137. partCommands['ATGFOOTER'] = addFooter
  138. def addList(index, flow, string):
  139. key = '[$ATGLIST$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  140. sub = string[len(string.split('$')[0])+1:]
  141. keyTag = string.split('$')[0]
  142. subparts = parse(sub)
  143. myText = u''
  144. if not keyTag in self._multiWords:
  145. self.warning('Keytag %s is not multiple!' % (keyTag))
  146. return flow
  147. for j in xrange(1, self._multiWords[keyTag]+1):
  148. subText = sub
  149. for sp in subparts:
  150. if sp[0] in self._multiWords:
  151. subText = nPlain(index, subText, sp[0], j)
  152. elif sp[0] == 'ATGLINDEX':
  153. subText = lIndex(index, subText, sp[0], j)
  154. elif sp[0] in partCommands:
  155. subText = partCommands[sp[0]](index, subText, sp[1])
  156. elif sp[1] == '':
  157. subText = plain(index, subText, sp[0])
  158. else:
  159. self.warning('Warning: unknown command '+sp[0])
  160. if not self._data[keyTag+str(j), index] == u'':
  161. myText += subText
  162. return flow.replace(key, myText)
  163. partCommands['ATGLIST'] = addList
  164. def addListCut(index, flow, string):
  165. key = '[$ATGLISTCUT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  166. sub = string[len(string.split('$')[0])+1:]
  167. keyTag = string.split('$')[0]
  168. subparts = parse(sub)
  169. myText = u''
  170. if not keyTag in self._multiWords:
  171. self.warning('Keytag %s is not multiple!' % (keyTag))
  172. return flow
  173. for j in xrange(1, self._multiWords[keyTag]+1):
  174. subText = sub
  175. for sp in subparts:
  176. if sp[0] in self._multiWords:
  177. subText = nPlain(index, subText, sp[0], j)
  178. elif sp[0] == 'ATGLINDEX':
  179. subText = lIndex(index, subText, sp[0], j)
  180. elif sp[0] in partCommands:
  181. subText = partCommands[sp[0]](index, subText, sp[1])
  182. elif sp[1] == '':
  183. subText = plain(index, subText, sp[0])
  184. else:
  185. self.warning('Warning: unknown command '+sp[0])
  186. if not self._data[keyTag+str(j), index] == u'':
  187. myText += subText
  188. return flow.replace(key, myText[:-1])
  189. partCommands['ATGLISTCUT'] = addListCut
  190. def addIf(index, flow, string):
  191. key = '[$ATGIF$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  192. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  193. keyTag = string.split('$')[0]
  194. targetValue = string.split('$')[1]
  195. subparts = parse(sub)
  196. myText = u''
  197. if self._data[keyTag, 0] == []:
  198. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  199. return flow
  200. if unicode(self._data[keyTag, index]) == unicode(targetValue):
  201. subText = sub
  202. for sp in subparts:
  203. if sp[0] in partCommands:
  204. subText = partCommands[sp[0]](index, subText, sp[1])
  205. elif sp[1] == '':
  206. subText = plain(index, subText, sp[0])
  207. else:
  208. self.warning('Warning: unknown command '+sp[0])
  209. myText += subText
  210. return flow.replace(key, myText)
  211. partCommands['ATGIF'] = addIf
  212. def addIfNot(index, flow, string):
  213. key = '[$ATGIFNOT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  214. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  215. keyTag = string.split('$')[0]
  216. targetValue = string.split('$')[1]
  217. subparts = parse(sub)
  218. myText = u''
  219. if self._data[keyTag, 0] == []:
  220. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  221. return flow
  222. if not unicode(self._data[keyTag, index]) == unicode(targetValue):
  223. subText = sub
  224. for sp in subparts:
  225. if sp[0] in partCommands:
  226. subText = partCommands[sp[0]](index, subText, sp[1])
  227. elif sp[1] == '':
  228. subText = plain(index, subText, sp[0])
  229. else:
  230. self.warning('Warning: unknown command '+sp[0])
  231. myText += subText
  232. return flow.replace(key, myText)
  233. partCommands['ATGIFNOT'] = addIfNot
  234. def addGreater(index, flow, string):
  235. key = '[$ATGGREATER$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  236. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  237. keyTag = string.split('$')[0]
  238. targetValue = string.split('$')[1]
  239. subparts = parse(sub)
  240. myText = u''
  241. if self._data[keyTag, 0] == []:
  242. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  243. return flow
  244. try:
  245. if float(self._data[keyTag, index]) > float(targetValue):
  246. subText = sub
  247. for sp in subparts:
  248. if sp[0] in partCommands:
  249. subText = partCommands[sp[0]](index, subText, sp[1])
  250. elif sp[1] == '':
  251. subText = plain(index, subText, sp[0])
  252. else:
  253. self.warning('Warning: unknown command '+sp[0])
  254. myText += subText
  255. except:
  256. self.warning('ERROR: trying to compare uncomparable values!')
  257. return flow.replace(key, myText)
  258. partCommands['ATGGREATER'] = addGreater
  259. def addLess(index, flow, string):
  260. key = '[$ATGLESS$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  261. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  262. keyTag = string.split('$')[0]
  263. targetValue = string.split('$')[1]
  264. subparts = parse(sub)
  265. myText = u''
  266. if self._data[keyTag, 0] == []:
  267. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  268. return flow
  269. try:
  270. if float(self._data[keyTag, index]) < float(targetValue):
  271. subText = sub
  272. for sp in subparts:
  273. if sp[0] in partCommands:
  274. subText = partCommands[sp[0]](index, subText, sp[1])
  275. elif sp[1] == '':
  276. subText = plain(index, subText, sp[0])
  277. else:
  278. self.warning('Warning: unknown command '+sp[0])
  279. myText += subText
  280. except:
  281. self.warning('ERROR: trying to compare uncomparable values!')
  282. return flow.replace(key, myText)
  283. partCommands['ATGLESS'] = addLess
  284. def addReplace(index, flow, string):
  285. key = '[$ATGREPLACE$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  286. targetString = string[len(string.split('$')[0])+1:]
  287. srcString = string.split('$')[0]
  288. self.replacement[srcString] = targetString
  289. key = '[$ATGREPLACE$' + string + '$]'
  290. return flow.replace(key,'')
  291. partCommands['ATGREPLACE'] = addReplace
  292. def addPrefix(index, flow, string):
  293. key = '[$ATGPREFIX$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  294. sub = string
  295. subparts = parse(sub)
  296. for sp in subparts:
  297. if sp[0] in partCommands:
  298. sub = partCommands[sp[0]](index, sub, sp[1])
  299. elif sp[1] == '':
  300. sub = plain(index, sub, sp[0])
  301. else:
  302. self.warning('Warning: unknown command '+sp[0])
  303. self.bonusPrefix += sub
  304. key = '[$ATGPREFIX$' + string + '$]'
  305. return flow.replace(key,'')
  306. partCommands['ATGPREFIX'] = addPrefix
  307. def skip(index, flow, string):
  308. return u'[$ATGSKIP_DO$]'
  309. partCommands['ATGSKIP'] = skip
  310. def prev(index, flow, string):
  311. key = '[$ATGPREV$%s$]' % (string.split('$')[0])
  312. keytag = string.split('$')[0]
  313. if self._data[keytag, 0] == []:
  314. self.warning('WARNING: keyword not found in table - %s' % (keytag))
  315. return flow
  316. if index == 0:
  317. self.log('INFORMATION: Skipping ATGPREV tag for entry with index = 0')
  318. return u'[$ATGSKIP_DO$]'
  319. return flow.replace('[$ATGPREV$%s$]' % (keytag), unicode(self._data.col_by_key(keytag)[index-1]))
  320. partCommands['ATGPREV'] = prev
  321. self.commands = partCommands
  322. self.parts = parse(self.text)
  323. def process(self, data):
  324. self._data = data
  325. multiWords = {}
  326. numbs = ('1','2','3','4','5','6','7','8','9','0')
  327. for i in data.keys:
  328. multi = False
  329. while i[-1] in numbs:
  330. i = i[:-1]
  331. multi = True
  332. if multi:
  333. if i in multiWords:
  334. multiWords[i] += 1
  335. else:
  336. multiWords[i] = 1
  337. self._multiWords = multiWords
  338. if self.oneFile:
  339. out = ''
  340. else:
  341. out = {}
  342. index = 0
  343. partCommands = self.commands
  344. for element in data.col_by_key(self.keyField):
  345. self.bonusPrefix = self.prefix
  346. text = self.text
  347. for i in self.parts:
  348. if i[0] in partCommands:
  349. text = partCommands[i[0]](index, text, i[1])
  350. elif i[1] == u'':
  351. text = partCommands['_ATGPLAIN'](index, text, i[0])
  352. else:
  353. self.warning('Warning: unknown command '+i[0])
  354. for i in self.replacement:
  355. text = text.replace(i, self.replacement[i])
  356. self.replacement = {}
  357. index += 1
  358. if u'[$ATGSKIP_DO$]' in text:
  359. self.log('ATGSKIP Tag found. Skipping ' + unicode(element) + '.')
  360. else:
  361. if self.oneFile:
  362. out += text
  363. else:
  364. name = self.bonusPrefix + unicode(element)
  365. out[name] = text
  366. self.log('Created %s' % (element))
  367. if self.oneFile:
  368. out = self.key + out + self.footer
  369. return out
  370. def warning(self, text):
  371. print text
  372. def log(self, text):
  373. pass
  374. @staticmethod
  375. def express(cls, text, **kwargs):
  376. obj = cls()
  377. obj.text = text
  378. self.keyField = kwargs.get('keyField', 'Index')
  379. self.extension = kwargs.get('extension', '')
  380. self.prefix = kwargs.get('prefix', '')
  381. self.encoding = kwargs.get('encoding', 'utf-8')
  382. return obj