Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
Tento repozitář je archivovaný. Můžete prohlížet soubory, klonovat, ale nemůžete nahrávat a vytvářet nové úkoly a požadavky na natažení.

483 řádky
20KB

  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. Template module for the Automatic Text Tools.
  5. (c) 2013 Ivan "Kai SD" Korystin
  6. License: GPLv3
  7. '''
  8. import re
  9. class Template(object):
  10. '''
  11. Empty template class. Generates empty text.
  12. '''
  13. def process(self, data):
  14. '''
  15. Replace this method in subclasses.
  16. '''
  17. return ''
  18. def warning(self, text):
  19. '''
  20. Prints a warning
  21. '''
  22. print text
  23. def log(self, text):
  24. '''
  25. Print information
  26. '''
  27. #print 'Template:', text
  28. pass
  29. class TemplateV2(Template):
  30. '''
  31. Class for reading ATGv2 templates.
  32. ATGv2 template file should be a plain text file, starting with the line
  33. ATGV2
  34. followed by the info line:
  35. [$KeyField$Extension$Prefix$Encoding$]
  36. where
  37. KeyField - is a name of a data column, that contains an identifier.
  38. Extension - is the desired extension for the generated files.
  39. Prefix - is the desired filename prefix for the generated files
  40. Encoding - is the desired encoding for the generated files.
  41. The line may also have some optional keywords before the closing bracket:
  42. oneFile$ - place all generated text into a single file instead of
  43. generating a file for each table row.
  44. After the info line, you can put your text.
  45. You can use following commands to handle the data:
  46. * [$Name$], where Name is the column header,
  47. will be replaced with value from the current row.
  48. * [$ATGLINDEX$] will be replaced with the number of a current row.
  49. * [$ATGHEADER$Text$] and [$ATGFOOTER$Text$] will place the given text
  50. at the begining or at the end of the file. You can't use other
  51. commands in this text.
  52. * [$ATGLIST$Name$Text$], where Name is a multi-column header
  53. (i.e. 'Col' will represent 'Col1', 'Col2', 'Col3' etc)
  54. will repeat the given text for each non-empty value.
  55. You can use other commands in Text. Also [$Name$] inside the list
  56. will be replaced with the value for the current row and column.
  57. * [$ATGLINDEX$] can be used only inside the ATGLIST text,
  58. will be replaced with the current column index.
  59. * [$ATGLISTCUT$Name$Text$] - same as ATGLIST, but the last symbol
  60. will be removed. Useful for removing unnecessary newlines.
  61. * [$ATGIF$Name$Value$Text$] will be replaced with the given text
  62. only if the the given column's value is the same as the given one.
  63. Will be replaced with the empty text otherwise. You can use other
  64. commands in Text.
  65. * [$ATGIFNOT$Name$Value$Text$] - same as ATGIF, but the column's value
  66. should not be equal to the given one.
  67. * [$ATGGREATER$Name$Value$Text$] - same as ATGIF, but the value should
  68. be the number and it should be greater then the given one.
  69. * [$ATGGREATER$Name$Value$Text$] - same as ATGGREATER, but the value
  70. should be less then the given one.
  71. * [$ATGREPLACE$Text1$Text2$] - Will replace Text1 with Text2. Replacements
  72. will be done after all other commands. You can't use regular expressions or
  73. other commands in the text.
  74. * [$ATGPREFIX$Text$] - Will add the given text to the filename prefix.
  75. You can use other commands in text, but do it carefully.
  76. * [$ATGSKIP$] - Skip the current row. Use only in combination with the
  77. ATGIF/ATGIFNOT, or you will generate nothing.
  78. * [$ATGPREV$Name$], where Name is the column header,
  79. will be replaced with the with the value of the given header from the
  80. previous row. ATGSKIP will be used for the first row.
  81. '''
  82. def __init__(self, filename=None, encoding='utf-8', text=''):
  83. '''
  84. Constructor.
  85. filename - name of the ATGv2 template file.
  86. encoding - encoding of the template file.
  87. text - text to use if no filename has been provided.
  88. '''
  89. if filename:
  90. with open(filename, 'r') as templateFile:
  91. topline = templateFile.readline().decode(encoding)
  92. if not topline.startswith('ATGV2'):
  93. raise BaseException('%s is not an ATGv2 template' % (filename))
  94. key = templateFile.readline().decode(encoding)
  95. if key[:2] == '[$' and key[-3:-1] == '$]':
  96. keyInfo = key[2:-2].split('$')
  97. if len(keyInfo) < 4:
  98. raise BaseException('%s has bad ATGv2 key' % (filename))
  99. self.keyField = keyInfo[0]
  100. self.extension = keyInfo[1]
  101. self.prefix = keyInfo[2]
  102. self.encoding = keyInfo[3]
  103. if 'oneFile' in keyInfo[4:]:
  104. self.oneFile = True
  105. else:
  106. self.oneFile = False
  107. self.text = u''
  108. else:
  109. raise BaseException('%s has bad ATGv2 key' % (filename))
  110. for i in templateFile.readlines():
  111. self.text += i.decode(encoding)
  112. else:
  113. self.text = text
  114. self.header = u''
  115. self.footer = u''
  116. self.replacement = {}
  117. self._data = None
  118. self._multiWords = None
  119. def parse(text):
  120. topParts = []
  121. matches = {}
  122. openers = re.finditer('\[\$.*?\$', text)
  123. closers = re.finditer('\$\]', text)
  124. ops = []
  125. try:
  126. cl = closers.next()
  127. while not cl is None:
  128. try:
  129. op = openers.next()
  130. if op.start() < cl.start():
  131. ops.append(op)
  132. else:
  133. idx = -1
  134. try:
  135. while ops[idx].start() > cl.start():
  136. idx -= 1
  137. except:
  138. raise BaseException('Template parsing error: can not find the opener for '+str(cl.start()))
  139. matches[ops[idx]] = cl
  140. if len(ops) == 1 or idx == -len(ops):
  141. topParts.append(ops[idx])
  142. del ops[idx]
  143. ops.append(op)
  144. try:
  145. cl = closers.next()
  146. except StopIteration:
  147. cl = None
  148. except StopIteration:
  149. idx = -1
  150. try:
  151. while ops[idx].start() > cl.start():
  152. idx -= 1
  153. except:
  154. raise BaseException('Template parsing error: can not find the opener for '+str(cl.start()))
  155. matches[ops[idx]] = cl
  156. if len(ops) == 1 or idx == -len(ops):
  157. topParts.append(ops[idx])
  158. del ops[idx]
  159. try:
  160. cl = closers.next()
  161. except StopIteration:
  162. cl = None
  163. except StopIteration:
  164. pass
  165. parts = []
  166. for i in topParts:
  167. startPoint = i.end()
  168. endPoint = matches[i].start()
  169. p = (i.group()[2:-1], text[startPoint:endPoint])
  170. if p[0].startswith('ATG'):
  171. parts.insert(0, p)
  172. else:
  173. parts.append(p)
  174. return parts
  175. partCommands = {}
  176. def plain(index, flow, keytag):
  177. if not keytag in self._data.keys:
  178. self.warning('WARNING: keyword not found in table - %s' % (keytag))
  179. return flow
  180. return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag, index]))
  181. partCommands['_ATGPLAIN'] = plain
  182. def nPlain(index, flow, keytag, number):
  183. if not keytag+str(number) in self._data.keys:
  184. self.warning('WARNING: keyword not found in table - %s' % (keytag+str(number)))
  185. return flow
  186. return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag+str(number), index]))
  187. def lIndex(index, flow, keytag, number):
  188. return flow.replace('[$ATGLINDEX$]', str(number))
  189. def addHeader(index, flow, text):
  190. if self.header.find(text) < 0:
  191. self.header += text
  192. key = '[$ATGHEADER$' + text + '$]'
  193. return flow.replace(key,'')
  194. partCommands['ATGHEADER'] = addHeader
  195. def addFooter(index, flow, text):
  196. if self.footer.find(text) < 0:
  197. self.footer += text
  198. key = '[$ATGFOOTER$' + text + '$]'
  199. return flow.replace(key,'')
  200. partCommands['ATGFOOTER'] = addFooter
  201. def addList(index, flow, string):
  202. key = '[$ATGLIST$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  203. sub = string[len(string.split('$')[0])+1:]
  204. keyTag = string.split('$')[0]
  205. subparts = parse(sub)
  206. myText = u''
  207. if not keyTag in self._multiWords:
  208. self.warning('Keytag %s is not multiple!' % (keyTag))
  209. return flow
  210. for j in xrange(1, self._multiWords[keyTag]+1):
  211. subText = sub
  212. for sp in subparts:
  213. if sp[0] in self._multiWords:
  214. subText = nPlain(index, subText, sp[0], j)
  215. elif sp[0] == 'ATGLINDEX':
  216. subText = lIndex(index, subText, sp[0], j)
  217. elif sp[0] in partCommands:
  218. subText = partCommands[sp[0]](index, subText, sp[1])
  219. elif sp[1] == '':
  220. subText = plain(index, subText, sp[0])
  221. else:
  222. self.warning('Warning: unknown command '+sp[0])
  223. if not self._data[keyTag+str(j), index] == u'':
  224. myText += subText
  225. return flow.replace(key, myText)
  226. partCommands['ATGLIST'] = addList
  227. def addListCut(index, flow, string):
  228. key = '[$ATGLISTCUT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  229. sub = string[len(string.split('$')[0])+1:]
  230. keyTag = string.split('$')[0]
  231. subparts = parse(sub)
  232. myText = u''
  233. if not keyTag in self._multiWords:
  234. self.warning('Keytag %s is not multiple!' % (keyTag))
  235. return flow
  236. for j in xrange(1, self._multiWords[keyTag]+1):
  237. subText = sub
  238. for sp in subparts:
  239. if sp[0] in self._multiWords:
  240. subText = nPlain(index, subText, sp[0], j)
  241. elif sp[0] == 'ATGLINDEX':
  242. subText = lIndex(index, subText, sp[0], j)
  243. elif sp[0] in partCommands:
  244. subText = partCommands[sp[0]](index, subText, sp[1])
  245. elif sp[1] == '':
  246. subText = plain(index, subText, sp[0])
  247. else:
  248. self.warning('Warning: unknown command '+sp[0])
  249. if not self._data[keyTag+str(j), index] == u'':
  250. myText += subText
  251. return flow.replace(key, myText[:-1])
  252. partCommands['ATGLISTCUT'] = addListCut
  253. def addIf(index, flow, string):
  254. key = '[$ATGIF$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  255. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  256. keyTag = string.split('$')[0]
  257. targetValue = string.split('$')[1]
  258. subparts = parse(sub)
  259. myText = u''
  260. if self._data[keyTag, 0] == []:
  261. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  262. return flow
  263. if unicode(self._data[keyTag, index]) == unicode(targetValue):
  264. subText = sub
  265. for sp in subparts:
  266. if sp[0] in partCommands:
  267. subText = partCommands[sp[0]](index, subText, sp[1])
  268. elif sp[1] == '':
  269. subText = plain(index, subText, sp[0])
  270. else:
  271. self.warning('Warning: unknown command '+sp[0])
  272. myText += subText
  273. return flow.replace(key, myText)
  274. partCommands['ATGIF'] = addIf
  275. def addIfNot(index, flow, string):
  276. key = '[$ATGIFNOT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  277. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  278. keyTag = string.split('$')[0]
  279. targetValue = string.split('$')[1]
  280. subparts = parse(sub)
  281. myText = u''
  282. if self._data[keyTag, 0] == []:
  283. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  284. return flow
  285. if not unicode(self._data[keyTag, index]) == unicode(targetValue):
  286. subText = sub
  287. for sp in subparts:
  288. if sp[0] in partCommands:
  289. subText = partCommands[sp[0]](index, subText, sp[1])
  290. elif sp[1] == '':
  291. subText = plain(index, subText, sp[0])
  292. else:
  293. self.warning('Warning: unknown command '+sp[0])
  294. myText += subText
  295. return flow.replace(key, myText)
  296. partCommands['ATGIFNOT'] = addIfNot
  297. def addGreater(index, flow, string):
  298. key = '[$ATGGREATER$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  299. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  300. keyTag = string.split('$')[0]
  301. targetValue = string.split('$')[1]
  302. subparts = parse(sub)
  303. myText = u''
  304. if self._data[keyTag, 0] == []:
  305. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  306. return flow
  307. try:
  308. if float(self._data[keyTag, index]) > float(targetValue):
  309. subText = sub
  310. for sp in subparts:
  311. if sp[0] in partCommands:
  312. subText = partCommands[sp[0]](index, subText, sp[1])
  313. elif sp[1] == '':
  314. subText = plain(index, subText, sp[0])
  315. else:
  316. self.warning('Warning: unknown command '+sp[0])
  317. myText += subText
  318. except:
  319. self.warning('ERROR: trying to compare uncomparable values!')
  320. return flow.replace(key, myText)
  321. partCommands['ATGGREATER'] = addGreater
  322. def addLess(index, flow, string):
  323. key = '[$ATGLESS$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  324. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  325. keyTag = string.split('$')[0]
  326. targetValue = string.split('$')[1]
  327. subparts = parse(sub)
  328. myText = u''
  329. if self._data[keyTag, 0] == []:
  330. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  331. return flow
  332. try:
  333. if float(self._data[keyTag, index]) < float(targetValue):
  334. subText = sub
  335. for sp in subparts:
  336. if sp[0] in partCommands:
  337. subText = partCommands[sp[0]](index, subText, sp[1])
  338. elif sp[1] == '':
  339. subText = plain(index, subText, sp[0])
  340. else:
  341. self.warning('Warning: unknown command '+sp[0])
  342. myText += subText
  343. except:
  344. self.warning('ERROR: trying to compare uncomparable values!')
  345. return flow.replace(key, myText)
  346. partCommands['ATGLESS'] = addLess
  347. def addReplace(index, flow, string):
  348. key = '[$ATGREPLACE$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  349. targetString = string[len(string.split('$')[0])+1:]
  350. srcString = string.split('$')[0]
  351. self.replacement[srcString] = targetString
  352. key = '[$ATGREPLACE$' + string + '$]'
  353. return flow.replace(key,'')
  354. partCommands['ATGREPLACE'] = addReplace
  355. def addPrefix(index, flow, string):
  356. key = '[$ATGPREFIX$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  357. sub = string
  358. subparts = parse(sub)
  359. for sp in subparts:
  360. if sp[0] in partCommands:
  361. sub = partCommands[sp[0]](index, sub, sp[1])
  362. elif sp[1] == '':
  363. sub = plain(index, sub, sp[0])
  364. else:
  365. self.warning('Warning: unknown command '+sp[0])
  366. self.bonusPrefix += sub
  367. key = '[$ATGPREFIX$' + string + '$]'
  368. return flow.replace(key,'')
  369. partCommands['ATGPREFIX'] = addPrefix
  370. def skip(index, flow, string):
  371. return u'[$ATGSKIP_DO$]'
  372. partCommands['ATGSKIP'] = skip
  373. def prev(index, flow, string):
  374. key = '[$ATGPREV$%s$]' % (string.split('$')[0])
  375. keytag = string.split('$')[0]
  376. if self._data[keytag, 0] == []:
  377. self.warning('WARNING: keyword not found in table - %s' % (keytag))
  378. return flow
  379. if index == 0:
  380. self.log('INFORMATION: Skipping ATGPREV tag for entry with index = 0')
  381. return u'[$ATGSKIP_DO$]'
  382. return flow.replace('[$ATGPREV$%s$]' % (keytag), unicode(self._data.col_by_key(keytag)[index-1]))
  383. partCommands['ATGPREV'] = prev
  384. self.commands = partCommands
  385. self.parts = parse(self.text)
  386. def process(self, data):
  387. '''
  388. Generate text for the given data.
  389. '''
  390. self._data = data
  391. multiWords = {}
  392. numbs = ('1','2','3','4','5','6','7','8','9','0')
  393. for i in data.keys:
  394. multi = False
  395. while i[-1] in numbs:
  396. i = i[:-1]
  397. multi = True
  398. if multi:
  399. if i in multiWords:
  400. multiWords[i] += 1
  401. else:
  402. multiWords[i] = 1
  403. self._multiWords = multiWords
  404. if self.oneFile:
  405. out = ''
  406. else:
  407. out = {}
  408. index = 0
  409. partCommands = self.commands
  410. for element in data.col_by_key(self.keyField):
  411. self.bonusPrefix = self.prefix
  412. text = self.text
  413. for i in self.parts:
  414. if i[0] in partCommands:
  415. text = partCommands[i[0]](index, text, i[1])
  416. elif i[1] == u'':
  417. text = partCommands['_ATGPLAIN'](index, text, i[0])
  418. else:
  419. self.warning('Warning: unknown command '+i[0])
  420. for i in self.replacement:
  421. text = text.replace(i, self.replacement[i])
  422. self.replacement = {}
  423. index += 1
  424. if u'[$ATGSKIP_DO$]' in text:
  425. self.log('ATGSKIP Tag found. Skipping ' + unicode(element) + '.')
  426. else:
  427. if self.oneFile:
  428. out += text
  429. else:
  430. name = self.bonusPrefix + unicode(element)
  431. out[name] = self.header + text + self.footer
  432. self.log('Created %s' % (element))
  433. if self.oneFile:
  434. out = self.header + out + self.footer
  435. return out
  436. @staticmethod
  437. def express(cls, text, **kwargs):
  438. obj = cls()
  439. obj.text = text
  440. obj.keyField = kwargs.get('keyField', 'Index')
  441. obj.extension = kwargs.get('extension', '')
  442. obj.prefix = kwargs.get('prefix', '')
  443. obj.encoding = kwargs.get('encoding', 'utf-8')
  444. return obj