選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。
このリポジトリはアーカイブされています。 ファイルの閲覧とクローンは可能ですが、プッシュや、課題・プルリクエストのオープンはできません。

497 行
21KB

  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. Template module for Automatic Text Tools.
  5. (c) 2013 Ivan "Kai SD" Korystin
  6. License: GPLv3
  7. '''
  8. import re
  9. class Template(object):
  10. '''
  11. Empty template class. Generates empty text.
  12. '''
  13. def process(self, data):
  14. '''
  15. Replace this method in subclasses.
  16. '''
  17. return ''
  18. def warning(self, text):
  19. '''
  20. Prints a warning
  21. '''
  22. print text
  23. def log(self, text):
  24. '''
  25. Print information
  26. '''
  27. #print 'Template:', text
  28. pass
  29. class TemplateV2(Template):
  30. '''
  31. Class for reading ATGv2 templates.
  32. ATGv2 template file should be a plain text file, starting with the line
  33. ATGV2
  34. followed by the info line:
  35. [$KeyField$Extension$Prefix$Encoding$]
  36. where
  37. KeyField - is a name of a data column, that contains an identifier.
  38. Extension - is the desired extension for the generated files.
  39. Prefix - is the desired filename prefix for the generated files
  40. Encoding - is the desired encoding for the generated files.
  41. The line may also have some optional keywords before the closing bracket:
  42. oneFile$ - place all generated text into a single file instead of
  43. generating a file for each table row.
  44. After the info line, you can put your text.
  45. You can use following commands to handle the data:
  46. * [$Name$], where Name is the column header,
  47. will be replaced with value from the current row.
  48. * [$ATGLINDEX$] will be replaced with the number of a current row.
  49. * [$ATGESCAPE$name$] , where Name is the column header,
  50. will be replaced with value from the current row, quotes and line endings will be escaped.
  51. * [$ATGHEADER$Text$] and [$ATGFOOTER$Text$] will place the given text
  52. at the begining or at the end of the file. You can't use other
  53. commands in this text.
  54. * [$ATGLIST$Name$Text$], where Name is a multi-column header
  55. (i.e. 'Col' will represent 'Col1', 'Col2', 'Col3' etc)
  56. will repeat the given text for each non-empty value.
  57. You can use other commands in Text. Also [$Name$] inside the list
  58. will be replaced with the value for the current row and column.
  59. * [$ATGLINDEX$] can be used only inside the ATGLIST text,
  60. will be replaced with the current column index.
  61. * [$ATGLISTCUT$Name$Text$] - same as ATGLIST, but the last symbol
  62. will be removed. Useful for removing unnecessary newlines.
  63. * [$ATGIF$Name$Value$Text$] will be replaced with the given text
  64. only if the the given column's value is the same as the given one.
  65. Will be replaced with the empty text otherwise. You can use other
  66. commands in Text.
  67. * [$ATGIFNOT$Name$Value$Text$] - same as ATGIF, but the column's value
  68. should not be equal to the given one.
  69. * [$ATGGREATER$Name$Value$Text$] - same as ATGIF, but the value should
  70. be the number and it should be greater then the given one.
  71. * [$ATGGREATER$Name$Value$Text$] - same as ATGGREATER, but the value
  72. should be less then the given one.
  73. * [$ATGREPLACE$Text1$Text2$] - Will replace Text1 with Text2. Replacements
  74. will be done after all other commands. You can't use regular expressions or
  75. other commands in the text.
  76. * [$ATGPREFIX$Text$] - Will add the given text to the filename prefix.
  77. You can use other commands in text, but do it carefully.
  78. * [$ATGSKIP$] - Skip the current row. Use only in combination with the
  79. ATGIF/ATGIFNOT, or you will generate nothing.
  80. * [$ATGPREV$Name$], where Name is the column header,
  81. will be replaced with the with the value of the given header from the
  82. previous row. ATGSKIP will be used for the first row.
  83. '''
  84. def __init__(self, filename=None, encoding='utf-8', text=''):
  85. '''
  86. Constructor.
  87. filename - name of the ATGv2 template file.
  88. encoding - encoding of the template file.
  89. text - text to use if no filename has been provided.
  90. '''
  91. if filename:
  92. with open(filename, 'r') as templateFile:
  93. topline = templateFile.readline().decode(encoding)
  94. if not topline.startswith('ATGV2'):
  95. raise BaseException('%s is not an ATGv2 template' % (filename))
  96. key = templateFile.readline().decode(encoding)
  97. if key[:2] == '[$' and key[-3:-1] == '$]':
  98. keyInfo = key[2:-2].split('$')
  99. if len(keyInfo) < 4:
  100. raise BaseException('%s has bad ATGv2 key' % (filename))
  101. self.keyField = keyInfo[0]
  102. self.extension = keyInfo[1]
  103. self.prefix = keyInfo[2]
  104. self.encoding = keyInfo[3]
  105. if 'oneFile' in keyInfo[4:]:
  106. self.oneFile = True
  107. else:
  108. self.oneFile = False
  109. self.text = u''
  110. else:
  111. raise BaseException('%s has bad ATGv2 key' % (filename))
  112. for i in templateFile.readlines():
  113. self.text += i.decode(encoding)
  114. else:
  115. self.text = text
  116. self.header = u''
  117. self.footer = u''
  118. self.replacement = {}
  119. self._data = None
  120. self._multiWords = None
  121. def parse(text):
  122. topParts = []
  123. matches = {}
  124. openers = re.finditer('\[\$.*?\$', text)
  125. closers = re.finditer('\$\]', text)
  126. ops = []
  127. try:
  128. cl = closers.next()
  129. while not cl is None:
  130. try:
  131. op = openers.next()
  132. if op.start() < cl.start():
  133. ops.append(op)
  134. else:
  135. idx = -1
  136. try:
  137. while ops[idx].start() > cl.start():
  138. idx -= 1
  139. except:
  140. raise BaseException('Template parsing error: can not find the opener for '+str(cl.start()))
  141. matches[ops[idx]] = cl
  142. if len(ops) == 1 or idx == -len(ops):
  143. topParts.append(ops[idx])
  144. del ops[idx]
  145. ops.append(op)
  146. try:
  147. cl = closers.next()
  148. except StopIteration:
  149. cl = None
  150. except StopIteration:
  151. idx = -1
  152. try:
  153. while ops[idx].start() > cl.start():
  154. idx -= 1
  155. except:
  156. raise BaseException('Template parsing error: can not find the opener for '+str(cl.start()))
  157. matches[ops[idx]] = cl
  158. if len(ops) == 1 or idx == -len(ops):
  159. topParts.append(ops[idx])
  160. del ops[idx]
  161. try:
  162. cl = closers.next()
  163. except StopIteration:
  164. cl = None
  165. except StopIteration:
  166. pass
  167. parts = []
  168. for i in topParts:
  169. startPoint = i.end()
  170. endPoint = matches[i].start()
  171. p = (i.group()[2:-1], text[startPoint:endPoint])
  172. if p[0].startswith('ATG'):
  173. parts.insert(0, p)
  174. else:
  175. parts.append(p)
  176. return parts
  177. partCommands = {}
  178. def plain(index, flow, keytag):
  179. if not keytag in self._data.keys:
  180. self.warning('WARNING: keyword not found in table - %s' % (keytag))
  181. return flow
  182. return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag, index]))
  183. partCommands['_ATGPLAIN'] = plain
  184. def nPlain(index, flow, keytag, number):
  185. if not keytag+str(number) in self._data.keys:
  186. self.warning('WARNING: keyword not found in table - %s' % (keytag+str(number)))
  187. return flow
  188. return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag+str(number), index]))
  189. def lIndex(index, flow, keytag, number):
  190. return flow.replace('[$ATGLINDEX$]', str(number))
  191. def addHeader(index, flow, text):
  192. if self.header.find(text) < 0:
  193. self.header += text
  194. key = '[$ATGHEADER$' + text + '$]'
  195. return flow.replace(key,'')
  196. partCommands['ATGHEADER'] = addHeader
  197. def addFooter(index, flow, text):
  198. if self.footer.find(text) < 0:
  199. self.footer += text
  200. key = '[$ATGFOOTER$' + text + '$]'
  201. return flow.replace(key,'')
  202. partCommands['ATGFOOTER'] = addFooter
  203. def addEscape(index, flow, keytag):
  204. if not keytag in self._data.keys:
  205. self.warning('WARNING: keyword not found in table - %s' % (keytag))
  206. return flow
  207. string = unicode(self._data[keytag, index])
  208. string = string.replace('\n', '\\n')
  209. string = string.replace('"', '\\"')
  210. string = string.replace('\\', '\\\\')
  211. string = string.replace('\'', '\\\'')
  212. return flow.replace('[$ATGESCAPE$%s$]' % (keytag), string)
  213. partCommands['ATGESCAPE'] = addEscape
  214. def addList(index, flow, string):
  215. key = '[$ATGLIST$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  216. sub = string[len(string.split('$')[0])+1:]
  217. keyTag = string.split('$')[0]
  218. subparts = parse(sub)
  219. myText = u''
  220. if not keyTag in self._multiWords:
  221. self.warning('Keytag %s is not multiple!' % (keyTag))
  222. return flow
  223. for j in xrange(1, self._multiWords[keyTag]+1):
  224. subText = sub
  225. for sp in subparts:
  226. if sp[0] in self._multiWords:
  227. subText = nPlain(index, subText, sp[0], j)
  228. elif sp[0] == 'ATGLINDEX':
  229. subText = lIndex(index, subText, sp[0], j)
  230. elif sp[0] in partCommands:
  231. subText = partCommands[sp[0]](index, subText, sp[1])
  232. elif sp[1] == '':
  233. subText = plain(index, subText, sp[0])
  234. else:
  235. self.warning('Warning: unknown command '+sp[0])
  236. if not self._data[keyTag+str(j), index] == u'':
  237. myText += subText
  238. return flow.replace(key, myText)
  239. partCommands['ATGLIST'] = addList
  240. def addListCut(index, flow, string):
  241. key = '[$ATGLISTCUT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  242. sub = string[len(string.split('$')[0])+1:]
  243. keyTag = string.split('$')[0]
  244. subparts = parse(sub)
  245. myText = u''
  246. if not keyTag in self._multiWords:
  247. self.warning('Keytag %s is not multiple!' % (keyTag))
  248. return flow
  249. for j in xrange(1, self._multiWords[keyTag]+1):
  250. subText = sub
  251. for sp in subparts:
  252. if sp[0] in self._multiWords:
  253. subText = nPlain(index, subText, sp[0], j)
  254. elif sp[0] == 'ATGLINDEX':
  255. subText = lIndex(index, subText, sp[0], j)
  256. elif sp[0] in partCommands:
  257. subText = partCommands[sp[0]](index, subText, sp[1])
  258. elif sp[1] == '':
  259. subText = plain(index, subText, sp[0])
  260. else:
  261. self.warning('Warning: unknown command '+sp[0])
  262. if not self._data[keyTag+str(j), index] == u'':
  263. myText += subText
  264. return flow.replace(key, myText[:-1])
  265. partCommands['ATGLISTCUT'] = addListCut
  266. def addIf(index, flow, string):
  267. key = '[$ATGIF$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  268. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  269. keyTag = string.split('$')[0]
  270. targetValue = string.split('$')[1]
  271. subparts = parse(sub)
  272. myText = u''
  273. if self._data[keyTag, 0] == []:
  274. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  275. return flow
  276. if unicode(self._data[keyTag, index]) == unicode(targetValue):
  277. subText = sub
  278. for sp in subparts:
  279. if sp[0] in partCommands:
  280. subText = partCommands[sp[0]](index, subText, sp[1])
  281. elif sp[1] == '':
  282. subText = plain(index, subText, sp[0])
  283. else:
  284. self.warning('Warning: unknown command '+sp[0])
  285. myText += subText
  286. return flow.replace(key, myText)
  287. partCommands['ATGIF'] = addIf
  288. def addIfNot(index, flow, string):
  289. key = '[$ATGIFNOT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  290. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  291. keyTag = string.split('$')[0]
  292. targetValue = string.split('$')[1]
  293. subparts = parse(sub)
  294. myText = u''
  295. if self._data[keyTag, 0] == []:
  296. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  297. return flow
  298. if not unicode(self._data[keyTag, index]) == unicode(targetValue):
  299. subText = sub
  300. for sp in subparts:
  301. if sp[0] in partCommands:
  302. subText = partCommands[sp[0]](index, subText, sp[1])
  303. elif sp[1] == '':
  304. subText = plain(index, subText, sp[0])
  305. else:
  306. self.warning('Warning: unknown command '+sp[0])
  307. myText += subText
  308. return flow.replace(key, myText)
  309. partCommands['ATGIFNOT'] = addIfNot
  310. def addGreater(index, flow, string):
  311. key = '[$ATGGREATER$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  312. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  313. keyTag = string.split('$')[0]
  314. targetValue = string.split('$')[1]
  315. subparts = parse(sub)
  316. myText = u''
  317. if self._data[keyTag, 0] == []:
  318. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  319. return flow
  320. try:
  321. if float(self._data[keyTag, index]) > float(targetValue):
  322. subText = sub
  323. for sp in subparts:
  324. if sp[0] in partCommands:
  325. subText = partCommands[sp[0]](index, subText, sp[1])
  326. elif sp[1] == '':
  327. subText = plain(index, subText, sp[0])
  328. else:
  329. self.warning('Warning: unknown command '+sp[0])
  330. myText += subText
  331. except:
  332. self.warning('ERROR: trying to compare uncomparable values!')
  333. return flow.replace(key, myText)
  334. partCommands['ATGGREATER'] = addGreater
  335. def addLess(index, flow, string):
  336. key = '[$ATGLESS$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  337. sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:]
  338. keyTag = string.split('$')[0]
  339. targetValue = string.split('$')[1]
  340. subparts = parse(sub)
  341. myText = u''
  342. if self._data[keyTag, 0] == []:
  343. self.warning('WARNING: keyword not found in table - %s' % (keyTag))
  344. return flow
  345. try:
  346. if float(self._data[keyTag, index]) < float(targetValue):
  347. subText = sub
  348. for sp in subparts:
  349. if sp[0] in partCommands:
  350. subText = partCommands[sp[0]](index, subText, sp[1])
  351. elif sp[1] == '':
  352. subText = plain(index, subText, sp[0])
  353. else:
  354. self.warning('Warning: unknown command '+sp[0])
  355. myText += subText
  356. except:
  357. self.warning('ERROR: trying to compare uncomparable values!')
  358. return flow.replace(key, myText)
  359. partCommands['ATGLESS'] = addLess
  360. def addReplace(index, flow, string):
  361. key = '[$ATGREPLACE$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  362. targetString = string[len(string.split('$')[0])+1:]
  363. srcString = string.split('$')[0]
  364. self.replacement[srcString] = targetString
  365. key = '[$ATGREPLACE$' + string + '$]'
  366. return flow.replace(key,'')
  367. partCommands['ATGREPLACE'] = addReplace
  368. def addPrefix(index, flow, string):
  369. key = '[$ATGPREFIX$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:])
  370. sub = string
  371. subparts = parse(sub)
  372. for sp in subparts:
  373. if sp[0] in partCommands:
  374. sub = partCommands[sp[0]](index, sub, sp[1])
  375. elif sp[1] == '':
  376. sub = plain(index, sub, sp[0])
  377. else:
  378. self.warning('Warning: unknown command '+sp[0])
  379. self.bonusPrefix += sub
  380. key = '[$ATGPREFIX$' + string + '$]'
  381. return flow.replace(key,'')
  382. partCommands['ATGPREFIX'] = addPrefix
  383. def skip(index, flow, string):
  384. return u'[$ATGSKIP_DO$]'
  385. partCommands['ATGSKIP'] = skip
  386. def prev(index, flow, string):
  387. key = '[$ATGPREV$%s$]' % (string.split('$')[0])
  388. keytag = string.split('$')[0]
  389. if self._data[keytag, 0] == []:
  390. self.warning('WARNING: keyword not found in table - %s' % (keytag))
  391. return flow
  392. if index == 0:
  393. self.log('INFORMATION: Skipping ATGPREV tag for entry with index = 0')
  394. return u'[$ATGSKIP_DO$]'
  395. return flow.replace('[$ATGPREV$%s$]' % (keytag), unicode(self._data.col_by_key(keytag)[index-1]))
  396. partCommands['ATGPREV'] = prev
  397. self.commands = partCommands
  398. self.parts = parse(self.text)
  399. def process(self, data):
  400. '''
  401. Generate text for the given data.
  402. '''
  403. self._data = data
  404. multiWords = {}
  405. numbs = ('1','2','3','4','5','6','7','8','9','0')
  406. for i in data.keys:
  407. multi = False
  408. while i[-1] in numbs:
  409. i = i[:-1]
  410. multi = True
  411. if multi:
  412. if i in multiWords:
  413. multiWords[i] += 1
  414. else:
  415. multiWords[i] = 1
  416. self._multiWords = multiWords
  417. if self.oneFile:
  418. out = ''
  419. else:
  420. out = {}
  421. index = 0
  422. partCommands = self.commands
  423. for element in data.col_by_key(self.keyField):
  424. self.bonusPrefix = self.prefix
  425. text = self.text
  426. for i in self.parts:
  427. if i[0] in partCommands:
  428. text = partCommands[i[0]](index, text, i[1])
  429. elif i[1] == u'':
  430. text = partCommands['_ATGPLAIN'](index, text, i[0])
  431. else:
  432. self.warning('Warning: unknown command '+i[0])
  433. for i in self.replacement:
  434. text = text.replace(i, self.replacement[i])
  435. self.replacement = {}
  436. index += 1
  437. if u'[$ATGSKIP_DO$]' in text:
  438. self.log('ATGSKIP Tag found. Skipping ' + unicode(element) + '.')
  439. else:
  440. if self.oneFile:
  441. out += text
  442. else:
  443. name = self.bonusPrefix + unicode(element)
  444. out[name] = self.header + text + self.footer
  445. self.log('Created %s' % (element))
  446. if self.oneFile:
  447. out = self.header + out + self.footer
  448. return out
  449. @staticmethod
  450. def express(cls, text, **kwargs):
  451. obj = cls()
  452. obj.text = text
  453. obj.keyField = kwargs.get('keyField', 'Index')
  454. obj.extension = kwargs.get('extension', '')
  455. obj.prefix = kwargs.get('prefix', '')
  456. obj.encoding = kwargs.get('encoding', 'utf-8')
  457. return obj