|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419 |
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- '''
- Template module for Kai's Text Tools.
-
- (c) 2013 Ivan "Kai SD" Korystin
-
- License: GPLv3
- '''
-
import io
import re
class TemplateV3(object):
    '''
    Reader for ATGv3 templates.

    Empty placeholder: the class defines no attributes or methods of its own.
    '''
-
try:
    _text_type = unicode  # Python 2: the text type is ``unicode``
except NameError:
    _text_type = str  # Python 3: ``str`` already is the text type


class TemplateV2(object):
    '''
    Class for reading and rendering ATGv2 templates.

    A template is plain text containing tags. A tag starts with ``[$name$``
    and ends with ``$]``; tags may be nested. ``[$Column$]`` is replaced with
    the value of that column for the current row, and tags whose name is
    registered in ``self.commands`` (``ATGIF``, ``ATGLIST``, ``ATGFOOTER``,
    ...) are handled by the matching command.

    The ``data`` object given to ``process`` must provide:

    * ``data.keys`` - container of column names (iterated and tested with
      ``in``);
    * ``data[key, index]`` - value of column ``key`` in row ``index``; the
      code treats ``data[key, 0] == []`` as "column does not exist";
    * ``data.col_by_key(key)`` - sequence with every value of a column.

    Public attributes: ``keyField``, ``extension``, ``prefix``, ``encoding``,
    ``oneFile``, ``transpose``, ``text``, ``parts``, ``commands``, ``key``,
    ``footer``, ``replacement`` and (while processing) ``bonusPrefix``.
    '''

    # A tag opener is "[$", the shortest run of characters, then "$".
    _OPENER = re.compile(r'\[\$.*?\$')
    _CLOSER = re.compile(r'\$\]')
    # A command returns this marker to make process() drop the current row.
    _SKIP_MARKER = u'[$ATGSKIP_DO$]'

    def __init__(self, filename=None, encoding='utf-8', text=''):
        '''
        Constructor.

        filename -- path of an ATGv2 template file; when given, ``text`` is
                    ignored and the settings are read from the file's key
                    line (the second line of the file).
        encoding -- encoding used to decode the template file.
        text     -- template text used when no filename is given.

        Raises ValueError when the file is not an ATGv2 template or its key
        line is malformed, and when the template has a closer without an
        opener.
        '''
        # Defaults for templates built from text, so that process() has every
        # attribute it reads; a template file overrides all of them.
        self.keyField = 'Index'
        self.extension = ''
        self.prefix = ''
        self.encoding = encoding
        self.oneFile = False
        self.transpose = False

        if filename:
            self._readFile(filename, encoding)
        else:
            self.text = text

        self.key = u''
        self.footer = u''
        self.replacement = {}
        self._data = None
        self._multiWords = None

        # Every command is called as command(index, flow, body) and returns
        # the new flow.
        self.commands = {
            '_ATGPLAIN': self._plain,
            'ATGkey': self._addKey,
            'ATGFOOTER': self._addFooter,
            'ATGLIST': self._addList,
            'ATGLISTCUT': self._addListCut,
            'ATGIF': self._addIf,
            'ATGIFNOT': self._addIfNot,
            'ATGGREATER': self._addGreater,
            'ATGLESS': self._addLess,
            'ATGREPLACE': self._addReplace,
            'ATGPREFIX': self._addPrefix,
            'ATGSKIP': self._skip,
            'ATGPREV': self._prev,
        }
        self.parts = self._parse(self.text)

    def _readFile(self, filename, encoding):
        '''
        Load the settings and the template text from an ATGv2 file.

        The first line must start with ``ATGV2``. The second line is the key
        ``[$keyField$extension$prefix$encoding$flag$...$]`` where the optional
        flags are ``oneFile`` and ``transpose``. The rest is the template.
        '''
        with io.open(filename, 'r', encoding=encoding) as templateFile:
            topline = templateFile.readline()
            if not topline.startswith('ATGV2'):
                raise ValueError('%s is not an ATGv2 template' % (filename,))

            key = templateFile.readline().rstrip('\r\n')
            if not (key.startswith('[$') and key.endswith('$]')):
                raise ValueError('%s has bad ATGv2 key' % (filename,))
            # Only "]" is cut from the end, so the last "$" stays and yields a
            # trailing empty field, exactly like the historical slicing did.
            keyInfo = key[2:-1].split('$')
            if len(keyInfo) < 4:
                raise ValueError('%s has bad ATGv2 key' % (filename,))

            self.keyField = keyInfo[0]
            self.extension = keyInfo[1]
            self.prefix = keyInfo[2]
            self.encoding = keyInfo[3]
            flags = keyInfo[4:]
            self.oneFile = 'oneFile' in flags
            self.transpose = 'transpose' in flags
            self.text = templateFile.read()

    @classmethod
    def _parse(cls, text):
        '''
        Split text into its top-level tags.

        Returns a list of ``(name, body)`` tuples, where ``body`` is the text
        between the opener and its closer (empty for ``[$name$]``). Tags whose
        name starts with ``ATG`` are placed first, in reverse order of
        appearance, followed by the other tags in order of appearance.

        Raises ValueError when a closer has no opener before it. Openers that
        are never closed are ignored.
        '''
        openers = cls._OPENER.finditer(text)
        closers = cls._CLOSER.finditer(text)
        pending = []   # openers seen so far that are not closed yet
        topLevel = []  # (opener, closer) pairs that are not nested in a tag

        closer = next(closers, None)
        while closer is not None:
            opener = next(openers, None)
            if opener is not None and opener.start() < closer.start():
                pending.append(opener)
                continue

            # The closer belongs to the last pending opener located before
            # it; a look-ahead opener stored past the closer is stepped over.
            idx = -1
            try:
                while pending[idx].start() > closer.start():
                    idx -= 1
            except IndexError:
                raise ValueError(
                    'Template parsing error: can not find the opener for '
                    + str(closer.start()))
            # The oldest pending opener is not enclosed by any other tag.
            if idx == -len(pending):
                topLevel.append((pending[idx], closer))
            del pending[idx]

            if opener is not None:
                pending.append(opener)
            closer = next(closers, None)

        parts = []
        for opener, closer in topLevel:
            part = (opener.group()[2:-1], text[opener.end():closer.start()])
            if part[0].startswith('ATG'):
                parts.insert(0, part)
            else:
                parts.append(part)
        return parts

    def _expand(self, index, flow, parts):
        '''
        Apply parts (as returned by _parse) to flow for the row index and
        return the resulting text.
        '''
        for name, body in parts:
            if name in self.commands:
                flow = self.commands[name](index, flow, body)
            elif body == '':
                flow = self._plain(index, flow, name)
            else:
                self.warning('Warning: unknown command ' + name)
        return flow

    def _plain(self, index, flow, keytag):
        '''
        Replace ``[$keytag$]`` with the value of column keytag in row index.
        '''
        if keytag not in self._data.keys:
            self.warning('WARNING: keyword not found in table - %s' % (keytag,))
            return flow
        return flow.replace('[$%s$]' % (keytag,),
                            _text_type(self._data[keytag, index]))

    def _numberedPlain(self, index, flow, keytag, number):
        '''
        Replace ``[$keytag$]`` with the value of column keytag + number.
        '''
        column = keytag + str(number)
        if column not in self._data.keys:
            self.warning('WARNING: keyword not found in table - %s' % (column,))
            return flow
        return flow.replace('[$%s$]' % (keytag,),
                            _text_type(self._data[column, index]))

    def _addKey(self, index, flow, text):
        '''
        ATGkey: collect text into self.key (once) and remove the tag.
        '''
        if text not in self.key:
            self.key += text
        return flow.replace('[$ATGkey$' + text + '$]', '')

    def _addFooter(self, index, flow, text):
        '''
        ATGFOOTER: collect text into self.footer (once) and remove the tag.
        '''
        if text not in self.footer:
            self.footer += text
        return flow.replace('[$ATGFOOTER$' + text + '$]', '')

    def _listBlock(self, tag, index, flow, string, cut):
        '''
        Shared implementation of ATGLIST and ATGLISTCUT.

        string is ``Base$body``. The body is rendered once for every numbered
        column ``Base1 .. BaseN`` and kept only when that column's cell is not
        an empty string. With cut set, the last character of the joined
        result is dropped.
        '''
        keyTag, _, sub = string.partition('$')
        key = '[$%s$%s$%s$]' % (tag, keyTag, sub)
        subparts = self._parse(sub)
        if keyTag not in self._multiWords:
            self.warning('Keytag %s is not multiple!' % (keyTag,))
            return flow

        pieces = []
        for number in range(1, self._multiWords[keyTag] + 1):
            subText = sub
            for name, body in subparts:
                if name in self._multiWords:
                    subText = self._numberedPlain(index, subText, name, number)
                elif name == 'ATGLINDEX':
                    subText = subText.replace('[$ATGLINDEX$]', str(number))
                elif name in self.commands:
                    subText = self.commands[name](index, subText, body)
                elif body == '':
                    subText = self._plain(index, subText, name)
                else:
                    self.warning('Warning: unknown command ' + name)
            if not self._data[keyTag + str(number), index] == u'':
                pieces.append(subText)

        myText = u''.join(pieces)
        if cut:
            myText = myText[:-1]
        return flow.replace(key, myText)

    def _addList(self, index, flow, string):
        '''
        ATGLIST: repeat the body for every numbered column of a keyword.
        '''
        return self._listBlock('ATGLIST', index, flow, string, False)

    def _addListCut(self, index, flow, string):
        '''
        ATGLISTCUT: like ATGLIST, minus the last character of the result.
        '''
        return self._listBlock('ATGLISTCUT', index, flow, string, True)

    def _conditional(self, tag, index, flow, string, test):
        '''
        Shared implementation of ATGIF, ATGIFNOT, ATGGREATER and ATGLESS.

        string is ``Column$value$body``. The tag is replaced with the rendered
        body when ``test(cell, value)`` is true and with nothing otherwise.
        A tag without a value field, or naming an unknown column, produces a
        warning and leaves flow untouched.
        '''
        fields = string.split('$', 2)
        if len(fields) < 2:
            # Used to die with IndexError; report it like other tag problems.
            self.warning('WARNING: malformed %s tag - %s' % (tag, string))
            return flow
        keyTag = fields[0]
        targetValue = fields[1]
        sub = fields[2] if len(fields) > 2 else ''
        key = '[$%s$%s$]' % (tag, string)
        subparts = self._parse(sub)
        if self._data[keyTag, 0] == []:
            self.warning('WARNING: keyword not found in table - %s' % (keyTag,))
            return flow

        myText = u''
        if test(self._data[keyTag, index], targetValue):
            myText = self._expand(index, sub, subparts)
        return flow.replace(key, myText)

    def _asNumbers(self, value, target):
        '''
        Return (float(value), float(target)), or None after a warning when
        either of them can not be converted.
        '''
        try:
            return float(value), float(target)
        except (TypeError, ValueError):
            self.warning('ERROR: trying to compare uncomparable values!')
            return None

    def _isEqual(self, value, target):
        return _text_type(value) == _text_type(target)

    def _isNotEqual(self, value, target):
        return not _text_type(value) == _text_type(target)

    def _isGreater(self, value, target):
        pair = self._asNumbers(value, target)
        return pair is not None and pair[0] > pair[1]

    def _isLess(self, value, target):
        pair = self._asNumbers(value, target)
        return pair is not None and pair[0] < pair[1]

    def _addIf(self, index, flow, string):
        '''
        ATGIF: keep the body when the cell's text equals the value.
        '''
        return self._conditional('ATGIF', index, flow, string, self._isEqual)

    def _addIfNot(self, index, flow, string):
        '''
        ATGIFNOT: keep the body when the cell's text differs from the value.
        '''
        return self._conditional('ATGIFNOT', index, flow, string,
                                 self._isNotEqual)

    def _addGreater(self, index, flow, string):
        '''
        ATGGREATER: keep the body when the cell, as a number, is greater
        than the value.
        '''
        return self._conditional('ATGGREATER', index, flow, string,
                                 self._isGreater)

    def _addLess(self, index, flow, string):
        '''
        ATGLESS: keep the body when the cell, as a number, is less than
        the value.
        '''
        return self._conditional('ATGLESS', index, flow, string, self._isLess)

    def _addReplace(self, index, flow, string):
        '''
        ATGREPLACE: string is ``source$target``; register the replacement
        (applied by process after all tags) and remove the tag.
        '''
        srcString, _, targetString = string.partition('$')
        self.replacement[srcString] = targetString
        return flow.replace('[$ATGREPLACE$' + string + '$]', '')

    def _addPrefix(self, index, flow, string):
        '''
        ATGPREFIX: render the body, append it to self.bonusPrefix and
        remove the tag.
        '''
        self.bonusPrefix += self._expand(index, string, self._parse(string))
        return flow.replace('[$ATGPREFIX$' + string + '$]', '')

    def _skip(self, index, flow, string):
        '''
        ATGSKIP: replace the whole flow with the skip marker.
        '''
        return self._SKIP_MARKER

    def _prev(self, index, flow, string):
        '''
        ATGPREV: replace ``[$ATGPREV$Column$]`` with the value of Column in
        the previous row; the first row (index 0) is skipped instead.
        '''
        keytag = string.split('$')[0]
        if self._data[keytag, 0] == []:
            self.warning('WARNING: keyword not found in table - %s' % (keytag,))
            return flow
        if index == 0:
            self.log('INFORMATION: Skipping ATGPREV tag for entry with index = 0')
            return self._SKIP_MARKER
        return flow.replace('[$ATGPREV$%s$]' % (keytag,),
                            _text_type(self._data.col_by_key(keytag)[index - 1]))

    def process(self, data):
        '''
        Render the template once for every value of the keyField column.

        Returns, when ``oneFile`` is set, one string: ``self.key``, the
        rendered rows in order, then ``self.footer``. Otherwise returns a dict
        mapping ``bonusPrefix + key value`` to the text rendered for that row.
        Rows whose rendered text contains the skip marker are left out.
        '''
        self._data = data

        # Column names ending in digits ("Item1", "Item2") form a numbered
        # group: count the members per base name. Names made of digits only
        # (or empty names) have no base name and are not grouped.
        multiWords = {}
        for column in data.keys:
            base = column.rstrip('0123456789')
            if base and base != column:
                multiWords[base] = multiWords.get(base, 0) + 1
        self._multiWords = multiWords

        partCommands = self.commands
        chunks = []
        files = {}
        for index, element in enumerate(data.col_by_key(self.keyField)):
            self.bonusPrefix = self.prefix
            text = self.text
            for name, body in self.parts:
                if name in partCommands:
                    text = partCommands[name](index, text, body)
                elif body == u'':
                    text = partCommands['_ATGPLAIN'](index, text, name)
                else:
                    self.warning('Warning: unknown command ' + name)
            for source in self.replacement:
                text = text.replace(source, self.replacement[source])
            # Replacements are registered per row by ATGREPLACE tags.
            self.replacement = {}

            if self._SKIP_MARKER in text:
                self.log('ATGSKIP Tag found. Skipping ' + _text_type(element) + '.')
                continue
            if self.oneFile:
                chunks.append(text)
            else:
                files[self.bonusPrefix + _text_type(element)] = text
            self.log('Created %s' % (element,))

        if self.oneFile:
            return self.key + u''.join(chunks) + self.footer
        return files

    def warning(self, text):
        '''
        Report a problem found in the template or the data (printed to
        standard output).
        '''
        print(text)

    def log(self, text):
        '''
        Receive progress messages; does nothing by default.
        '''
        pass

    @classmethod
    def express(cls, text, **kwargs):
        '''
        Build a template straight from text, without a template file.

        Optional keyword arguments and their defaults: keyField ('Index'),
        extension (''), prefix (''), encoding ('utf-8'), oneFile (False),
        transpose (False).
        '''
        # Passing the text to the constructor is what gets it parsed.
        obj = cls(text=text)
        obj.keyField = kwargs.get('keyField', 'Index')
        obj.extension = kwargs.get('extension', '')
        obj.prefix = kwargs.get('prefix', '')
        obj.encoding = kwargs.get('encoding', 'utf-8')
        obj.oneFile = kwargs.get('oneFile', False)
        obj.transpose = kwargs.get('transpose', False)
        return obj
|