KaiSD
/
att


			
				
					
						
						
							
							#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Template module for the Automatic Text Tools.

(c) 2013 Ivan "Kai SD" Korystin 

License: GPLv3
'''

import re
class Template(object):
    '''
    Empty template class. Generates empty text.

    Serves as the base class for real templates: subclasses replace
    process() and may use warning() and log() for reporting.
    '''
    def process(self, data):
        '''
        Replace this method in subclasses.

        data - the data to generate text for; ignored here.

        Returns an empty string.
        '''
        return ''

    def warning(self, text):
        '''
        Prints a warning.

        text - the message to print to the standard output.
        '''
        # Parenthesised form: prints the same thing under the Python 2
        # print statement and is also valid syntax on Python 3.
        print(text)

    def log(self, text):
        '''
        Print information.

        text - the message; currently discarded, logging is switched off.
        '''
        # Deliberately a no-op so that templates stay quiet by default.
        pass
    
class TemplateV2(Template):
    '''
    Class for reading ATGv2 templates.
    
    ATGv2 template file should be a plain text file, starting with the line
    ATGV2
    followed by the info line:
    [$KeyField$Extension$Prefix$Encoding$]
    where
    KeyField - is a name of a data column, that contains an identifier.
    Extension - is the desired extension for the generated files.
    Prefix - is the desired filename prefix for the generated files
    Encoding - is the desired encoding for the generated files.
    The line may also have some optional keywords before the closing bracket:
    oneFile$ - place all generated text into a single file instead of
    generating a file for each table row.
    After the info line, you can put your text.
    You can use following commands to handle the data:
    * [$Name$], where Name is the column header,
    will be replaced with value from the current row.
    * [$ATGLINDEX$] will be replaced with the number of a current row.
    * [$ATGHEADER$Text$] and [$ATGFOOTER$Text$] will place the given text
    at the beginning or at the end of the file. You can't use other
    commands in this text.
    * [$ATGLIST$Name$Text$], where Name is a multi-column header
    (i.e. 'Col' will represent 'Col1', 'Col2', 'Col3' etc)
    will repeat the given text for each non-empty value.
    You can use other commands in Text. Also [$Name$] inside the list
    will be replaced with the value for the current row and column.
    * [$ATGLINDEX$] can be used only inside the ATGLIST text,
    will be replaced with the current column index.
    * [$ATGLISTCUT$Name$Text$] - same as ATGLIST, but the last symbol
    will be removed. Useful for removing unnecessary newlines.
    * [$ATGIF$Name$Value$Text$] will be replaced with the given text
    only if the given column's value is the same as the given one.
    Will be replaced with the empty text otherwise. You can use other
    commands in Text.
    * [$ATGIFNOT$Name$Value$Text$] - same as ATGIF, but the column's value
    should not be equal to the given one.
    * [$ATGGREATER$Name$Value$Text$] - same as ATGIF, but the value should
    be a number and it should be greater than the given one.
    * [$ATGLESS$Name$Value$Text$] - same as ATGGREATER, but the value
    should be less than the given one.
    * [$ATGREPLACE$Text1$Text2$] - Will replace Text1 with Text2. Replacements
    will be done after all other commands. You can't use regular expressions or
    other commands in the text.
    * [$ATGPREFIX$Text$] - Will add the given text to the filename prefix.
    You can use other commands in text, but do it carefully.
    * [$ATGSKIP$] - Skip the current row. Use only in combination with the
    ATGIF/ATGIFNOT, or you will generate nothing.
    * [$ATGPREV$Name$], where Name is the column header,
    will be replaced with the value of the given header from the
    previous row. ATGSKIP will be used for the first row.
    '''

    def __init__(self, filename=None, encoding='utf-8', text=''):
        '''
        Constructor.

        Loads the template (from a file or from the given text), builds the
        table of command handlers (stored in self.commands) and splits the
        template text into its top-level commands (stored in self.parts).

        filename - name of the ATGv2 template file.
        encoding - encoding of the template file.
        text - text to use if no filename has been provided.

        Raises ValueError if the file does not start with the ATGV2 line,
        if its info line is malformed, or if the template text contains
        a closing '$]' that has no opener.
        '''
        # Local import: the module itself only imports re.
        import io

        # unicode() is the Python 2 text type; fall back to str where the
        # name does not exist (Python 3).
        try:
            toText = unicode
        except NameError:
            toText = str

        # Defaults for templates built from text (the same values express()
        # falls back to), so that process() does not fail on a missing
        # attribute. The info line of a template file overrides them.
        self.keyField = 'Index'
        self.extension = ''
        self.prefix = ''
        self.encoding = 'utf-8'
        self.oneFile = False

        if filename:
            with io.open(filename, 'r', encoding=encoding) as templateFile:
                topline = templateFile.readline()
                if not topline.startswith('ATGV2'):
                    raise ValueError('%s is not an ATGv2 template' % (filename))

                # Strip the line ending first, so the check also works for
                # CRLF files and for an info line that ends the file.
                key = templateFile.readline().rstrip('\r\n')
                if not (key.startswith('[$') and key.endswith('$]')):
                    raise ValueError('%s has bad ATGv2 key' % (filename))
                # Drop the leading '[$' and the closing ']'; the trailing '$'
                # leaves one empty item at the end of the list.
                keyInfo = key[2:-1].split('$')
                if len(keyInfo) < 4:
                    raise ValueError('%s has bad ATGv2 key' % (filename))
                self.keyField = keyInfo[0]
                self.extension = keyInfo[1]
                self.prefix = keyInfo[2]
                self.encoding = keyInfo[3]
                self.oneFile = 'oneFile' in keyInfo[4:]

                # Everything after the info line is the template text.
                self.text = templateFile.read()
        else:
            self.text = text

        self.header = u''
        self.footer = u''
        self.replacement = {}
        self._data = None
        self._multiWords = None

        def parse(text):
            '''
            Split text into its top-level commands.

            Returns a list of (name, payload) tuples, where the full tag is
            '[$' + name + '$' + payload + '$]' (payload is empty for a plain
            '[$Name$]' tag). Commands whose name starts with 'ATG' come
            first, in reverse order of appearance; the others follow in
            order of appearance. Nested commands stay inside the payload.
            '''
            openers = list(re.finditer(r'\[\$.*?\$', text))
            nextOpener = 0
            stack = []
            topLevel = []
            for closer in re.finditer(r'\$\]', text):
                # Every opener that starts before this closer is a candidate.
                while (nextOpener < len(openers) and
                       openers[nextOpener].start() < closer.start()):
                    stack.append(openers[nextOpener])
                    nextOpener += 1
                if not stack:
                    raise ValueError('Template parsing error: can not find the opener for '+str(closer.start()))
                # A closer belongs to the most recent unmatched opener.
                opener = stack.pop()
                if not stack:
                    # Nothing encloses this command, so it is top-level.
                    topLevel.append((opener, closer))
            # Openers that were never closed are ignored.

            parts = []
            for opener, closer in topLevel:
                p = (opener.group()[2:-1], text[opener.end():closer.start()])
                if p[0].startswith('ATG'):
                    parts.insert(0, p)
                else:
                    parts.append(p)
            return parts

        partCommands = {}

        def plain(index, flow, keytag):
            # [$Name$] -> value of column Name in the row number index.
            if not keytag in self._data.keys:
                self.warning('WARNING: keyword not found in table - %s' % (keytag))
                return flow
            return flow.replace('[$%s$]' % (keytag), toText(self._data[keytag, index]))
        partCommands['_ATGPLAIN'] = plain

        def nPlain(index, flow, keytag, number):
            # [$Name$] inside a list -> value of column Name<number>.
            if not keytag+str(number) in self._data.keys:
                self.warning('WARNING: keyword not found in table - %s' % (keytag+str(number)))
                return flow
            return flow.replace('[$%s$]' % (keytag), toText(self._data[keytag+str(number), index]))

        def lIndex(index, flow, keytag, number):
            # [$ATGLINDEX$] inside a list -> current column number.
            return flow.replace('[$ATGLINDEX$]', str(number))

        def render(index, body, subparts, number=None):
            '''
            Apply the given parsed commands to body and return the result.

            number is the current column number when rendering the body of
            a list command; None everywhere else.
            '''
            for name, payload in subparts:
                if number is not None and name in self._multiWords:
                    body = nPlain(index, body, name, number)
                elif number is not None and name == 'ATGLINDEX':
                    body = lIndex(index, body, name, number)
                elif name in partCommands:
                    body = partCommands[name](index, body, payload)
                elif payload == '':
                    body = plain(index, body, name)
                else:
                    self.warning('Warning: unknown command '+name)
            return body

        def addHeader(index, flow, text):
            # Collect the text once; the tag itself produces no output.
            if self.header.find(text) < 0:
                self.header += text
            key = '[$ATGHEADER$' + text + '$]'
            return flow.replace(key,'')
        partCommands['ATGHEADER'] = addHeader

        def addFooter(index, flow, text):
            # Collect the text once; the tag itself produces no output.
            if self.footer.find(text) < 0:
                self.footer += text
            key = '[$ATGFOOTER$' + text + '$]'
            return flow.replace(key,'')
        partCommands['ATGFOOTER'] = addFooter

        def makeList(command, cut):
            '''
            Build the handler for [$<command>$Name$Text$].

            cut - drop the last symbol of the generated text (ATGLISTCUT).
            '''
            def handler(index, flow, string):
                keyTag, _, sub = string.partition('$')
                subparts = parse(sub)
                if not keyTag in self._multiWords:
                    self.warning('Keytag %s is not multiple!' % (keyTag))
                    return flow
                myText = u''
                for j in range(1, self._multiWords[keyTag]+1):
                    # The body is rendered even for an empty value, because
                    # nested commands may have side effects.
                    subText = render(index, sub, subparts, j)
                    if not self._data[keyTag+str(j), index] == u'':
                        myText += subText
                if cut:
                    myText = myText[:-1]
                return flow.replace('[$' + command + '$' + string + '$]', myText)
            return handler
        partCommands['ATGLIST'] = makeList('ATGLIST', False)
        partCommands['ATGLISTCUT'] = makeList('ATGLISTCUT', True)

        def makeCondition(command, accepts):
            '''
            Build the handler for [$<command>$Name$Value$Text$].

            accepts(value, target) decides whether Text is kept.
            '''
            def handler(index, flow, string):
                keyTag, separator, rest = string.partition('$')
                if not separator:
                    # No value given: nothing to compare with.
                    self.warning('WARNING: malformed command - [$%s$%s$]' % (command, string))
                    return flow
                targetValue, _, sub = rest.partition('$')
                subparts = parse(sub)
                if self._data[keyTag, 0] == []:
                    self.warning('WARNING: keyword not found in table - %s' % (keyTag))
                    return flow
                myText = u''
                if accepts(self._data[keyTag, index], targetValue):
                    myText += render(index, sub, subparts)
                return flow.replace('[$' + command + '$' + string + '$]', myText)
            return handler

        def isEqual(value, target):
            return toText(value) == toText(target)

        def isDifferent(value, target):
            return not toText(value) == toText(target)

        def numeric(compare):
            # Wrap a comparison of two floats; only a failed conversion is
            # reported here, errors of nested commands are not swallowed.
            def accepts(value, target):
                try:
                    return compare(float(value), float(target))
                except (TypeError, ValueError):
                    self.warning('ERROR: trying to compare uncomparable values!')
                    return False
            return accepts

        partCommands['ATGIF'] = makeCondition('ATGIF', isEqual)
        partCommands['ATGIFNOT'] = makeCondition('ATGIFNOT', isDifferent)
        partCommands['ATGGREATER'] = makeCondition('ATGGREATER', numeric(lambda a, b: a > b))
        partCommands['ATGLESS'] = makeCondition('ATGLESS', numeric(lambda a, b: a < b))

        def addReplace(index, flow, string):
            # Remember the replacement; process() applies it to the row.
            srcString, _, targetString = string.partition('$')
            self.replacement[srcString] = targetString
            key = '[$ATGREPLACE$' + string + '$]'
            return flow.replace(key,'')
        partCommands['ATGREPLACE'] = addReplace

        def addPrefix(index, flow, string):
            # The rendered text extends the filename prefix of this row.
            self.bonusPrefix += render(index, string, parse(string))
            key = '[$ATGPREFIX$' + string + '$]'
            return flow.replace(key,'')
        partCommands['ATGPREFIX'] = addPrefix

        def skip(index, flow, string):
            # The whole text becomes a marker that process() looks for.
            return u'[$ATGSKIP_DO$]'
        partCommands['ATGSKIP'] = skip

        def prev(index, flow, string):
            # [$ATGPREV$Name$] -> value of column Name in the previous row.
            keytag = string.split('$')[0]
            if self._data[keytag, 0] == []:
                self.warning('WARNING: keyword not found in table - %s' % (keytag))
                return flow
            if index == 0:
                # The first row has no previous row, so it is skipped.
                self.log('INFORMATION: Skipping ATGPREV tag for entry with index = 0')
                return u'[$ATGSKIP_DO$]'
            return flow.replace('[$ATGPREV$%s$]' % (keytag), toText(self._data.col_by_key(keytag)[index-1]))
        partCommands['ATGPREV'] = prev

        self.commands = partCommands
        self.parts = parse(self.text)
    
    def process(self, data):
        '''
        Generate text for the given data.

        data - the table to take values from. This method reads data.keys
        (the column names) and data.col_by_key(name); the command handlers
        additionally read single values as data[name, row].

        Returns the whole generated text if the template is in the oneFile
        mode. Otherwise returns a dictionary, where each key is the filename
        prefix followed by the value of the key field, and each value is
        the text generated for that row. Rows that end up with the ATGSKIP
        marker are left out.
        '''
        # unicode() is the Python 2 text type; fall back to str where the
        # name does not exist (Python 3).
        try:
            toText = unicode
        except NameError:
            toText = str

        self._data = data

        # Count the columns per multi-column header: 'Col1', 'Col2', 'Col3'
        # give {'Col': 3}. A name consisting only of digits has no header
        # part and is not counted.
        multiWords = {}
        for key in data.keys:
            base = key.rstrip('0123456789')
            if base and base != key:
                multiWords[base] = multiWords.get(base, 0) + 1
        self._multiWords = multiWords

        # Templates that were not loaded from a file may lack the flag.
        oneFile = getattr(self, 'oneFile', False)

        chunks = []
        files = {}
        partCommands = self.commands
        for index, element in enumerate(data.col_by_key(self.keyField)):
            # ATGPREFIX handlers extend the prefix of the current row only.
            self.bonusPrefix = self.prefix
            text = self.text
            for name, payload in self.parts:
                if name in partCommands:
                    text = partCommands[name](index, text, payload)
                elif payload == u'':
                    text = partCommands['_ATGPLAIN'](index, text, name)
                else:
                    self.warning('Warning: unknown command '+name)

            # Replacements collected by ATGREPLACE go last and are
            # forgotten before the next row.
            for source in self.replacement:
                text = text.replace(source, self.replacement[source])
            self.replacement = {}

            if u'[$ATGSKIP_DO$]' in text:
                self.log('ATGSKIP Tag found. Skipping ' + toText(element) + '.')
                continue

            if oneFile:
                chunks.append(text)
            else:
                name = self.bonusPrefix + toText(element)
                files[name] = self.header + text + self.footer
            self.log('Created %s' % (element))

        if oneFile:
            return self.header + ''.join(chunks) + self.footer
        return files
    
    @staticmethod
    def express(cls, text, **kwargs):
        '''
        Create a template from the given text, without a template file.

        This is a static method, so the class to instantiate has to be
        passed explicitly as the first argument.

        cls - the template class to create.
        text - the template text.
        keyField, extension, prefix, encoding, oneFile - optional keyword
        arguments with the values that a template file would provide in
        its info line. Defaults: 'Index', '', '', 'utf-8' and False.

        Returns the new template object.
        '''
        # The text goes through the constructor, because that is where the
        # template gets parsed; setting obj.text afterwards would leave
        # the object with the parts of an empty template.
        obj = cls(text=text)
        obj.keyField = kwargs.get('keyField', 'Index')
        obj.extension = kwargs.get('extension', '')
        obj.prefix = kwargs.get('prefix', '')
        obj.encoding = kwargs.get('encoding', 'utf-8')
        # process() reads this flag; a template file sets it in its info line.
        obj.oneFile = kwargs.get('oneFile', False)
        return obj