From 0c7c6c0dda782a07cfe7b31fc035393980426b70 Mon Sep 17 00:00:00 2001 From: KaiSD Date: Mon, 4 Nov 2013 23:50:10 +0400 Subject: [PATCH] moved files, added CSV export to data.py --- .gitignore | 2 + atg.py | 74 ------- atr.py | 168 ---------------- data.py | 191 ------------------ docs/atg.html | 47 ----- docs/atr.html | 70 ------- docs/data.html | 114 ----------- docs/template.html | 135 ------------- ktt_atgcsv.py | 4 +- template.py | 482 --------------------------------------------- 10 files changed, 3 insertions(+), 1284 deletions(-) delete mode 100644 atg.py delete mode 100644 atr.py delete mode 100644 data.py delete mode 100644 docs/atg.html delete mode 100644 docs/atr.html delete mode 100644 docs/data.html delete mode 100644 docs/template.html delete mode 100644 template.py diff --git a/.gitignore b/.gitignore index d2d6f36..205a347 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,5 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +GoogleIssues2CSV.py diff --git a/atg.py b/atg.py deleted file mode 100644 index 5dc9024..0000000 --- a/atg.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -''' -Advanced Text Generator module for a KaiSD Text Tools. - -(c) 2013 Ivan "Kai SD" Korystin - -License: GPLv3 -''' -from os.path import join, exists -from os import makedirs - -class ATG: - ''' - Advanced Text Generator is a class, created to generate multiple - text files from table data. - ''' - def __init__(self, data, template): - ''' - Constructor. - data - an instance of the data.Data class (i.e. CSVData) - template - an instance of the template.Template class (i.e. TemplateV2) - ''' - self.data = data - self.template = template - self.out = template.process(data) - - if type(self.out) == dict: - self.multiple = True - else: - self.multiple = False - - def write_files(self, outputDir='.'): - ''' - Write generated files to the given directory. - ''' - encoding = self.template.encoding - extension = self.template.extension - out = self.out - if self.multiple: - for name in out.keys(): - namepath = name.replace('\\', '/').split('/') - newpath = u'' - for i in namepath[:-1]: - newpath = join(newpath, i) - if not exists(join(unicode(outputDir),newpath)): - makedirs(join(unicode(outputDir),newpath)) - fname = join(unicode(outputDir),name+'.'+extension) - if fname.endswith('.'): - fname = fname[:-1] - f = open(fname, 'w') - f.write(out[name].encode(encoding)) - self.log(' Saved %s' % (name+'.'+extension)) - f.close() - else: - name = self.template.bonusPrefix + '.' + extension - if name == '.': - name = self.template.keyField - namepath = name.replace('\\', '/').split('/') - newpath = u'' - for i in namepath[:-1]: - newpath = join(newpath, i) - if not exists(join(unicode(outputDir),newpath)): - makedirs(join(unicode(outputDir),newpath)) - f = open(join(unicode(outputDir),name+'.'+extension), 'w') - f.write(out.encode(encoding)) - self.log(' Saved %s' % (name+'.'+extension)) - f.close() - - def log(self, text): - ''' - Print information - ''' - pass \ No newline at end of file diff --git a/atr.py b/atr.py deleted file mode 100644 index a848ed2..0000000 --- a/atr.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -''' -Advanced Text Replacer module for a KaiSD Text Tools. - -(c) 2013 Ivan "Kai SD" Korystin - -License: GPLv3 -''' -import re -class ATR: - ''' - Advanced Text Replacer - is a class, created to make multiple replacements - in the content or names of text file. - It can make plain replacements, or use ATG templates to do something more complex. - ''' - - def __init__(self, files): - ''' - Constructor - ''' - self.files = files - self.replacements = [] - - def plain_replace(self, pattern, string, regexp=False): - ''' - Replaces the given pattern with string in files. - ''' - if regexp: - pattern = re.compile(pattern) - self.replacements.append((pattern, string)) - - - def templated_replace(self, pattern, template, data, keyFormat='filename', regexp=False): - ''' - Replaces the given pattern with data formated by template. - Valid values for keyFormat: - filename - take data rows by filename(path ignored), key value of the data row should store the filename. - fullname - as filename, but with path. - index - take data rows in order, key value of the data row should store the index. Indexes starts with 0. - If filename or index cannot be found in data keys, pattern will not be replaced. - ''' - if regexp: - pattern = re.compile(pattern) - strings = template.process(data) - self.replacements.append((pattern, strings, keyFormat)) - - - def write_in_place(self): - ''' - Do replacement and save the files - ''' - for f in self.files: - out = u'' - with open(f, 'rb') as file: - out = file.read() - - idx = 0 - for r in self.replacements: - if type(r[0]) in (str, unicode): - pattern = re.compile(re.escape(r[0])) - string = r[1] - elif type(r[0]) is dict and len(r) == 3: - if r[2] == 'filename': - fname = f.replace('\\', '/').split('/')[-1] - string = f[1].get(fname, None) - elif r[2] == 'fullname': - string = f[1].get(f, None) - elif r[2] == 'index': - fname = f.replace('\\', '/').split('/')[-1] - string = f[1].get(idx, None) - else: - raise BaseException('Unknown data key format.') - elif hasattr(r[0], 'match'): - pattern = r[0] - string = r[1] - else: - raise BaseException('Unknown pattern type.') - if string: - out = re.sub(pattern, string, out) - - with open(f, 'wb') as outfile: - outfile.write(out) - - def write_new_files(self, outfiles): - ''' - Do replacement, but save to given files instead of the original ones. - ''' - if not len(outfiles) == len(self.files): - raise BaseException('Lists of original and new files has different length.') - - for f in self.files: - out = u'' - with open(f, 'rb') as file: - out = file.read() - - idx = 0 - for r in self.replacements: - if type(r[0]) in (str, unicode): - pattern = re.compile(re.escape(r[0])) - string = r[1] - elif type(r[0]) is dict and len(r) == 3: - if r[2] == 'filename': - fname = f.replace('\\', '/').split('/')[-1] - string = f[1].get(fname, None) - elif r[2] == 'fullname': - string = f[1].get(f, None) - elif r[2] == 'index': - fname = f.replace('\\', '/').split('/')[-1] - string = f[1].get(idx, None) - else: - raise BaseException('Unknown data key format.') - elif hasattr(r[0], 'match'): - pattern = r[0] - string = r[1] - else: - raise BaseException('Unknown pattern type.') - if string: - out = re.sub(pattern, string, out) - - with open(outfiles[self.files.index(f)], 'wb') as outfile: - outfile.write(out) - - def replace_in_names(self): - ''' - Do replacement, but in file names instead of file content. Returns the list of new file names, - you can use it with writeNewFiles() method. - ''' - out = [] - for f in self.files: - new = f - idx = 0 - for r in self.replacements: - if type(r[0]) in (str, unicode): - pattern = re.compile(re.escape(r[0])) - string = r[1] - elif type(r[0]) is dict and len(r) == 3: - if r[2] == 'filename': - fname = f.replace('\\', '/').split('/')[-1] - string = f[1].get(fname, None) - elif r[2] == 'fullname': - string = f[1].get(f, None) - elif r[2] == 'index': - fname = f.replace('\\', '/').split('/')[-1] - string = f[1].get(idx, None) - else: - raise BaseException('Unknown data key format.') - elif hasattr(r[0], 'match'): - pattern = r[0] - string = r[1] - else: - raise BaseException('Unknown pattern type.') - if string: - new = re.sub(pattern, string, new) - out.append(new) - return out - - def clear_replacements(self): - ''' - Removes all replacements. - ''' - self.replacements = [] - - def log(self, string): - ''' - Print information - ''' - pass \ No newline at end of file diff --git a/data.py b/data.py deleted file mode 100644 index c5b5cfc..0000000 --- a/data.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -''' -Data module for a KaiSD Text Tools. - -(c) 2013 Ivan "Kai SD" Korystin - -License: GPLv3 -''' - -import csv, codecs - -class Data: - ''' - Empty data class. Can be used for a subclassing or procedural data creation. - ''' - def __init__(self, *args, **kwargs): - ''' - Constructor - ''' - self.keys = [] - self.rows = [] - - def __getitem__(self, pair): - ''' - Returns a value for given key and row. - ''' - key = pair[0] - row = pair[1] - - keys = self.keys - rows = self.rows - if key in keys: - if len(rows) > row: - return rows[row][keys.index(key)] - else: - raise BaseException('Row %i not found in data' % (row)) - else: - raise BaseException('Named value %s not found in data' % (key)) - - def __setitem__(self, pair, value): - ''' - Sets a value for given key and row. - ''' - key = pair[0] - row = pair[1] - - keys = self.keys - rows = self.rows - if key in keys: - if len(rows) > row: - rows[row][keys.index(key)] = value - else: - raise BaseException('Row %i not found in data' % (row)) - else: - raise BaseException('Named value %s not found in data' % (key)) - - def __str__(self): - ''' - Returns data as string. - ''' - return str((self.keys, self.rows)) - - def __repr__(self): - return self.__str__() - - def has_key(self, key): - ''' - Returns True if given key exists in data - ''' - return key in self.keys - - def add_rows(self, n=1): - ''' - Adds some empty rows to the data. - ''' - keys = self.keys - rows = self.rows - - for n in xrange(0, n): - row = [] - for k in keys: - row.append('') - rows.append(row) - - def add_keys(self, *h): - ''' - Adds new keys to the data. - ''' - keys = self.keys - rows = self.rows - - for i in h: - keys.append(i) - for r in rows: - for i in h: - r.append('') - - def col_by_key(self, key): - ''' - Returns a column by header's name - ''' - cols = [] - keys = self.keys - rows = self.rows - if key in keys: - idx = keys.index(key) - for r in rows: - cols.append(r[idx]) - else: - raise BaseException('Named value %s not found in data' % (key)) - return tuple(cols) - - def row_by_idx(self, idx): - ''' - Returns a row by index. - ''' - return tuple(self.rows[idx]) - -class CSVData(Data): - ''' - Class for reading CSV files. - ''' - class Reader: - class Recoder: - def __init__(self, f, encoding): - self.reader = codecs.getreader(encoding)(f) - - def __iter__(self): - return self - - def next(self): - return self.reader.next().encode("utf-8") - - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - f = self.Recoder(f, encoding) - self.reader = csv.reader(f, dialect=dialect, **kwds) - - def next(self): - row = self.reader.next() - return [unicode(s, "utf-8") for s in row] - - def __iter__(self): - return self - - def __init__(self, filename, encoding='utf-8', delimiter=';', quotechar='"', **kwargs): - ''' - Constructor. - - filename - CSV table filename - encoding - CSV table encoding - delimiter - CSV table delimiter - quotechar - CSV table quotechar - transpose=True - transpose the table - ''' - csvfile = self.Reader(open(filename), encoding=encoding, delimiter=delimiter, quotechar=quotechar) - sourceData = [] - sourcekeys = None - - if kwargs.get('transpose', False): - sourcekeys = [] - rowData = [] - for i in csvfile: - sourcekeys.append(i[0]) - for k in xrange(1, len(i)): - sourceData.append([]) - try: - i[k] = int(i[k]) - except: - try: - i[k] = float(i[k]) - except: - i[k] = i[k] - rowData.append(i[1:]) - sourceData = list(map(lambda *x:x, *rowData)) - else: - for i in csvfile: - if sourcekeys is None: - sourcekeys = i - else: - for k in xrange(0, len(i)): - try: - i[k] = int(i[k]) - except: - try: - i[k] = float(i[k]) - except: - i[k] = i[k] - sourceData.append(i) - self.keys = sourcekeys - self.rows = sourceData \ No newline at end of file diff --git a/docs/atg.html b/docs/atg.html deleted file mode 100644 index bd42e50..0000000 --- a/docs/atg.html +++ /dev/null @@ -1,47 +0,0 @@ - - -Python: module atg - - - - -
 
- 
atg
index
c:\users\kaisd\documents\workbench\programming\ktt\src\atg.py
-

Advanced Text Generator module for a KaiSD Text Tools.

-(c) 2013 Ivan "Kai SD" Korystin 

-License: GPLv3

-

- - - - - -
 
-Classes
       
-
ATG -
-

- - - - - - - -
 
-class ATG
   Advanced Text Generator is a class, created to generate multiple
-text files from table data.
 
 Methods defined here:
-
__init__(self, data, template)
Constructor.
-data - an instance of the data.Data class (i.e. CSVData)
-template - an instance of the template.Template class (i.e. TemplateV2)
- -
log(self, text)
Print information
- -
write_files(self, outputDir='.')
Write generated files to the given directory.
- -

- \ No newline at end of file diff --git a/docs/atr.html b/docs/atr.html deleted file mode 100644 index e4ec319..0000000 --- a/docs/atr.html +++ /dev/null @@ -1,70 +0,0 @@ - - -Python: module atr - - - - -
 
- 
atr
index
c:\users\kaisd\documents\workbench\programming\ktt\src\atr.py
-

Advanced Text Replacer module for a KaiSD Text Tools.

-(c) 2013 Ivan "Kai SD" Korystin 

-License: GPLv3

-

- - - - - -
 
-Modules
       
re
-

- - - - - -
 
-Classes
       
-
ATR -
-

- - - - - - - -
 
-class ATR
   Advanced Text Replacer - is a class, created to make multiple replacements
-in the content or names of text file.
-It can make plain replacements, or use ATG templates to do something more complex.
 
 Methods defined here:
-
__init__(self, files)
Constructor
- -
clear_replacements(self)
Removes all replacements.
- -
log(self, string)
Print information
- -
plain_replace(self, pattern, string, regexp=False)
Replaces the given pattern with string in files.
- -
replace_in_names(self)
Do replacement, but in file names instead of file content. Returns the list of new file names,
-you can use it with writeNewFiles() method.
- -
templated_replace(self, pattern, template, data, keyFormat='filename', regexp=False)
Replaces the given pattern with data formated by template.
-Valid values for keyFormat:
-filename - take data rows by filename(path ignored), key value of the data row should store the filename.
-fullname - as filename, but with path.
-index - take data rows in order, key value of the data row should store the index. Indexes starts with 0.
-If filename or index cannot be found in data keys, pattern will not be replaced.
- -
write_in_place(self)
Do replacement and save the files
- -
write_new_files(self, outfiles)
Do replacement, but save to given files instead of the original ones.
- -

- \ No newline at end of file diff --git a/docs/data.html b/docs/data.html deleted file mode 100644 index 6a28322..0000000 --- a/docs/data.html +++ /dev/null @@ -1,114 +0,0 @@ - - -Python: module data - - - - -
 
- 
data
index
c:\users\kaisd\documents\workbench\programming\ktt\src\data.py
-

Data module for a KaiSD Text Tools.

-(c) 2013 Ivan "Kai SD" Korystin 

-License: GPLv3

-

- - - - - -
 
-Modules
       
codecs
-
csv
-

- - - - - -
 
-Classes
       
-
Data -
-
-
CSVData -
-
-
-

- - - - - - - -
 
-class CSVData(Data)
   Class for reading CSV files.
 
 Methods defined here:
-
__init__(self, filename, encoding='utf-8', delimiter=';', quotechar='"', **kwargs)
Constructor.

-filename - CSV table filename
-encoding - CSV table encoding
-delimiter - CSV table delimiter
-quotechar - CSV table quotechar
-transpose=True - transpose the table
- -
-Data and other attributes defined here:
-
Reader = <class data.Reader>
- -
-Methods inherited from Data:
-
__getitem__(self, pair)
Returns a value for given key and row.
- -
__repr__(self)
- -
__setitem__(self, pair, value)
Sets a value for given key and row.
- -
__str__(self)
Returns data as string.
- -
add_keys(self, *h)
Adds new keys to the data.
- -
add_rows(self, n=1)
Adds some empty rows to the data.
- -
col_by_key(self, key)
Returns a column by header's name
- -
has_key(self, key)
Returns True if given key exists in data
- -
row_by_idx(self, idx)
Returns a row by index.
- -

- - - - - - - -
 
-class Data
   Empty data class. Can be used for a subclassing or procedural data creation.
 
 Methods defined here:
-
__getitem__(self, pair)
Returns a value for given key and row.
- -
__init__(self, *args, **kwargs)
Constructor
- -
__repr__(self)
- -
__setitem__(self, pair, value)
Sets a value for given key and row.
- -
__str__(self)
Returns data as string.
- -
add_keys(self, *h)
Adds new keys to the data.
- -
add_rows(self, n=1)
Adds some empty rows to the data.
- -
col_by_key(self, key)
Returns a column by header's name
- -
has_key(self, key)
Returns True if given key exists in data
- -
row_by_idx(self, idx)
Returns a row by index.
- -

- \ No newline at end of file diff --git a/docs/template.html b/docs/template.html deleted file mode 100644 index 7f80dfd..0000000 --- a/docs/template.html +++ /dev/null @@ -1,135 +0,0 @@ - - -Python: module template - - - - -
 
- 
template
index
c:\users\kaisd\documents\workbench\programming\ktt\src\template.py
-

Template module for a KaiSD Text Tools.

-(c) 2013 Ivan "Kai SD" Korystin 

-License: GPLv3

-

- - - - - -
 
-Modules
       
re
-

- - - - - -
 
-Classes
       
-
Template -
-
-
TemplateV2 -
-
-
-

- - - - - - - -
 
-class Template
   Empty template class. Generates empty text.
 
 Methods defined here:
-
log(self, text)
Print information
- -
process(self, data)
Replace this method in subclasses.
- -
warning(self, text)
Prints a warning
- -

- - - - - - - -
 
-class TemplateV2(Template)
   Class for reading ATGv2 templates.

-ATGv2 template file should be a plain text file, starting with the line
-ATGV2
-followed by the info line:
-[$KeyField$Extension$Prefix$Encoding$]
-where
-KeyField - is a name of a data column, that contains an identifier.
-Extension - is the desired extension for the generated files.
-Prefix - is the desired filename prefix for the generated files
-Encoding - is the desired encoding for the generated files.
-The line may also have some optional keywords before the closing bracket:
-oneFile$ - place all generated text into a single file instead of
-generating a file for each table row.
-After the info line, you can put your text.
-You can use following commands to handle the data:
-* [$Name$], where Name is the column header,
-will be replaced with value from the current row.
-* [$ATGLINDEX$] will be replaced with the number of a current row.
-* [$ATGHEADER$Text$] and [$ATGFOOTER$Text$] will place the given text
-at the begining or at the end of the file. You can't use other
-commands in this text.
-* [$ATGLIST$Name$Text$], where Name is a multi-column header
-(i.e. 'Col' will represent 'Col1', 'Col2', 'Col3' etc)
-will repeat the given text for each non-empty value.
-You can use other commands in Text. Also [$Name$] inside the list
-will be replaced with the value for the current row and column.
-* [$ATGLINDEX$] can be used only inside the ATGLIST text,
-will be replaced with the current column index.
-* [$ATGLISTCUT$Name$Text$] - same as ATGLIST, but the last symbol
-will be removed. Useful for removing unnecessary newlines.
-* [$ATGIF$Name$Value$Text$] will be replaced with the given text
-only if the the given column's value is the same as the given one.
-Will be replaced with the empty text otherwise. You can use other
-commands in Text.
-* [$ATGIFNOT$Name$Value$Text$] - same as ATGIF, but the column's value
-should not be equal to the given one.
-* [$ATGGREATER$Name$Value$Text$] - same as ATGIF, but the value should
-be the number and it should be greater then the given one.
-* [$ATGGREATER$Name$Value$Text$] - same as ATGGREATER, but the value
-should be less then the given one.
-* [$ATGREPLACE$Text1$Text2$] - Will replace Text1 with Text2. Replacements
-will be done after all other commands. You can't use regular expressions or
-other commands in the text.
-* [$ATGPREFIX$Text$] - Will add the given text to the filename prefix.
-You can use other commands in text, but do it carefully.
-* [$ATGSKIP$] - Skip the current row. Use only in combination with the
-ATGIF/ATGIFNOT, or you will generate nothing.
-* [$ATGPREV$Name$], where Name is the column header,
-will be replaced with the with the value of the given header from the
-previous row. ATGSKIP will be used for the first row.
 
 Methods defined here:
-
__init__(self, filename=None, encoding='utf-8', text='')
Constructor.

-filename - name of the ATGv2 template file.
-encoding - encoding of the template file.
-text - text to use if no filename has been provided.
- -
process(self, data)
Generate text for the given data.
- -
-Static methods defined here:
-
express(cls, text, **kwargs)
- -
-Methods inherited from Template:
-
log(self, text)
Print information
- -
warning(self, text)
Prints a warning
- -

- \ No newline at end of file diff --git a/ktt_atgcsv.py b/ktt_atgcsv.py index 08421d4..90527c6 100755 --- a/ktt_atgcsv.py +++ b/ktt_atgcsv.py @@ -10,10 +10,8 @@ Part of KaiSD Text Tools License: GPLv3 ''' from sys import argv -from atg import ATG -from data import CSVData -from template import TemplateV2 from os.path import split +from ktt import ATG, CSVData, TemplateV2 if __name__ == '__main__': if len(argv) == 3: diff --git a/template.py b/template.py deleted file mode 100644 index 39e5eb4..0000000 --- a/template.py +++ /dev/null @@ -1,482 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -''' -Template module for a KaiSD Text Tools. - -(c) 2013 Ivan "Kai SD" Korystin - -License: GPLv3 -''' - -import re -class Template: - ''' - Empty template class. Generates empty text. - ''' - def process(self, data): - ''' - Replace this method in subclasses. - ''' - return '' - - def warning(self, text): - ''' - Prints a warning - ''' - print text - - def log(self, text): - ''' - Print information - ''' - pass - -class TemplateV2(Template): - ''' - Class for reading ATGv2 templates. - - ATGv2 template file should be a plain text file, starting with the line - ATGV2 - followed by the info line: - [$KeyField$Extension$Prefix$Encoding$] - where - KeyField - is a name of a data column, that contains an identifier. - Extension - is the desired extension for the generated files. - Prefix - is the desired filename prefix for the generated files - Encoding - is the desired encoding for the generated files. - The line may also have some optional keywords before the closing bracket: - oneFile$ - place all generated text into a single file instead of - generating a file for each table row. - After the info line, you can put your text. - You can use following commands to handle the data: - * [$Name$], where Name is the column header, - will be replaced with value from the current row. - * [$ATGLINDEX$] will be replaced with the number of a current row. - * [$ATGHEADER$Text$] and [$ATGFOOTER$Text$] will place the given text - at the begining or at the end of the file. You can't use other - commands in this text. - * [$ATGLIST$Name$Text$], where Name is a multi-column header - (i.e. 'Col' will represent 'Col1', 'Col2', 'Col3' etc) - will repeat the given text for each non-empty value. - You can use other commands in Text. Also [$Name$] inside the list - will be replaced with the value for the current row and column. - * [$ATGLINDEX$] can be used only inside the ATGLIST text, - will be replaced with the current column index. - * [$ATGLISTCUT$Name$Text$] - same as ATGLIST, but the last symbol - will be removed. Useful for removing unnecessary newlines. - * [$ATGIF$Name$Value$Text$] will be replaced with the given text - only if the the given column's value is the same as the given one. - Will be replaced with the empty text otherwise. You can use other - commands in Text. - * [$ATGIFNOT$Name$Value$Text$] - same as ATGIF, but the column's value - should not be equal to the given one. - * [$ATGGREATER$Name$Value$Text$] - same as ATGIF, but the value should - be the number and it should be greater then the given one. - * [$ATGGREATER$Name$Value$Text$] - same as ATGGREATER, but the value - should be less then the given one. - * [$ATGREPLACE$Text1$Text2$] - Will replace Text1 with Text2. Replacements - will be done after all other commands. You can't use regular expressions or - other commands in the text. - * [$ATGPREFIX$Text$] - Will add the given text to the filename prefix. - You can use other commands in text, but do it carefully. - * [$ATGSKIP$] - Skip the current row. Use only in combination with the - ATGIF/ATGIFNOT, or you will generate nothing. - * [$ATGPREV$Name$], where Name is the column header, - will be replaced with the with the value of the given header from the - previous row. ATGSKIP will be used for the first row. - ''' - - def __init__(self, filename=None, encoding='utf-8', text=''): - ''' - Constructor. - - filename - name of the ATGv2 template file. - encoding - encoding of the template file. - text - text to use if no filename has been provided. - ''' - if filename: - with open(filename, 'r') as templateFile: - topline = templateFile.readline().decode(encoding) - if not topline.startswith('ATGV2'): - raise BaseException('%s is not an ATGv2 template' % (filename)) - - key = templateFile.readline().decode(encoding) - if key[:2] == '[$' and key[-3:-1] == '$]': - keyInfo = key[2:-2].split('$') - if len(keyInfo) < 4: - raise BaseException('%s has bad ATGv2 key' % (filename)) - self.keyField = keyInfo[0] - self.extension = keyInfo[1] - self.prefix = keyInfo[2] - self.encoding = keyInfo[3] - if 'oneFile' in keyInfo[4:]: - self.oneFile = True - else: - self.oneFile = False - self.text = u'' - else: - raise BaseException('%s has bad ATGv2 key' % (filename)) - - for i in templateFile.readlines(): - self.text += i.decode(encoding) - else: - self.text = text - - self.header = u'' - self.footer = u'' - self.replacement = {} - self._data = None - self._multiWords = None - - def parse(text): - topParts = [] - matches = {} - - openers = re.finditer('\[\$.*?\$', text) - closers = re.finditer('\$\]', text) - ops = [] - try: - cl = closers.next() - while not cl is None: - try: - op = openers.next() - if op.start() < cl.start(): - ops.append(op) - else: - idx = -1 - try: - while ops[idx].start() > cl.start(): - idx -= 1 - except: - raise BaseException('Template parsing error: can not find the opener for '+str(cl.start())) - matches[ops[idx]] = cl - if len(ops) == 1 or idx == -len(ops): - topParts.append(ops[idx]) - del ops[idx] - ops.append(op) - try: - cl = closers.next() - except StopIteration: - cl = None - except StopIteration: - idx = -1 - try: - while ops[idx].start() > cl.start(): - idx -= 1 - except: - raise BaseException('Template parsing error: can not find the opener for '+str(cl.start())) - matches[ops[idx]] = cl - if len(ops) == 1 or idx == -len(ops): - topParts.append(ops[idx]) - del ops[idx] - try: - cl = closers.next() - except StopIteration: - cl = None - except StopIteration: - pass - parts = [] - for i in topParts: - startPoint = i.end() - endPoint = matches[i].start() - p = (i.group()[2:-1], text[startPoint:endPoint]) - if p[0].startswith('ATG'): - parts.insert(0, p) - else: - parts.append(p) - return parts - - partCommands = {} - - def plain(index, flow, keytag): - if not keytag in self._data.keys: - self.warning('WARNING: keyword not found in table - %s' % (keytag)) - return flow - return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag, index])) - partCommands['_ATGPLAIN'] = plain - - def nPlain(index, flow, keytag, number): - if not keytag+str(number) in self._data.keys: - self.warning('WARNING: keyword not found in table - %s' % (keytag+str(number))) - return flow - return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag+str(number), index])) - - def lIndex(index, flow, keytag, number): - return flow.replace('[$ATGLINDEX$]', str(number)) - - def addHeader(index, flow, text): - if self.header.find(text) < 0: - self.header += text - key = '[$ATGHEADER$' + text + '$]' - return flow.replace(key,'') - partCommands['ATGHEADER'] = addHeader - - def addFooter(index, flow, text): - if self.footer.find(text) < 0: - self.footer += text - key = '[$ATGFOOTER$' + text + '$]' - return flow.replace(key,'') - partCommands['ATGFOOTER'] = addFooter - - def addList(index, flow, string): - key = '[$ATGLIST$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) - sub = string[len(string.split('$')[0])+1:] - keyTag = string.split('$')[0] - subparts = parse(sub) - myText = u'' - if not keyTag in self._multiWords: - self.warning('Keytag %s is not multiple!' % (keyTag)) - return flow - for j in xrange(1, self._multiWords[keyTag]+1): - subText = sub - for sp in subparts: - if sp[0] in self._multiWords: - subText = nPlain(index, subText, sp[0], j) - elif sp[0] == 'ATGLINDEX': - subText = lIndex(index, subText, sp[0], j) - elif sp[0] in partCommands: - subText = partCommands[sp[0]](index, subText, sp[1]) - elif sp[1] == '': - subText = plain(index, subText, sp[0]) - else: - self.warning('Warning: unknown command '+sp[0]) - if not self._data[keyTag+str(j), index] == u'': - myText += subText - return flow.replace(key, myText) - partCommands['ATGLIST'] = addList - - def addListCut(index, flow, string): - key = '[$ATGLISTCUT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) - sub = string[len(string.split('$')[0])+1:] - keyTag = string.split('$')[0] - subparts = parse(sub) - myText = u'' - if not keyTag in self._multiWords: - self.warning('Keytag %s is not multiple!' % (keyTag)) - return flow - for j in xrange(1, self._multiWords[keyTag]+1): - subText = sub - for sp in subparts: - if sp[0] in self._multiWords: - subText = nPlain(index, subText, sp[0], j) - elif sp[0] == 'ATGLINDEX': - subText = lIndex(index, subText, sp[0], j) - elif sp[0] in partCommands: - subText = partCommands[sp[0]](index, subText, sp[1]) - elif sp[1] == '': - subText = plain(index, subText, sp[0]) - else: - self.warning('Warning: unknown command '+sp[0]) - if not self._data[keyTag+str(j), index] == u'': - myText += subText - return flow.replace(key, myText[:-1]) - partCommands['ATGLISTCUT'] = addListCut - - def addIf(index, flow, string): - key = '[$ATGIF$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) - sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:] - keyTag = string.split('$')[0] - targetValue = string.split('$')[1] - subparts = parse(sub) - myText = u'' - if self._data[keyTag, 0] == []: - self.warning('WARNING: keyword not found in table - %s' % (keyTag)) - return flow - if unicode(self._data[keyTag, index]) == unicode(targetValue): - subText = sub - for sp in subparts: - if sp[0] in partCommands: - subText = partCommands[sp[0]](index, subText, sp[1]) - elif sp[1] == '': - subText = plain(index, subText, sp[0]) - else: - self.warning('Warning: unknown command '+sp[0]) - myText += subText - return flow.replace(key, myText) - partCommands['ATGIF'] = addIf - - def addIfNot(index, flow, string): - key = '[$ATGIFNOT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) - sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:] - keyTag = string.split('$')[0] - targetValue = string.split('$')[1] - subparts = parse(sub) - myText = u'' - if self._data[keyTag, 0] == []: - self.warning('WARNING: keyword not found in table - %s' % (keyTag)) - return flow - if not unicode(self._data[keyTag, index]) == unicode(targetValue): - subText = sub - for sp in subparts: - if sp[0] in partCommands: - subText = partCommands[sp[0]](index, subText, sp[1]) - elif sp[1] == '': - subText = plain(index, subText, sp[0]) - else: - self.warning('Warning: unknown command '+sp[0]) - myText += subText - return flow.replace(key, myText) - partCommands['ATGIFNOT'] = addIfNot - - def addGreater(index, flow, string): - key = '[$ATGGREATER$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) - sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:] - keyTag = string.split('$')[0] - targetValue = string.split('$')[1] - subparts = parse(sub) - myText = u'' - if self._data[keyTag, 0] == []: - self.warning('WARNING: keyword not found in table - %s' % (keyTag)) - return flow - try: - if float(self._data[keyTag, index]) > float(targetValue): - subText = sub - for sp in subparts: - if sp[0] in partCommands: - subText = partCommands[sp[0]](index, subText, sp[1]) - elif sp[1] == '': - subText = plain(index, subText, sp[0]) - else: - self.warning('Warning: unknown command '+sp[0]) - myText += subText - except: - self.warning('ERROR: trying to compare uncomparable values!') - return flow.replace(key, myText) - partCommands['ATGGREATER'] = addGreater - - def addLess(index, flow, string): - key = '[$ATGLESS$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) - sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:] - keyTag = string.split('$')[0] - targetValue = string.split('$')[1] - subparts = parse(sub) - myText = u'' - if self._data[keyTag, 0] == []: - self.warning('WARNING: keyword not found in table - %s' % (keyTag)) - return flow - try: - if float(self._data[keyTag, index]) < float(targetValue): - subText = sub - for sp in subparts: - if sp[0] in partCommands: - subText = partCommands[sp[0]](index, subText, sp[1]) - elif sp[1] == '': - subText = plain(index, subText, sp[0]) - else: - self.warning('Warning: unknown command '+sp[0]) - myText += subText - except: - self.warning('ERROR: trying to compare uncomparable values!') - return flow.replace(key, myText) - partCommands['ATGLESS'] = addLess - - def addReplace(index, flow, string): - key = '[$ATGREPLACE$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) - targetString = string[len(string.split('$')[0])+1:] - srcString = string.split('$')[0] - self.replacement[srcString] = targetString - key = '[$ATGREPLACE$' + string + '$]' - return flow.replace(key,'') - partCommands['ATGREPLACE'] = addReplace - - def addPrefix(index, flow, string): - key = '[$ATGPREFIX$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) - sub = string - subparts = parse(sub) - for sp in subparts: - if sp[0] in partCommands: - sub = partCommands[sp[0]](index, sub, sp[1]) - elif sp[1] == '': - sub = plain(index, sub, sp[0]) - else: - self.warning('Warning: unknown command '+sp[0]) - self.bonusPrefix += sub - key = '[$ATGPREFIX$' + string + '$]' - return flow.replace(key,'') - partCommands['ATGPREFIX'] = addPrefix - - def skip(index, flow, string): - return u'[$ATGSKIP_DO$]' - partCommands['ATGSKIP'] = skip - - def prev(index, flow, string): - key = '[$ATGPREV$%s$]' % (string.split('$')[0]) - keytag = string.split('$')[0] - if self._data[keytag, 0] == []: - self.warning('WARNING: keyword not found in table - %s' % (keytag)) - return flow - if index == 0: - self.log('INFORMATION: Skipping ATGPREV tag for entry with index = 0') - return u'[$ATGSKIP_DO$]' - return flow.replace('[$ATGPREV$%s$]' % (keytag), unicode(self._data.col_by_key(keytag)[index-1])) - partCommands['ATGPREV'] = prev - - self.commands = partCommands - self.parts = parse(self.text) - - def process(self, data): - ''' - Generate text for the given data. - ''' - self._data = data - - multiWords = {} - numbs = ('1','2','3','4','5','6','7','8','9','0') - - for i in data.keys: - multi = False - while i[-1] in numbs: - i = i[:-1] - multi = True - if multi: - if i in multiWords: - multiWords[i] += 1 - else: - multiWords[i] = 1 - self._multiWords = multiWords - - if self.oneFile: - out = '' - else: - out = {} - index = 0 - partCommands = self.commands - for element in data.col_by_key(self.keyField): - self.bonusPrefix = self.prefix - text = self.text - for i in self.parts: - if i[0] in partCommands: - text = partCommands[i[0]](index, text, i[1]) - elif i[1] == u'': - text = partCommands['_ATGPLAIN'](index, text, i[0]) - else: - self.warning('Warning: unknown command '+i[0]) - for i in self.replacement: - text = text.replace(i, self.replacement[i]) - self.replacement = {} - index += 1 - - if u'[$ATGSKIP_DO$]' in text: - self.log('ATGSKIP Tag found. Skipping ' + unicode(element) + '.') - else: - if self.oneFile: - out += text - else: - name = self.bonusPrefix + unicode(element) - out[name] = self.header + text + self.footer - self.log('Created %s' % (element)) - - if self.oneFile: - out = self.header + out + self.footer - - return out - - @staticmethod - def express(cls, text, **kwargs): - obj = cls() - obj.text = text - obj.keyField = kwargs.get('keyField', 'Index') - obj.extension = kwargs.get('extension', '') - obj.prefix = kwargs.get('prefix', '') - obj.encoding = kwargs.get('encoding', 'utf-8') - return obj \ No newline at end of file