From 6e29f14fc66f1555142158b3b94ae01e9603bd58 Mon Sep 17 00:00:00 2001 From: KaiSD Date: Mon, 4 Nov 2013 23:50:28 +0400 Subject: [PATCH] moved files, added CSV export to data.py --- docs/ktt.atg.html | 60 +++++ docs/ktt.atr.html | 83 +++++++ docs/ktt.data.html | 170 +++++++++++++++ docs/ktt.html | 25 +++ docs/ktt.template.html | 162 ++++++++++++++ ktt/__init__.py | 4 + ktt/atg.py | 74 +++++++ ktt/atr.py | 168 ++++++++++++++ ktt/data.py | 270 +++++++++++++++++++++++ ktt/template.py | 482 +++++++++++++++++++++++++++++++++++++++++ 10 files changed, 1498 insertions(+) create mode 100644 docs/ktt.atg.html create mode 100644 docs/ktt.atr.html create mode 100644 docs/ktt.data.html create mode 100644 docs/ktt.html create mode 100644 docs/ktt.template.html create mode 100644 ktt/__init__.py create mode 100644 ktt/atg.py create mode 100644 ktt/atr.py create mode 100644 ktt/data.py create mode 100644 ktt/template.py diff --git a/docs/ktt.atg.html b/docs/ktt.atg.html new file mode 100644 index 0000000..6394456 --- /dev/null +++ b/docs/ktt.atg.html @@ -0,0 +1,60 @@ + + +Python: module ktt.atg + + + + +
 
+ 
ktt.atg
index
/home/kaisd/Мастерская/projects/KTT/ktt/atg.py
+

Advanced Text Generator module for KaiSD Text Tools.

+(c) 2013 Ivan "Kai SD" Korystin 

+License: GPLv3

+

+ + + + + +
 
+Classes
       
+
__builtin__.object +
+
+
ATG +
+
+
+

+ + + + + + + +
 
+class ATG(__builtin__.object)
   Advanced Text Generator is a class, created to generate multiple
+text files from table data.
 
 Methods defined here:
+
__init__(self, data, template)
Constructor.
+data - an instance of the data.Data class (e.g. CSVData)
+template - an instance of the template.Template class (e.g. TemplateV2)
+ +
log(self, text)
Print information
+ +
write_files(self, outputDir='.')
Write generated files to the given directory.
+ +
+Data descriptors defined here:
+
__dict__
+
dictionary for instance variables (if defined)
+
+
__weakref__
+
list of weak references to the object (if defined)
+
+

+ \ No newline at end of file diff --git a/docs/ktt.atr.html b/docs/ktt.atr.html new file mode 100644 index 0000000..175677b --- /dev/null +++ b/docs/ktt.atr.html @@ -0,0 +1,83 @@ + + +Python: module ktt.atr + + + + +
 
+ 
ktt.atr
index
/home/kaisd/Мастерская/projects/KTT/ktt/atr.py
+

Advanced Text Replacer module for KaiSD Text Tools.

+(c) 2013 Ivan "Kai SD" Korystin 

+License: GPLv3

+

+ + + + + +
 
+Modules
       
re
+

+ + + + + +
 
+Classes
       
+
__builtin__.object +
+
+
ATR +
+
+
+

+ + + + + + + +
 
+class ATR(__builtin__.object)
   Advanced Text Replacer - is a class, created to make multiple replacements
+in the content or names of text files.
+It can make plain replacements, or use ATG templates to do something more complex.
 
 Methods defined here:
+
__init__(self, files)
Constructor
+ +
clear_replacements(self)
Removes all replacements.
+ +
log(self, string)
Print information
+ +
plain_replace(self, pattern, string, regexp=False)
Replaces the given pattern with string in files.
+ +
replace_in_names(self)
Do replacement, but in file names instead of file content. Returns the list of new file names,
+you can use it with the write_new_files() method.
+ +
templated_replace(self, pattern, template, data, keyFormat='filename', regexp=False)
Replaces the given pattern with data formatted by template.
+Valid values for keyFormat:
+filename - take data rows by filename(path ignored), key value of the data row should store the filename.
+fullname - as filename, but with path.
+index - take data rows in order, key value of the data row should store the index. Indexes start with 0.
+If filename or index cannot be found in data keys, pattern will not be replaced.
+ +
write_in_place(self)
Do replacement and save the files
+ +
write_new_files(self, outfiles)
Do replacement, but save to given files instead of the original ones.
+ +
+Data descriptors defined here:
+
__dict__
+
dictionary for instance variables (if defined)
+
+
__weakref__
+
list of weak references to the object (if defined)
+
+

+ \ No newline at end of file diff --git a/docs/ktt.data.html b/docs/ktt.data.html new file mode 100644 index 0000000..2c241a1 --- /dev/null +++ b/docs/ktt.data.html @@ -0,0 +1,170 @@ + + +Python: module ktt.data + + + + +
 
+ 
ktt.data
index
/home/kaisd/Мастерская/projects/KTT/ktt/data.py
+

Data module for KaiSD Text Tools.

+(c) 2013 Ivan "Kai SD" Korystin 

+License: GPLv3

+

+ + + + + +
 
+Modules
       
cStringIO
+
codecs
+
csv
+

+ + + + + +
 
+Classes
       
+
__builtin__.object +
+
+
Data +
+
+
CSVData +
+
+
+
+
+

+ + + + + + + +
 
+class CSVData(Data)
   Class for reading CSV files.
 
 
Method resolution order:
+
CSVData
+
Data
+
__builtin__.object
+
+
+Methods defined here:
+
__init__(self, file, encoding='utf-8', delimiter=';', quotechar='"', **kwargs)
Constructor.

+file - CSV table filename, or an already opened file object
+encoding - CSV table encoding (default: utf-8)
+delimiter - CSV table delimiter (default: ;)
+quotechar - CSV table quotechar (default: ")
+ +
export_csv(self, filename, encoding='utf-8', delimiter=';', quotechar='"', **kwargs)
Saves the data to CSV file

+filename - CSV table filename
+encoding - CSV table encoding (default: utf-8)
+delimiter - CSV table delimiter (default: ;)
+quotechar - CSV table quotechar (default: ")
+ +
+Data and other attributes defined here:
+
Reader = <class ktt.data.Reader>
+ +
Writer = <class ktt.data.Writer>
+ +
+Methods inherited from Data:
+
__getitem__(self, pair)
Returns a value for given key and row.
+ +
__repr__(self)
+ +
__setitem__(self, pair, value)
Sets a value for given key and row.
+ +
__str__(self)
Returns data as string.
+ +
add_data(self, other)
Adds rows from another data table to this one.
+ +
add_keys(self, *h)
Adds new keys to the data.
+ +
add_rows(self, n=1)
Adds some empty rows to the data.
+ +
col_by_idx(self, idx)
Returns a column by header's index
+ +
col_by_key(self, key)
Returns a column by header's name
+ +
del_row(self, idx)
Removes the given row from data
+ +
has_key(self, key)
Returns True if given key exists in data
+ +
row_by_idx(self, idx)
Returns a row by index.
+ +
transpose(self, key_idx=0)
Returns the transposed copy of the data.

+key_idx - index of the column, that contains keywords (default: 0)
+ +
+Data descriptors inherited from Data:
+
__dict__
+
dictionary for instance variables (if defined)
+
+
__weakref__
+
list of weak references to the object (if defined)
+
+

+ + + + + + + +
 
+class Data(__builtin__.object)
   Empty data class. Can be used for a subclassing or procedural data creation.
 
 Methods defined here:
+
__getitem__(self, pair)
Returns a value for given key and row.
+ +
__init__(self, *args, **kwargs)
Constructor
+ +
__repr__(self)
+ +
__setitem__(self, pair, value)
Sets a value for given key and row.
+ +
__str__(self)
Returns data as string.
+ +
add_data(self, other)
Adds rows from another data table to this one.
+ +
add_keys(self, *h)
Adds new keys to the data.
+ +
add_rows(self, n=1)
Adds some empty rows to the data.
+ +
col_by_idx(self, idx)
Returns a column by header's index
+ +
col_by_key(self, key)
Returns a column by header's name
+ +
del_row(self, idx)
Removes the given row from data
+ +
has_key(self, key)
Returns True if given key exists in data
+ +
row_by_idx(self, idx)
Returns a row by index.
+ +
transpose(self, key_idx=0)
Returns the transposed copy of the data.

+key_idx - index of the column, that contains keywords (default: 0)
+ +
+Data descriptors defined here:
+
__dict__
+
dictionary for instance variables (if defined)
+
+
__weakref__
+
list of weak references to the object (if defined)
+
+

+ \ No newline at end of file diff --git a/docs/ktt.html b/docs/ktt.html new file mode 100644 index 0000000..5117d93 --- /dev/null +++ b/docs/ktt.html @@ -0,0 +1,25 @@ + + +Python: package ktt + + + + +
 
+ 
ktt
index
/home/kaisd/Мастерская/projects/KTT/ktt/__init__.py
+

+

+ + + + + +
 
+Package Contents
       
atg
+
atr
+
data
+
template
+
+ \ No newline at end of file diff --git a/docs/ktt.template.html b/docs/ktt.template.html new file mode 100644 index 0000000..db75e9b --- /dev/null +++ b/docs/ktt.template.html @@ -0,0 +1,162 @@ + + +Python: module ktt.template + + + + +
 
+ 
ktt.template
index
/home/kaisd/Мастерская/projects/KTT/ktt/template.py
+

Template module for KaiSD Text Tools.

+(c) 2013 Ivan "Kai SD" Korystin 

+License: GPLv3

+

+ + + + + +
 
+Modules
       
re
+

+ + + + + +
 
+Classes
       
+
__builtin__.object +
+
+
Template +
+
+
TemplateV2 +
+
+
+
+
+

+ + + + + + + +
 
+class Template(__builtin__.object)
   Empty template class. Generates empty text.
 
 Methods defined here:
+
log(self, text)
Print information
+ +
process(self, data)
Replace this method in subclasses.
+ +
warning(self, text)
Prints a warning
+ +
+Data descriptors defined here:
+
__dict__
+
dictionary for instance variables (if defined)
+
+
__weakref__
+
list of weak references to the object (if defined)
+
+

+ + + + + + + +
 
+class TemplateV2(Template)
   Class for reading ATGv2 templates.

+ATGv2 template file should be a plain text file, starting with the line
+ATGV2
+followed by the info line:
+[$KeyField$Extension$Prefix$Encoding$]
+where
+KeyField - is a name of a data column, that contains an identifier.
+Extension - is the desired extension for the generated files.
+Prefix - is the desired filename prefix for the generated files
+Encoding - is the desired encoding for the generated files.
+The line may also have some optional keywords before the closing bracket:
+oneFile$ - place all generated text into a single file instead of
+generating a file for each table row.
+After the info line, you can put your text.
+You can use following commands to handle the data:
+* [$Name$], where Name is the column header,
+will be replaced with value from the current row.
+* [$ATGLINDEX$] will be replaced with the number of a current row.
+* [$ATGHEADER$Text$] and [$ATGFOOTER$Text$] will place the given text
+at the beginning or at the end of the file. You can't use other
+commands in this text.
+* [$ATGLIST$Name$Text$], where Name is a multi-column header
+(i.e. 'Col' will represent 'Col1', 'Col2', 'Col3' etc)
+will repeat the given text for each non-empty value.
+You can use other commands in Text. Also [$Name$] inside the list
+will be replaced with the value for the current row and column.
+* [$ATGLINDEX$] can be used only inside the ATGLIST text,
+will be replaced with the current column index.
+* [$ATGLISTCUT$Name$Text$] - same as ATGLIST, but the last symbol
+will be removed. Useful for removing unnecessary newlines.
+* [$ATGIF$Name$Value$Text$] will be replaced with the given text
+only if the given column's value is the same as the given one.
+Will be replaced with the empty text otherwise. You can use other
+commands in Text.
+* [$ATGIFNOT$Name$Value$Text$] - same as ATGIF, but the column's value
+should not be equal to the given one.
+* [$ATGGREATER$Name$Value$Text$] - same as ATGIF, but the value should
+be a number and it should be greater than the given one.
+* [$ATGGREATER$Name$Value$Text$] - same as ATGGREATER, but the value
+should be less than the given one.
+* [$ATGREPLACE$Text1$Text2$] - Will replace Text1 with Text2. Replacements
+will be done after all other commands. You can't use regular expressions or
+other commands in the text.
+* [$ATGPREFIX$Text$] - Will add the given text to the filename prefix.
+You can use other commands in text, but do it carefully.
+* [$ATGSKIP$] - Skip the current row. Use only in combination with the
+ATGIF/ATGIFNOT, or you will generate nothing.
+* [$ATGPREV$Name$], where Name is the column header,
+will be replaced with the value of the given header from the
+previous row. ATGSKIP will be used for the first row.
 
 
Method resolution order:
+
TemplateV2
+
Template
+
__builtin__.object
+
+
+Methods defined here:
+
__init__(self, filename=None, encoding='utf-8', text='')
Constructor.

+filename - name of the ATGv2 template file.
+encoding - encoding of the template file.
+text - text to use if no filename has been provided.
+ +
process(self, data)
Generate text for the given data.
+ +
+Static methods defined here:
+
express(cls, text, **kwargs)
+ +
+Methods inherited from Template:
+
log(self, text)
Print information
+ +
warning(self, text)
Prints a warning
+ +
+Data descriptors inherited from Template:
+
__dict__
+
dictionary for instance variables (if defined)
+
+
__weakref__
+
list of weak references to the object (if defined)
+
+

+ \ No newline at end of file diff --git a/ktt/__init__.py b/ktt/__init__.py new file mode 100644 index 0000000..15d55b9 --- /dev/null +++ b/ktt/__init__.py @@ -0,0 +1,4 @@ +from atg import * +from atr import * +from data import * +from template import * \ No newline at end of file diff --git a/ktt/atg.py b/ktt/atg.py new file mode 100644 index 0000000..7e806de --- /dev/null +++ b/ktt/atg.py @@ -0,0 +1,74 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +''' +Advanced Text Generator module for a KaiSD Text Tools. + +(c) 2013 Ivan "Kai SD" Korystin + +License: GPLv3 +''' +from os.path import join, exists +from os import makedirs + +class ATG(object): + ''' + Advanced Text Generator is a class, created to generate multiple + text files from table data. + ''' + def __init__(self, data, template): + ''' + Constructor. + data - an instance of the data.Data class (i.e. CSVData) + template - an instance of the template.Template class (i.e. TemplateV2) + ''' + self.data = data + self.template = template + self.out = template.process(data) + + if type(self.out) == dict: + self.multiple = True + else: + self.multiple = False + + def write_files(self, outputDir='.'): + ''' + Write generated files to the given directory. + ''' + encoding = self.template.encoding + extension = self.template.extension + out = self.out + if self.multiple: + for name in out.keys(): + namepath = name.replace('\\', '/').split('/') + newpath = u'' + for i in namepath[:-1]: + newpath = join(newpath, i) + if not exists(join(unicode(outputDir),newpath)): + makedirs(join(unicode(outputDir),newpath)) + fname = join(unicode(outputDir),name+'.'+extension) + if fname.endswith('.'): + fname = fname[:-1] + f = open(fname, 'w') + f.write(out[name].encode(encoding)) + self.log(' Saved %s' % (name+'.'+extension)) + f.close() + else: + name = self.template.bonusPrefix + '.' 
+ extension + if name == '.': + name = self.template.keyField + namepath = name.replace('\\', '/').split('/') + newpath = u'' + for i in namepath[:-1]: + newpath = join(newpath, i) + if not exists(join(unicode(outputDir),newpath)): + makedirs(join(unicode(outputDir),newpath)) + f = open(join(unicode(outputDir),name+'.'+extension), 'w') + f.write(out.encode(encoding)) + self.log(' Saved %s' % (name+'.'+extension)) + f.close() + + def log(self, text): + ''' + Print information + ''' + pass \ No newline at end of file diff --git a/ktt/atr.py b/ktt/atr.py new file mode 100644 index 0000000..4911f09 --- /dev/null +++ b/ktt/atr.py @@ -0,0 +1,168 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +''' +Advanced Text Replacer module for a KaiSD Text Tools. + +(c) 2013 Ivan "Kai SD" Korystin + +License: GPLv3 +''' +import re +class ATR(object): + ''' + Advanced Text Replacer - is a class, created to make multiple replacements + in the content or names of text file. + It can make plain replacements, or use ATG templates to do something more complex. + ''' + + def __init__(self, files): + ''' + Constructor + ''' + self.files = files + self.replacements = [] + + def plain_replace(self, pattern, string, regexp=False): + ''' + Replaces the given pattern with string in files. + ''' + if regexp: + pattern = re.compile(pattern) + self.replacements.append((pattern, string)) + + + def templated_replace(self, pattern, template, data, keyFormat='filename', regexp=False): + ''' + Replaces the given pattern with data formated by template. + Valid values for keyFormat: + filename - take data rows by filename(path ignored), key value of the data row should store the filename. + fullname - as filename, but with path. + index - take data rows in order, key value of the data row should store the index. Indexes starts with 0. + If filename or index cannot be found in data keys, pattern will not be replaced. 
+ ''' + if regexp: + pattern = re.compile(pattern) + strings = template.process(data) + self.replacements.append((pattern, strings, keyFormat)) + + + def write_in_place(self): + ''' + Do replacement and save the files + ''' + for f in self.files: + out = u'' + with open(f, 'rb') as file: + out = file.read() + + idx = 0 + for r in self.replacements: + if type(r[0]) in (str, unicode): + pattern = re.compile(re.escape(r[0])) + string = r[1] + elif type(r[0]) is dict and len(r) == 3: + if r[2] == 'filename': + fname = f.replace('\\', '/').split('/')[-1] + string = f[1].get(fname, None) + elif r[2] == 'fullname': + string = f[1].get(f, None) + elif r[2] == 'index': + fname = f.replace('\\', '/').split('/')[-1] + string = f[1].get(idx, None) + else: + raise BaseException('Unknown data key format.') + elif hasattr(r[0], 'match'): + pattern = r[0] + string = r[1] + else: + raise BaseException('Unknown pattern type.') + if string: + out = re.sub(pattern, string, out) + + with open(f, 'wb') as outfile: + outfile.write(out) + + def write_new_files(self, outfiles): + ''' + Do replacement, but save to given files instead of the original ones. 
+ ''' + if not len(outfiles) == len(self.files): + raise BaseException('Lists of original and new files has different length.') + + for f in self.files: + out = u'' + with open(f, 'rb') as file: + out = file.read() + + idx = 0 + for r in self.replacements: + if type(r[0]) in (str, unicode): + pattern = re.compile(re.escape(r[0])) + string = r[1] + elif type(r[0]) is dict and len(r) == 3: + if r[2] == 'filename': + fname = f.replace('\\', '/').split('/')[-1] + string = f[1].get(fname, None) + elif r[2] == 'fullname': + string = f[1].get(f, None) + elif r[2] == 'index': + fname = f.replace('\\', '/').split('/')[-1] + string = f[1].get(idx, None) + else: + raise BaseException('Unknown data key format.') + elif hasattr(r[0], 'match'): + pattern = r[0] + string = r[1] + else: + raise BaseException('Unknown pattern type.') + if string: + out = re.sub(pattern, string, out) + + with open(outfiles[self.files.index(f)], 'wb') as outfile: + outfile.write(out) + + def replace_in_names(self): + ''' + Do replacement, but in file names instead of file content. Returns the list of new file names, + you can use it with writeNewFiles() method. + ''' + out = [] + for f in self.files: + new = f + idx = 0 + for r in self.replacements: + if type(r[0]) in (str, unicode): + pattern = re.compile(re.escape(r[0])) + string = r[1] + elif type(r[0]) is dict and len(r) == 3: + if r[2] == 'filename': + fname = f.replace('\\', '/').split('/')[-1] + string = f[1].get(fname, None) + elif r[2] == 'fullname': + string = f[1].get(f, None) + elif r[2] == 'index': + fname = f.replace('\\', '/').split('/')[-1] + string = f[1].get(idx, None) + else: + raise BaseException('Unknown data key format.') + elif hasattr(r[0], 'match'): + pattern = r[0] + string = r[1] + else: + raise BaseException('Unknown pattern type.') + if string: + new = re.sub(pattern, string, new) + out.append(new) + return out + + def clear_replacements(self): + ''' + Removes all replacements. 
+ ''' + self.replacements = [] + + def log(self, string): + ''' + Print information + ''' + pass \ No newline at end of file diff --git a/ktt/data.py b/ktt/data.py new file mode 100644 index 0000000..f868d5d --- /dev/null +++ b/ktt/data.py @@ -0,0 +1,270 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +''' +Data module for a KaiSD Text Tools. + +(c) 2013 Ivan "Kai SD" Korystin + +License: GPLv3 +''' + +import csv, codecs, cStringIO + +class Data(object): + ''' + Empty data class. Can be used for a subclassing or procedural data creation. + ''' + def __init__(self, *args, **kwargs): + ''' + Constructor + ''' + self.keys = [] + self.rows = [] + + def __getitem__(self, pair): + ''' + Returns a value for given key and row. + ''' + key = pair[0] + row = pair[1] + + keys = self.keys + rows = self.rows + if key in keys: + if len(rows) > row: + return rows[row][keys.index(key)] + else: + raise BaseException('Row %i not found in data' % (row)) + else: + raise BaseException('Named value %s not found in data' % (key)) + + def __setitem__(self, pair, value): + ''' + Sets a value for given key and row. + ''' + key = pair[0] + row = pair[1] + + keys = self.keys + rows = self.rows + if key in keys: + if len(rows) > row: + rows[row][keys.index(key)] = value + else: + raise BaseException('Row %i not found in data' % (row)) + else: + raise BaseException('Named value %s not found in data' % (key)) + + def __str__(self): + ''' + Returns data as string. + ''' + return str((self.keys, self.rows)) + + def __repr__(self): + return self.__str__() + + def has_key(self, key): + ''' + Returns True if given key exists in data + ''' + return key in self.keys + + def add_rows(self, n=1): + ''' + Adds some empty rows to the data. + ''' + keys = self.keys + rows = self.rows + + for n in xrange(0, n): + row = [] + for k in keys: + row.append('') + rows.append(row) + + def add_keys(self, *h): + ''' + Adds new keys to the data. 
+ ''' + keys = self.keys + rows = self.rows + + for i in h: + keys.append(i) + for r in rows: + for i in h: + r.append('') + + def del_row(self, idx): + ''' + Removes giver row from data + ''' + del self.rows[idx] + + def col_by_key(self, key): + ''' + Returns a column by header's name + ''' + keys = self.keys + if key in keys: + idx = keys.index(key) + return self.col_by_idx(idx) + else: + raise BaseException('Named value %s not found in data' % (key)) + + def col_by_idx(self, idx): + ''' + Returns a column by header's index + ''' + cols = [] + rows = self.rows + for r in rows: + if len(r) > idx: + cols.append(r[idx]) + return tuple(cols) + + def row_by_idx(self, idx): + ''' + Returns a row by index. + ''' + return tuple(self.rows[idx]) + + def transpose(self, key_idx = 0): + ''' + Returns the transposed copy of the data. + + key_idx - index of the column, that contains keywords (default: 0) + ''' + new_keys = [self.keys[key_idx]] + new_keys += list(self.col_by_idx(key_idx)) + new_data = Data() + new_data.keys = new_keys + + idx = 0 + for k in self.keys: + if not idx == key_idx: + new_row = [k] + new_row += self.col_by_idx(idx) + new_data.rows.append(new_row) + idx += 1 + + return new_data + + def add_data(self, other): + ''' + Adds rows from another data table to this one. + ''' + sk = self.keys + ok = other.keys + + for k in ok: + if not k in sk: + self.add_keys(k) + + for r in other.rows: + new_row = [] + if len(r) >= len(sk): + for k in sk: + if k in ok: + new_row.append(r[ok.index(k)]) + else: + new_row.append('') + self.rows.append(new_row) + +class CSVData(Data): + ''' + Class for reading CSV files. 
+ ''' + class Reader: + class Recoder: + def __init__(self, f, encoding): + self.reader = codecs.getreader(encoding)(f) + + def __iter__(self): + return self + + def next(self): + return self.reader.next().encode("utf-8") + + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwargs): + f = self.Recoder(f, encoding) + self.reader = csv.reader(f, dialect=dialect, **kwargs) + + def next(self): + row = self.reader.next() + return [unicode(s, "utf-8") for s in row] + + def __iter__(self): + return self + + class Writer: + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwargs): + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwargs) + self.stream = f + self.encoder = codecs.getincrementalencoder(encoding)() + + def writerow(self, row): + self.writer.writerow([unicode(s).encode("utf-8") for s in row]) + data = self.queue.getvalue() + data = data.decode("utf-8") + data = self.encoder.encode(data) + self.stream.write(data) + self.queue.truncate(0) + + def writerows(self, rows): + for row in rows: + self.writerow(row) + + def __init__(self, file, encoding='utf-8', delimiter=';', quotechar='"', **kwargs): + ''' + Constructor. 
+ + filename - CSV table filename + encoding - CSV table encoding (default: utf-8) + delimiter - CSV table delimiter (default: ;) + quotechar - CSV table quotechar (default: ") + ''' + if file: + if type(file) == str: + with open(file) as f: + csvfile = self.Reader(f, encoding=encoding, delimiter=delimiter, quotechar=quotechar) + else: + csvfile = self.Reader(file, encoding=encoding, delimiter=delimiter, quotechar=quotechar) + + source_data = [] + source_keys = None; + + for i in csvfile: + if not source_keys: + source_keys = i + else: + for k in xrange(0, len(i)): + try: + i[k] = int(i[k]) + except: + try: + i[k] = float(i[k]) + except: + i[k] = i[k] + source_data.append(i) + + self.keys = source_keys + self.rows = source_data + else: + super(CSVData, self).__init__() + + def export_csv(self, filename, encoding='utf-8', delimiter=';', quotechar='"', **kwargs): + ''' + Saves the data to CSV file + + filename - CSV table filename + encoding - CSV table encoding (default: utf-8) + delimiter - CSV table delimiter (default: ;) + quotechar - CSV table quotechar (default: ") + ''' + with open(filename, 'wb') as f: + csvfile = self.Writer(f, encoding='utf-8', delimiter=';', quotechar='"', **kwargs) + csvfile.writerow(self.keys) + csvfile.writerows(self.rows) diff --git a/ktt/template.py b/ktt/template.py new file mode 100644 index 0000000..6325642 --- /dev/null +++ b/ktt/template.py @@ -0,0 +1,482 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +''' +Template module for a KaiSD Text Tools. + +(c) 2013 Ivan "Kai SD" Korystin + +License: GPLv3 +''' + +import re +class Template(object): + ''' + Empty template class. Generates empty text. + ''' + def process(self, data): + ''' + Replace this method in subclasses. + ''' + return '' + + def warning(self, text): + ''' + Prints a warning + ''' + print text + + def log(self, text): + ''' + Print information + ''' + pass + +class TemplateV2(Template): + ''' + Class for reading ATGv2 templates. 
+ + ATGv2 template file should be a plain text file, starting with the line + ATGV2 + followed by the info line: + [$KeyField$Extension$Prefix$Encoding$] + where + KeyField - is a name of a data column, that contains an identifier. + Extension - is the desired extension for the generated files. + Prefix - is the desired filename prefix for the generated files + Encoding - is the desired encoding for the generated files. + The line may also have some optional keywords before the closing bracket: + oneFile$ - place all generated text into a single file instead of + generating a file for each table row. + After the info line, you can put your text. + You can use following commands to handle the data: + * [$Name$], where Name is the column header, + will be replaced with value from the current row. + * [$ATGLINDEX$] will be replaced with the number of a current row. + * [$ATGHEADER$Text$] and [$ATGFOOTER$Text$] will place the given text + at the begining or at the end of the file. You can't use other + commands in this text. + * [$ATGLIST$Name$Text$], where Name is a multi-column header + (i.e. 'Col' will represent 'Col1', 'Col2', 'Col3' etc) + will repeat the given text for each non-empty value. + You can use other commands in Text. Also [$Name$] inside the list + will be replaced with the value for the current row and column. + * [$ATGLINDEX$] can be used only inside the ATGLIST text, + will be replaced with the current column index. + * [$ATGLISTCUT$Name$Text$] - same as ATGLIST, but the last symbol + will be removed. Useful for removing unnecessary newlines. + * [$ATGIF$Name$Value$Text$] will be replaced with the given text + only if the the given column's value is the same as the given one. + Will be replaced with the empty text otherwise. You can use other + commands in Text. + * [$ATGIFNOT$Name$Value$Text$] - same as ATGIF, but the column's value + should not be equal to the given one. 
+ * [$ATGGREATER$Name$Value$Text$] - same as ATGIF, but the value should + be the number and it should be greater then the given one. + * [$ATGGREATER$Name$Value$Text$] - same as ATGGREATER, but the value + should be less then the given one. + * [$ATGREPLACE$Text1$Text2$] - Will replace Text1 with Text2. Replacements + will be done after all other commands. You can't use regular expressions or + other commands in the text. + * [$ATGPREFIX$Text$] - Will add the given text to the filename prefix. + You can use other commands in text, but do it carefully. + * [$ATGSKIP$] - Skip the current row. Use only in combination with the + ATGIF/ATGIFNOT, or you will generate nothing. + * [$ATGPREV$Name$], where Name is the column header, + will be replaced with the with the value of the given header from the + previous row. ATGSKIP will be used for the first row. + ''' + + def __init__(self, filename=None, encoding='utf-8', text=''): + ''' + Constructor. + + filename - name of the ATGv2 template file. + encoding - encoding of the template file. + text - text to use if no filename has been provided. 
+ ''' + if filename: + with open(filename, 'r') as templateFile: + topline = templateFile.readline().decode(encoding) + if not topline.startswith('ATGV2'): + raise BaseException('%s is not an ATGv2 template' % (filename)) + + key = templateFile.readline().decode(encoding) + if key[:2] == '[$' and key[-3:-1] == '$]': + keyInfo = key[2:-2].split('$') + if len(keyInfo) < 4: + raise BaseException('%s has bad ATGv2 key' % (filename)) + self.keyField = keyInfo[0] + self.extension = keyInfo[1] + self.prefix = keyInfo[2] + self.encoding = keyInfo[3] + if 'oneFile' in keyInfo[4:]: + self.oneFile = True + else: + self.oneFile = False + self.text = u'' + else: + raise BaseException('%s has bad ATGv2 key' % (filename)) + + for i in templateFile.readlines(): + self.text += i.decode(encoding) + else: + self.text = text + + self.header = u'' + self.footer = u'' + self.replacement = {} + self._data = None + self._multiWords = None + + def parse(text): + topParts = [] + matches = {} + + openers = re.finditer('\[\$.*?\$', text) + closers = re.finditer('\$\]', text) + ops = [] + try: + cl = closers.next() + while not cl is None: + try: + op = openers.next() + if op.start() < cl.start(): + ops.append(op) + else: + idx = -1 + try: + while ops[idx].start() > cl.start(): + idx -= 1 + except: + raise BaseException('Template parsing error: can not find the opener for '+str(cl.start())) + matches[ops[idx]] = cl + if len(ops) == 1 or idx == -len(ops): + topParts.append(ops[idx]) + del ops[idx] + ops.append(op) + try: + cl = closers.next() + except StopIteration: + cl = None + except StopIteration: + idx = -1 + try: + while ops[idx].start() > cl.start(): + idx -= 1 + except: + raise BaseException('Template parsing error: can not find the opener for '+str(cl.start())) + matches[ops[idx]] = cl + if len(ops) == 1 or idx == -len(ops): + topParts.append(ops[idx]) + del ops[idx] + try: + cl = closers.next() + except StopIteration: + cl = None + except StopIteration: + pass + parts = [] + for i in 
topParts: + startPoint = i.end() + endPoint = matches[i].start() + p = (i.group()[2:-1], text[startPoint:endPoint]) + if p[0].startswith('ATG'): + parts.insert(0, p) + else: + parts.append(p) + return parts + + partCommands = {} + + def plain(index, flow, keytag): + if not keytag in self._data.keys: + self.warning('WARNING: keyword not found in table - %s' % (keytag)) + return flow + return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag, index])) + partCommands['_ATGPLAIN'] = plain + + def nPlain(index, flow, keytag, number): + if not keytag+str(number) in self._data.keys: + self.warning('WARNING: keyword not found in table - %s' % (keytag+str(number))) + return flow + return flow.replace('[$%s$]' % (keytag), unicode(self._data[keytag+str(number), index])) + + def lIndex(index, flow, keytag, number): + return flow.replace('[$ATGLINDEX$]', str(number)) + + def addHeader(index, flow, text): + if self.header.find(text) < 0: + self.header += text + key = '[$ATGHEADER$' + text + '$]' + return flow.replace(key,'') + partCommands['ATGHEADER'] = addHeader + + def addFooter(index, flow, text): + if self.footer.find(text) < 0: + self.footer += text + key = '[$ATGFOOTER$' + text + '$]' + return flow.replace(key,'') + partCommands['ATGFOOTER'] = addFooter + + def addList(index, flow, string): + key = '[$ATGLIST$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) + sub = string[len(string.split('$')[0])+1:] + keyTag = string.split('$')[0] + subparts = parse(sub) + myText = u'' + if not keyTag in self._multiWords: + self.warning('Keytag %s is not multiple!' 
% (keyTag)) + return flow + for j in xrange(1, self._multiWords[keyTag]+1): + subText = sub + for sp in subparts: + if sp[0] in self._multiWords: + subText = nPlain(index, subText, sp[0], j) + elif sp[0] == 'ATGLINDEX': + subText = lIndex(index, subText, sp[0], j) + elif sp[0] in partCommands: + subText = partCommands[sp[0]](index, subText, sp[1]) + elif sp[1] == '': + subText = plain(index, subText, sp[0]) + else: + self.warning('Warning: unknown command '+sp[0]) + if not self._data[keyTag+str(j), index] == u'': + myText += subText + return flow.replace(key, myText) + partCommands['ATGLIST'] = addList + + def addListCut(index, flow, string): + key = '[$ATGLISTCUT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) + sub = string[len(string.split('$')[0])+1:] + keyTag = string.split('$')[0] + subparts = parse(sub) + myText = u'' + if not keyTag in self._multiWords: + self.warning('Keytag %s is not multiple!' % (keyTag)) + return flow + for j in xrange(1, self._multiWords[keyTag]+1): + subText = sub + for sp in subparts: + if sp[0] in self._multiWords: + subText = nPlain(index, subText, sp[0], j) + elif sp[0] == 'ATGLINDEX': + subText = lIndex(index, subText, sp[0], j) + elif sp[0] in partCommands: + subText = partCommands[sp[0]](index, subText, sp[1]) + elif sp[1] == '': + subText = plain(index, subText, sp[0]) + else: + self.warning('Warning: unknown command '+sp[0]) + if not self._data[keyTag+str(j), index] == u'': + myText += subText + return flow.replace(key, myText[:-1]) + partCommands['ATGLISTCUT'] = addListCut + + def addIf(index, flow, string): + key = '[$ATGIF$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) + sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:] + keyTag = string.split('$')[0] + targetValue = string.split('$')[1] + subparts = parse(sub) + myText = u'' + if self._data[keyTag, 0] == []: + self.warning('WARNING: keyword not found in table - %s' % (keyTag)) + return flow + if 
unicode(self._data[keyTag, index]) == unicode(targetValue): + subText = sub + for sp in subparts: + if sp[0] in partCommands: + subText = partCommands[sp[0]](index, subText, sp[1]) + elif sp[1] == '': + subText = plain(index, subText, sp[0]) + else: + self.warning('Warning: unknown command '+sp[0]) + myText += subText + return flow.replace(key, myText) + partCommands['ATGIF'] = addIf + + def addIfNot(index, flow, string): + key = '[$ATGIFNOT$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) + sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:] + keyTag = string.split('$')[0] + targetValue = string.split('$')[1] + subparts = parse(sub) + myText = u'' + if self._data[keyTag, 0] == []: + self.warning('WARNING: keyword not found in table - %s' % (keyTag)) + return flow + if not unicode(self._data[keyTag, index]) == unicode(targetValue): + subText = sub + for sp in subparts: + if sp[0] in partCommands: + subText = partCommands[sp[0]](index, subText, sp[1]) + elif sp[1] == '': + subText = plain(index, subText, sp[0]) + else: + self.warning('Warning: unknown command '+sp[0]) + myText += subText + return flow.replace(key, myText) + partCommands['ATGIFNOT'] = addIfNot + + def addGreater(index, flow, string): + key = '[$ATGGREATER$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) + sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:] + keyTag = string.split('$')[0] + targetValue = string.split('$')[1] + subparts = parse(sub) + myText = u'' + if self._data[keyTag, 0] == []: + self.warning('WARNING: keyword not found in table - %s' % (keyTag)) + return flow + try: + if float(self._data[keyTag, index]) > float(targetValue): + subText = sub + for sp in subparts: + if sp[0] in partCommands: + subText = partCommands[sp[0]](index, subText, sp[1]) + elif sp[1] == '': + subText = plain(index, subText, sp[0]) + else: + self.warning('Warning: unknown command '+sp[0]) + myText += subText + except: + 
self.warning('ERROR: trying to compare uncomparable values!') + return flow.replace(key, myText) + partCommands['ATGGREATER'] = addGreater + + def addLess(index, flow, string): + key = '[$ATGLESS$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) + sub = string[len(string.split('$')[0])+len(string.split('$')[1])+2:] + keyTag = string.split('$')[0] + targetValue = string.split('$')[1] + subparts = parse(sub) + myText = u'' + if self._data[keyTag, 0] == []: + self.warning('WARNING: keyword not found in table - %s' % (keyTag)) + return flow + try: + if float(self._data[keyTag, index]) < float(targetValue): + subText = sub + for sp in subparts: + if sp[0] in partCommands: + subText = partCommands[sp[0]](index, subText, sp[1]) + elif sp[1] == '': + subText = plain(index, subText, sp[0]) + else: + self.warning('Warning: unknown command '+sp[0]) + myText += subText + except: + self.warning('ERROR: trying to compare uncomparable values!') + return flow.replace(key, myText) + partCommands['ATGLESS'] = addLess + + def addReplace(index, flow, string): + key = '[$ATGREPLACE$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) + targetString = string[len(string.split('$')[0])+1:] + srcString = string.split('$')[0] + self.replacement[srcString] = targetString + key = '[$ATGREPLACE$' + string + '$]' + return flow.replace(key,'') + partCommands['ATGREPLACE'] = addReplace + + def addPrefix(index, flow, string): + key = '[$ATGPREFIX$%s$%s$]' % (string.split('$')[0], string[len(string.split('$')[0])+1:]) + sub = string + subparts = parse(sub) + for sp in subparts: + if sp[0] in partCommands: + sub = partCommands[sp[0]](index, sub, sp[1]) + elif sp[1] == '': + sub = plain(index, sub, sp[0]) + else: + self.warning('Warning: unknown command '+sp[0]) + self.bonusPrefix += sub + key = '[$ATGPREFIX$' + string + '$]' + return flow.replace(key,'') + partCommands['ATGPREFIX'] = addPrefix + + def skip(index, flow, string): + return u'[$ATGSKIP_DO$]' + 
partCommands['ATGSKIP'] = skip + + def prev(index, flow, string): + key = '[$ATGPREV$%s$]' % (string.split('$')[0]) + keytag = string.split('$')[0] + if self._data[keytag, 0] == []: + self.warning('WARNING: keyword not found in table - %s' % (keytag)) + return flow + if index == 0: + self.log('INFORMATION: Skipping ATGPREV tag for entry with index = 0') + return u'[$ATGSKIP_DO$]' + return flow.replace('[$ATGPREV$%s$]' % (keytag), unicode(self._data.col_by_key(keytag)[index-1])) + partCommands['ATGPREV'] = prev + + self.commands = partCommands + self.parts = parse(self.text) + + def process(self, data): + ''' + Generate text for the given data. + ''' + self._data = data + + multiWords = {} + numbs = ('1','2','3','4','5','6','7','8','9','0') + + for i in data.keys: + multi = False + while i[-1] in numbs: + i = i[:-1] + multi = True + if multi: + if i in multiWords: + multiWords[i] += 1 + else: + multiWords[i] = 1 + self._multiWords = multiWords + + if self.oneFile: + out = '' + else: + out = {} + index = 0 + partCommands = self.commands + for element in data.col_by_key(self.keyField): + self.bonusPrefix = self.prefix + text = self.text + for i in self.parts: + if i[0] in partCommands: + text = partCommands[i[0]](index, text, i[1]) + elif i[1] == u'': + text = partCommands['_ATGPLAIN'](index, text, i[0]) + else: + self.warning('Warning: unknown command '+i[0]) + for i in self.replacement: + text = text.replace(i, self.replacement[i]) + self.replacement = {} + index += 1 + + if u'[$ATGSKIP_DO$]' in text: + self.log('ATGSKIP Tag found. 
Skipping ' + unicode(element) + '.') + else: + if self.oneFile: + out += text + else: + name = self.bonusPrefix + unicode(element) + out[name] = self.header + text + self.footer + self.log('Created %s' % (element)) + + if self.oneFile: + out = self.header + out + self.footer + + return out + + @staticmethod + def express(cls, text, **kwargs): + obj = cls() + obj.text = text + obj.keyField = kwargs.get('keyField', 'Index') + obj.extension = kwargs.get('extension', '') + obj.prefix = kwargs.get('prefix', '') + obj.encoding = kwargs.get('encoding', 'utf-8') + return obj \ No newline at end of file