KaiSD преди 11 години
родител
ревизия
4968131d94
променени са 6 файла, в които са добавени 857 реда и са изтрити 1 реда
  1. +10
    -1
      README.md
  2. +62
    -0
      atg.py
  3. +163
    -0
      atr.py
  4. +176
    -0
      data.py
  5. +27
    -0
      ktt_atgcsv.py
  6. +419
    -0
      template.py

+ 10
- 1
README.md Целия файл

@@ -1,4 +1,13 @@
ktt
===

Kai's text tools
Kai's text tools

Contains the following Python modules:
atg - Advanced Text Generator. Generates large numbers of files from a template and data stored in a table.
atr - Advanced Text Replacer. Handles various kinds of replacements in text files.
template - template class for ATG and ATR.
data - data class for ATG and ATR.

Scripts:
ktt_atgcsv.py - generates files from a CSV table and an ATGv2 template file.

+ 62
- 0
atg.py Целия файл

@@ -0,0 +1,62 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Advanced Text Generator module for a Kai's Text Tools.

(c) 2013 Ivan "Kai SD" Korystin

License: GPLv3
'''
from os.path import join, split, exists
from os import makedirs

# Text type on both Python 2 (unicode) and Python 3 (str).
_atg_text = type(u'')


class ATG(object):
    '''
    Advanced Text Generator: renders a template against table data and
    writes the result to disk.

    The template object must provide process(data) plus the attributes
    encoding, extension, keyField and (after processing) bonusPrefix.
    process() returns either a dict mapping output names to texts (one file
    per entry) or a single text (one file in total).
    '''

    def __init__(self, data, template):
        '''
        Render the template immediately; the result is kept in self.out.

        self.multiple is True when the template produced a dict of files.
        '''
        self.data = data
        self.template = template
        self.out = template.process(data)
        self.multiple = isinstance(self.out, dict)

    def write_files(self, outputDir='.'):
        '''
        Write the generated text(s) below outputDir.

        Output names may contain '/' or '\\' separators; missing
        directories are created. In single-file mode the file is named
        after the template prefix, falling back to the key field name when
        the prefix is empty.
        '''
        extension = self.template.extension
        if self.multiple:
            for name in self.out.keys():
                self._save(outputDir, name, extension, self.out[name])
        else:
            # bonusPrefix only exists once the template has processed at
            # least one row, hence the getattr.
            name = getattr(self.template, 'bonusPrefix', '')
            if not name:
                name = self.template.keyField
            self._save(outputDir, name, extension, self.out)

    def _save(self, outputDir, name, extension, text):
        '''Write one text to outputDir/name.extension in the template encoding.'''
        import io

        # Normalise both separator styles so the directories that get
        # created and the path that gets opened always agree.
        components = name.replace('\\', '/').split('/')
        relative = join(*components) + '.' + extension
        if relative.endswith('.'):
            # Empty extension: do not leave a dangling dot.
            relative = relative[:-1]
        fname = join(_atg_text(outputDir), relative)
        folder = split(fname)[0]
        if folder and not exists(folder):
            makedirs(folder)
        if isinstance(text, bytes):
            # Byte strings are treated as ASCII, which is what the implicit
            # str.encode() conversion on Python 2 used to do.
            text = text.decode('ascii')
        with io.open(fname, 'w', encoding=self.template.encoding) as f:
            f.write(text)
        self.log(' Saved %s' % (relative))

    def log(self, text):
        '''Logging hook; does nothing by default.'''
        pass

+ 163
- 0
atr.py Целия файл

@@ -0,0 +1,163 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Advanced Text Replacer module for a Kai's Text Tools.

(c) 2013 Ivan "Kai SD" Korystin

License: GPLv3
'''
import re
# String types accepted as literal patterns: byte strings and text
# (str/unicode on Python 2, bytes/str on Python 3).
_ATR_STRING_TYPES = (bytes, type(u''))


class ATR(object):
    '''
    Advanced Text Replacer: collects replacements and applies them to the
    content or to the names of a list of files.

    File content is read and written as raw bytes, so patterns and
    replacement strings must be of a type that can be combined with it.
    '''

    def __init__(self, files):
        '''
        files - list of paths the replacements will be applied to.
        '''
        self.files = files
        self.replacements = []

    def plain_replace(self, pattern, string, regexp=False):
        '''
        Replaces the given pattern with string in files.
        With regexp=False the pattern and the string are taken literally;
        with regexp=True the pattern is compiled as a regular expression and
        the string follows re.sub() replacement rules.
        '''
        if regexp:
            pattern = re.compile(pattern)
        self.replacements.append((pattern, string))

    def templated_replace(self, pattern, template, data, keyFormat='filename', regexp=False):
        '''
        Replaces the given pattern with data formatted by template.
        Valid values for keyFormat:
        filename - take data rows by filename (path ignored), key value of the data row should store the filename.
        fullname - as filename, but with path.
        index - take data rows in order, key value of the data row should store the index. Indexes start with 0.
        If filename or index cannot be found in data keys, pattern will not be replaced.
        '''
        if regexp:
            pattern = re.compile(pattern)
        strings = template.process(data)
        self.replacements.append((pattern, strings, keyFormat))

    def _lookup(self, replacement, filename, index):
        '''Return the replacement text for one file, or None to skip it.'''
        if len(replacement) != 3:
            return replacement[1]
        strings, keyFormat = replacement[1], replacement[2]
        if keyFormat == 'filename':
            return strings.get(filename.replace('\\', '/').split('/')[-1], None)
        if keyFormat == 'fullname':
            return strings.get(filename, None)
        if keyFormat == 'index':
            found = strings.get(index, None)
            if found is None:
                # Templates build their keys as text, so also try '0', '1', ...
                found = strings.get(type(u'')(index), None)
            return found
        raise ValueError('Unknown data key format.')

    def _apply(self, text, filename, index):
        '''Run every registered replacement over text and return the result.'''
        for r in self.replacements:
            pattern = r[0]
            literal = isinstance(pattern, _ATR_STRING_TYPES)
            if not literal and not hasattr(pattern, 'match'):
                raise ValueError('Unknown pattern type.')
            string = self._lookup(r, filename, index)
            if string is None:
                continue
            if literal:
                # Literal replace: no escape processing of the replacement.
                text = text.replace(pattern, string)
            else:
                text = pattern.sub(string, text)
        return text

    def write_in_place(self):
        '''
        Do replacement and save the files
        '''
        for idx, f in enumerate(self.files):
            with open(f, 'rb') as infile:
                out = infile.read()
            out = self._apply(out, f, idx)
            with open(f, 'wb') as outfile:
                outfile.write(out)

    def write_new_files(self, outfiles):
        '''
        Do replacement, but save to given files instead of the original ones.
        outfiles must have the same length as the list of original files.
        '''
        if not len(outfiles) == len(self.files):
            raise ValueError('Lists of original and new files has different length.')
        for idx, (f, target) in enumerate(zip(self.files, outfiles)):
            with open(f, 'rb') as infile:
                out = infile.read()
            out = self._apply(out, f, idx)
            with open(target, 'wb') as outfile:
                outfile.write(out)

    def replace_in_names(self):
        '''
        Do replacement, but in file names instead of file content. Returns the list of new file names,
        you can use it with write_new_files() method.
        '''
        return [self._apply(f, f, idx) for idx, f in enumerate(self.files)]

    def clear_replacements(self):
        '''
        Removes all replacements.
        '''
        self.replacements = []

    def log(self, string):
        '''Logging hook; does nothing by default.'''
        pass

+ 176
- 0
data.py Целия файл

@@ -0,0 +1,176 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Data module for a Kai's Text Tools.

(c) 2013 Ivan "Kai SD" Korystin

License: GPLv3
'''

import csv, codecs

class Data(object):
    '''
    Empty data class. Can be used for a subclassing or procedural data creation.

    The table is stored as self.keys (list of column names) and self.rows
    (list of rows, each a list with one value per key). Cells are addressed
    as data[key, row].
    '''

    def __init__(self, *args, **kwargs):
        '''
        Constructor. Arguments are accepted for subclasses and ignored here.
        '''
        self.keys = []
        self.rows = []

    def _locate(self, pair):
        '''
        Translate a (key, row) pair into (row index, column index).
        Raises KeyError for an unknown key and IndexError for a row past the end.
        '''
        key, row = pair[0], pair[1]
        if key not in self.keys:
            raise KeyError('Named value %s not found in data' % (key))
        if not len(self.rows) > row:
            raise IndexError('Row %i not found in data' % (row))
        return row, self.keys.index(key)

    def __getitem__(self, pair):
        '''
        Returns a value for given key and row.
        '''
        row, col = self._locate(pair)
        return self.rows[row][col]

    def __setitem__(self, pair, value):
        '''
        Sets a value for given key and row.
        '''
        row, col = self._locate(pair)
        self.rows[row][col] = value

    def __str__(self):
        '''
        Returns data as string.
        '''
        return str((self.keys, self.rows))

    def __repr__(self):
        return self.__str__()

    def has_key(self, key):
        '''
        Returns True if given key exists in data
        '''
        return key in self.keys

    def add_rows(self, n=1):
        '''
        Adds n empty rows to the data; every cell is an empty string.
        '''
        width = len(self.keys)
        for _ in range(n):
            self.rows.append([''] * width)

    def add_keys(self, *h):
        '''
        Adds new keys to the data; existing rows get an empty string for each.
        '''
        self.keys.extend(h)
        for r in self.rows:
            r.extend([''] * len(h))

    def col_by_key(self, key):
        '''
        Returns the values of the column named key as a tuple, one per row.
        Raises KeyError for an unknown key.
        '''
        if key not in self.keys:
            raise KeyError('Named value %s not found in data' % (key))
        idx = self.keys.index(key)
        return tuple(r[idx] for r in self.rows)

    def row_by_idx(self, idx):
        '''
        Returns the row with the given index as a tuple.
        '''
        return tuple(self.rows[idx])

class CSVData(Data):
    '''
    Class for reading CSV files.

    The first record supplies the keys and every following record is one
    row. With transpose=True the first field of every record is a key and
    the remaining fields form that key's column. Fields that look like
    numbers are converted to int or float.
    '''

    class Reader(object):
        '''
        CSV reader yielding rows of text for a binary file in any encoding.
        '''

        class Recoder(object):
            '''
            Re-encodes a binary stream to UTF-8 lines (needed by the
            Python 2 csv module).
            '''
            def __init__(self, f, encoding):
                self.reader = codecs.getreader(encoding)(f)

            def __iter__(self):
                return self

            def __next__(self):
                return next(self.reader).encode("utf-8")

            next = __next__  # Python 2 iterator protocol

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            if str is bytes:
                # Python 2: csv only handles byte strings, feed it UTF-8.
                f = self.Recoder(f, encoding)
            else:
                # Python 3: csv wants decoded text lines.
                f = codecs.getreader(encoding)(f)
            self.reader = csv.reader(f, dialect=dialect, **kwds)

        def __next__(self):
            row = next(self.reader)
            return [s.decode("utf-8") if isinstance(s, bytes) else s for s in row]

        next = __next__  # Python 2 iterator protocol

        def __iter__(self):
            return self

    @staticmethod
    def _coerce(value):
        '''Return value as int or float when it parses as one, else unchanged.'''
        for cast in (int, float):
            try:
                return cast(value)
            except ValueError:
                pass
        return value

    def __init__(self, filename, encoding='utf-8', delimiter=';', quotechar='"', **kwargs):
        '''
        Read filename into keys and rows.

        encoding, delimiter and quotechar describe the CSV file; pass
        transpose=True when the keys are stored in the first column instead
        of the first line. Blank lines are ignored.
        '''
        Data.__init__(self)
        # Binary mode: decoding is done by the reader, and the csv module
        # handles the line endings itself.
        with open(filename, 'rb') as stream:
            csvfile = self.Reader(stream, encoding=encoding, delimiter=delimiter, quotechar=quotechar)
            records = [record for record in csvfile if record]
        coerce = self._coerce
        if kwargs.get('transpose', False):
            try:
                from itertools import izip_longest as zip_longest  # Python 2
            except ImportError:
                from itertools import zip_longest
            sourcekeys = [record[0] for record in records]
            columns = [[coerce(cell) for cell in record[1:]] for record in records]
            # Short columns are padded with None, like map() with several
            # sequences did on Python 2. Rows are lists so they stay editable.
            sourceData = [list(row) for row in zip_longest(*columns)]
        else:
            sourcekeys = records[0] if records else []
            sourceData = [[coerce(cell) for cell in record] for record in records[1:]]
        self.keys = sourcekeys
        self.rows = sourceData

+ 27
- 0
ktt_atgcsv.py Целия файл

@@ -0,0 +1,27 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Generates files from csv table.

Part of Kai's Text Tools

(c) 2013 Ivan "Kai SD" Korystin

License: GPLv3
'''
from sys import argv
from atg import ATG
from data import CSVData
from template import TemplateV2
from os.path import split

if __name__ == '__main__':
    # Accepted forms: <csv> <template> [output directory]
    arg_count = len(argv)
    if arg_count in (3, 4):
        generator = ATG(CSVData(argv[1]), TemplateV2(argv[2]))
        if arg_count == 4:
            generator.write_files(argv[3])
        else:
            generator.write_files()
    else:
        usage = ['Usage:', split(argv[0])[-1], '<CSV file>', '<Template file>', '[Output directory]']
        print(' '.join(usage))
        print('(c)2013 Ivan "Kai SD" Korystin')

+ 419
- 0
template.py Целия файл

@@ -0,0 +1,419 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Template module for a Kai's Text Tools.

(c) 2013 Ivan "Kai SD" Korystin

License: GPLv3
'''

import re
class TemplateV3(object):
    '''
    Placeholder for an ATGv3 template reader; defines no behaviour yet.
    '''
# Text type on both Python 2 (unicode) and Python 3 (str).
_text = type(u'')


class TemplateV2(object):
    '''
    Class for reading ATGv2 templates.

    A template file starts with a line beginning with ATGV2, followed by a
    key line of the form
        [$keyField$extension$prefix$encoding$ (options...) $]
    where the optional options are oneFile and transpose. The rest of the
    file is the template text. Tags in the text look like [$name$] (value of
    column name) or [$COMMAND$arguments$] for the commands registered in
    self.commands.
    '''

    def __init__(self, filename=None, encoding='utf-8', text=''):
        '''
        Load the template from filename (decoded with encoding), or use
        text directly when no filename is given.

        Raises ValueError when the file is not a valid ATGv2 template.
        '''
        # Defaults for templates built from plain text; a template file
        # overrides them from its key line.
        self.keyField = 'Index'
        self.extension = ''
        self.prefix = ''
        self.encoding = encoding
        self.oneFile = False
        self.transpose = False
        self.text = text
        if filename:
            self._load(filename, encoding)
        self.bonusPrefix = self.prefix
        self.key = u''
        self.footer = u''
        self.replacement = {}
        self._data = None
        self._multiWords = None
        self.commands = self._build_commands()
        self.parts = self._parse(self.text)

    def _load(self, filename, encoding):
        '''Read signature line, key line and template text from filename.'''
        import io

        with io.open(filename, 'r', encoding=encoding) as templateFile:
            topline = templateFile.readline()
            if not topline.startswith('ATGV2'):
                raise ValueError('%s is not an ATGv2 template' % (filename))
            # Drop the line ending so the check does not depend on it.
            key = templateFile.readline().rstrip('\r\n')
            if not (key.startswith('[$') and key.endswith('$]')):
                raise ValueError('%s has bad ATGv2 key' % (filename))
            keyInfo = key[2:-2].split('$')
            if len(keyInfo) < 4:
                raise ValueError('%s has bad ATGv2 key' % (filename))
            self.keyField = keyInfo[0]
            self.extension = keyInfo[1]
            self.prefix = keyInfo[2]
            self.encoding = keyInfo[3]
            self.oneFile = 'oneFile' in keyInfo[4:]
            self.transpose = 'transpose' in keyInfo[4:]
            self.text = templateFile.read()

    @staticmethod
    def _parse(text):
        '''
        Split text into its top-level tags.

        Returns a list of (name, body) tuples: name is the text between
        '[$' and the next '$', body is everything up to the matching '$]'
        (empty for plain [$name$] tags). Tags whose name starts with ATG
        are moved to the front of the list. Raises ValueError for a '$]'
        without an opener.
        '''
        topParts = []
        matches = {}
        openers = re.finditer(r'\[\$.*?\$', text)
        closers = re.finditer(r'\$\]', text)
        pending = []  # openers seen so far that are not closed yet

        def close(closer):
            # Pair the closer with the nearest unclosed opener before it.
            idx = -1
            try:
                while pending[idx].start() > closer.start():
                    idx -= 1
            except IndexError:
                raise ValueError('Template parsing error: can not find the opener for '+str(closer.start()))
            matches[pending[idx]] = closer
            if len(pending) == 1 or idx == -len(pending):
                # Nothing is open around it, so it is a top-level tag.
                topParts.append(pending[idx])
            del pending[idx]

        cl = next(closers, None)
        while cl is not None:
            op = next(openers, None)
            if op is not None and op.start() < cl.start():
                pending.append(op)
                continue
            close(cl)
            if op is not None:
                pending.append(op)
            cl = next(closers, None)

        parts = []
        for opener in topParts:
            p = (opener.group()[2:-1], text[opener.end():matches[opener].start()])
            if p[0].startswith('ATG'):
                parts.insert(0, p)
            else:
                parts.append(p)
        return parts

    def _build_commands(self):
        '''
        Build the mapping from tag name to handler.

        Every handler is called as handler(index, flow, argument) where
        index is the current row, flow the text being built and argument
        the tag body; it returns the new flow.
        '''
        parse = self._parse
        partCommands = {}

        def plain(index, flow, keytag):
            if keytag not in self._data.keys:
                self.warning('WARNING: keyword not found in table - %s' % (keytag))
                return flow
            return flow.replace('[$%s$]' % (keytag), _text(self._data[keytag, index]))
        partCommands['_ATGPLAIN'] = plain

        def nPlain(index, flow, keytag, number):
            # Plain tag inside a list: reads the numbered column keytag<number>.
            if keytag+str(number) not in self._data.keys:
                self.warning('WARNING: keyword not found in table - %s' % (keytag+str(number)))
                return flow
            return flow.replace('[$%s$]' % (keytag), _text(self._data[keytag+str(number), index]))

        def lIndex(index, flow, keytag, number):
            return flow.replace('[$ATGLINDEX$]', str(number))

        def applyParts(index, flow, parts):
            # Run nested tags of a command body through their handlers.
            for name, body in parts:
                if name in partCommands:
                    flow = partCommands[name](index, flow, body)
                elif body == '':
                    flow = plain(index, flow, name)
                else:
                    self.warning('Warning: unknown command '+name)
            return flow

        def addkey(index, flow, text):
            if self.key.find(text) < 0:
                self.key += text
            return flow.replace('[$ATGkey$' + text + '$]', '')
        partCommands['ATGkey'] = addkey

        def addFooter(index, flow, text):
            if self.footer.find(text) < 0:
                self.footer += text
            return flow.replace('[$ATGFOOTER$' + text + '$]', '')
        partCommands['ATGFOOTER'] = addFooter

        def makeList(tag, cut):
            # [$tag$keyTag$body$]: repeat body for keyTag1, keyTag2, ...,
            # skipping numbers whose value is empty. With cut the last
            # character of the result is dropped.
            def command(index, flow, string):
                keyTag, _, sub = string.partition('$')
                key = '[$%s$%s$%s$]' % (tag, keyTag, sub)
                subparts = parse(sub)
                myText = u''
                if keyTag not in self._multiWords:
                    self.warning('Keytag %s is not multiple!' % (keyTag))
                    return flow
                for j in range(1, self._multiWords[keyTag]+1):
                    subText = sub
                    for name, body in subparts:
                        if name in self._multiWords:
                            subText = nPlain(index, subText, name, j)
                        elif name == 'ATGLINDEX':
                            subText = lIndex(index, subText, name, j)
                        elif name in partCommands:
                            subText = partCommands[name](index, subText, body)
                        elif body == '':
                            subText = plain(index, subText, name)
                        else:
                            self.warning('Warning: unknown command '+name)
                    if not self._data[keyTag+str(j), index] == u'':
                        myText += subText
                if cut:
                    myText = myText[:-1]
                return flow.replace(key, myText)
            return command
        partCommands['ATGLIST'] = makeList('ATGLIST', False)
        partCommands['ATGLISTCUT'] = makeList('ATGLISTCUT', True)

        def makeCondition(tag, passes):
            # [$tag$keyTag$target$body$]: keep body when passes(value, target).
            def command(index, flow, string):
                fields = string.split('$')
                if len(fields) < 2:
                    self.warning('Warning: malformed %s tag' % (tag))
                    return flow
                keyTag, targetValue = fields[0], fields[1]
                key = '[$%s$%s$]' % (tag, string)
                sub = string[len(keyTag)+len(targetValue)+2:]
                subparts = parse(sub)
                if keyTag not in self._data.keys:
                    self.warning('WARNING: keyword not found in table - %s' % (keyTag))
                    return flow
                myText = u''
                if passes(self._data[keyTag, index], targetValue):
                    myText = applyParts(index, sub, subparts)
                return flow.replace(key, myText)
            return command

        def numeric(compare):
            # Only the comparison is guarded, so errors raised by nested
            # commands are not reported as a comparison problem.
            def passes(value, target):
                try:
                    return compare(float(value), float(target))
                except (TypeError, ValueError):
                    self.warning('ERROR: trying to compare uncomparable values!')
                    return False
            return passes

        partCommands['ATGIF'] = makeCondition(
            'ATGIF', lambda value, target: _text(value) == _text(target))
        partCommands['ATGIFNOT'] = makeCondition(
            'ATGIFNOT', lambda value, target: not _text(value) == _text(target))
        partCommands['ATGGREATER'] = makeCondition(
            'ATGGREATER', numeric(lambda value, target: value > target))
        partCommands['ATGLESS'] = makeCondition(
            'ATGLESS', numeric(lambda value, target: value < target))

        def addReplace(index, flow, string):
            # Registers a text substitution applied after all tags of the row.
            srcString, _, targetString = string.partition('$')
            self.replacement[srcString] = targetString
            return flow.replace('[$ATGREPLACE$' + string + '$]', '')
        partCommands['ATGREPLACE'] = addReplace

        def addPrefix(index, flow, string):
            # Appends the rendered body to the output name prefix of this row.
            self.bonusPrefix += applyParts(index, string, parse(string))
            return flow.replace('[$ATGPREFIX$' + string + '$]', '')
        partCommands['ATGPREFIX'] = addPrefix

        def skip(index, flow, string):
            # The marker makes process() drop the whole row.
            return u'[$ATGSKIP_DO$]'
        partCommands['ATGSKIP'] = skip

        def prev(index, flow, string):
            # Value of the column in the previous row; the first row is skipped.
            keytag = string.split('$')[0]
            if keytag not in self._data.keys:
                self.warning('WARNING: keyword not found in table - %s' % (keytag))
                return flow
            if index == 0:
                self.log('INFORMATION: Skipping ATGPREV tag for entry with index = 0')
                return u'[$ATGSKIP_DO$]'
            return flow.replace('[$ATGPREV$%s$]' % (keytag), _text(self._data.col_by_key(keytag)[index-1]))
        partCommands['ATGPREV'] = prev

        return partCommands

    def process(self, data):
        '''
        Render the template once per row of data.

        data must provide keys, data[key, row] and col_by_key(key).
        Returns a dict mapping output name (prefix + key field value) to
        text, or a single text when the template is in oneFile mode.
        '''
        self._data = data
        # Columns named base1, base2, ... are "multiple" keys; remember how
        # many numbered columns each base has.
        multiWords = {}
        for name in data.keys:
            base = name.rstrip('0123456789')
            if base != name:
                multiWords[base] = multiWords.get(base, 0) + 1
        self._multiWords = multiWords
        if self.oneFile:
            out = u''
        else:
            out = {}
        partCommands = self.commands
        for index, element in enumerate(data.col_by_key(self.keyField)):
            self.bonusPrefix = self.prefix
            text = self.text
            for name, body in self.parts:
                if name in partCommands:
                    text = partCommands[name](index, text, body)
                elif body == u'':
                    text = partCommands['_ATGPLAIN'](index, text, name)
                else:
                    self.warning('Warning: unknown command '+name)
            for src in self.replacement:
                text = text.replace(src, self.replacement[src])
            self.replacement = {}
            if u'[$ATGSKIP_DO$]' in text:
                self.log('ATGSKIP Tag found. Skipping ' + _text(element) + '.')
            else:
                if self.oneFile:
                    out += text
                else:
                    out[self.bonusPrefix + _text(element)] = text
                self.log('Created %s' % (element))
        if self.oneFile:
            out = self.key + out + self.footer
        return out

    def warning(self, text):
        '''Report a template problem; prints to standard output.'''
        print(text)

    def log(self, text):
        '''Logging hook; does nothing by default.'''
        pass

    @classmethod
    def express(cls, text, **kwargs):
        '''
        Build a template directly from text.

        Optional keyword arguments: keyField ('Index'), extension (''),
        prefix (''), encoding ('utf-8'), oneFile (False), transpose (False).
        '''
        # The text goes through the constructor so that it is parsed.
        obj = cls(text=text)
        obj.keyField = kwargs.get('keyField', 'Index')
        obj.extension = kwargs.get('extension', '')
        obj.prefix = kwargs.get('prefix', '')
        obj.encoding = kwargs.get('encoding', 'utf-8')
        obj.oneFile = kwargs.get('oneFile', False)
        obj.transpose = kwargs.get('transpose', False)
        obj.bonusPrefix = obj.prefix
        return obj