You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

271 lines
7.6KB

  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. Data module for Automatic Text Tools.
  5. (c) 2013 Ivan "Kai SD" Korystin
  6. License: GPLv3
  7. '''
  8. import csv, codecs, cStringIO
  class Data(object):
      '''
      Simple in-memory table. Can be used for subclassing or procedural data
      creation.

      keys - ordered list of column names
      rows - list of rows; each row is a list holding one value per key
      '''

      def __init__(self, *args, **kwargs):
          '''
          Constructor. Creates an empty table; any arguments are ignored.
          '''
          self.keys = []
          self.rows = []

      def _locate(self, pair):
          '''
          Resolves a (key, row) pair to a tuple (row list, column index).
          Raises KeyError for an unknown key and IndexError for a missing row.
          Both are subclasses of BaseException, so callers that catch
          BaseException keep working.
          '''
          key, row = pair[0], pair[1]
          if key not in self.keys:
              # (key,) keeps the formatting correct even when key is a tuple
              raise KeyError('Named value %s not found in data' % (key,))
          if not len(self.rows) > row:
              raise IndexError('Row %i not found in data' % (row))
          return self.rows[row], self.keys.index(key)

      def __getitem__(self, pair):
          '''
          Returns a value for given key and row: data[key, row].
          '''
          cells, column = self._locate(pair)
          return cells[column]

      def __setitem__(self, pair, value):
          '''
          Sets a value for given key and row: data[key, row] = value.
          '''
          cells, column = self._locate(pair)
          cells[column] = value

      def __str__(self):
          '''
          Returns data as string: the str() of the tuple (keys, rows).
          '''
          return str((self.keys, self.rows))

      def __repr__(self):
          return self.__str__()

      def has_key(self, key):
          '''
          Returns True if given key exists in data
          '''
          return key in self.keys

      def add_rows(self, n=1):
          '''
          Adds n rows to the data, each filled with one empty string per key.
          '''
          width = len(self.keys)
          # every row gets its own list so later edits do not leak between rows
          self.rows.extend([''] * width for _ in range(n))

      def add_keys(self, *h):
          '''
          Adds new keys to the data and pads every existing row with an empty
          string for each of them.
          '''
          self.keys.extend(h)
          padding = [''] * len(h)
          for row in self.rows:
              row.extend(padding)

      def del_row(self, idx):
          '''
          Removes given row from data
          '''
          del self.rows[idx]

      def col_by_key(self, key):
          '''
          Returns a column (as a tuple) by header's name.
          Raises KeyError if the key is not present.
          '''
          if key not in self.keys:
              raise KeyError('Named value %s not found in data' % (key,))
          return self.col_by_idx(self.keys.index(key))

      def col_by_idx(self, idx):
          '''
          Returns a column (as a tuple) by header's index.
          Rows too short to contain that index are left out of the result.
          '''
          return tuple(row[idx] for row in self.rows if len(row) > idx)

      def row_by_idx(self, idx):
          '''
          Returns a row by index, as a tuple.
          '''
          return tuple(self.rows[idx])

      def transpose(self, key_idx = 0):
          '''
          Returns the transposed copy of the data.
          key_idx - index of the column, that contains keywords (default: 0)

          The new keys are the old key at key_idx followed by that column's
          values; every other old column becomes a row that starts with its
          old key.
          '''
          new_data = Data()
          new_data.keys = [self.keys[key_idx]] + list(self.col_by_idx(key_idx))
          for idx, key in enumerate(self.keys):
              if idx != key_idx:
                  new_data.rows.append([key] + list(self.col_by_idx(idx)))
          return new_data

      def add_data(self, other):
          '''
          Adds rows from another data table to this one.

          Keys that exist only in the other table are added to this one first.
          Every row of the other table is then appended, reordered to this
          table's key order; cells with no source value are set to ''.
          '''
          for key in other.keys:
              if key not in self.keys:
                  self.add_keys(key)
          # first position of each key in the other table
          positions = {}
          for idx, key in enumerate(other.keys):
              positions.setdefault(key, idx)
          # iterate a snapshot so that data.add_data(data) terminates
          for source in list(other.rows):
              new_row = []
              for key in self.keys:
                  idx = positions.get(key)
                  if idx is not None and idx < len(source):
                      new_row.append(source[idx])
                  else:
                      new_row.append('')
              self.rows.append(new_row)
  class CSVData(Data):
      '''
      Data table that can be loaded from and saved to CSV files.
      Written for Python 2: relies on cStringIO, unicode and the next()
      iterator protocol.
      '''

      class Reader(object):
          '''
          CSV reader for a byte stream in a given encoding. Iterating yields
          rows as lists of unicode strings.
          '''

          class Recoder(object):
              '''
              Iterator that decodes a stream from the given encoding and
              returns its lines re-encoded as UTF-8 byte strings.
              '''

              def __init__(self, f, encoding):
                  self.reader = codecs.getreader(encoding)(f)

              def __iter__(self):
                  return self

              def next(self):
                  return self.reader.next().encode("utf-8")

          def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwargs):
              # the csv reader is fed UTF-8 bytes, whatever the source encoding is
              f = self.Recoder(f, encoding)
              self.reader = csv.reader(f, dialect=dialect, **kwargs)

          def next(self):
              row = self.reader.next()
              return [unicode(s, "utf-8") for s in row]

          def __iter__(self):
              return self

      class Writer(object):
          '''
          CSV writer that encodes rows to the given encoding and writes them
          to the stream f.
          '''

          def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwargs):
              # rows are formatted into an in-memory queue first, then recoded
              self.queue = cStringIO.StringIO()
              self.writer = csv.writer(self.queue, dialect=dialect, **kwargs)
              self.stream = f
              self.encoder = codecs.getincrementalencoder(encoding)()

          def writerow(self, row):
              self.writer.writerow([unicode(s).encode("utf-8") for s in row])
              # take the UTF-8 output of the csv writer, recode it to the
              # target encoding and pass it on to the real stream
              data = self.queue.getvalue()
              data = data.decode("utf-8")
              data = self.encoder.encode(data)
              self.stream.write(data)
              # empty the queue for the next row
              self.queue.truncate(0)

          def writerows(self, rows):
              for row in rows:
                  self.writerow(row)

      def __init__(self, file, encoding='utf-8', delimiter=';', quotechar='"', **kwargs):
          '''
          Constructor.
          file - CSV table filename or an open file-like object; a false value
                 (None, '') creates an empty table
          encoding - CSV table encoding (default: utf-8)
          delimiter - CSV table delimiter (default: ;)
          quotechar - CSV table quotechar (default: ")
          Extra keyword arguments are accepted but not used.
          '''
          super(CSVData, self).__init__()
          if not file:
              return
          options = dict(encoding=encoding, delimiter=delimiter, quotechar=quotechar)
          if isinstance(file, basestring):
              # The rows are read lazily, so the file has to stay open until
              # loading is finished. Binary mode is what the Python 2 csv
              # module expects.
              with open(file, 'rb') as stream:
                  self._load(self.Reader(stream, **options))
          else:
              self._load(self.Reader(file, **options))

      def _load(self, records):
          '''
          Fills keys and rows from an iterable of CSV records.
          '''
          keys = None
          rows = []
          for record in records:
              if not keys:
                  # the first non-empty record is the header; empty records
                  # before it are skipped
                  keys = record
              else:
                  rows.append([self._parse_value(cell) for cell in record])
          # an empty source gives an empty key list rather than None
          self.keys = keys or []
          self.rows = rows

      @staticmethod
      def _parse_value(text):
          '''
          Converts a cell to int if possible, else to float, else returns it
          unchanged.
          '''
          for convert in (int, float):
              try:
                  return convert(text)
              except ValueError:
                  pass
          return text

      def export_csv(self, filename, encoding='utf-8', delimiter=';', quotechar='"', **kwargs):
          '''
          Saves the data to CSV file
          filename - CSV table filename
          encoding - CSV table encoding (default: utf-8)
          delimiter - CSV table delimiter (default: ;)
          quotechar - CSV table quotechar (default: ")
          Extra keyword arguments are passed to the csv writer.
          '''
          with open(filename, 'wb') as f:
              csvfile = self.Writer(f, encoding=encoding, delimiter=delimiter,
                                    quotechar=quotechar, **kwargs)
              csvfile.writerow(self.keys)
              csvfile.writerows(self.rows)