Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

py2.py 6.9KB


  1. # -*- coding: utf-8 -*-
  2. import csv
  3. import numbers
  4. from itertools import izip
  5. pass_throughs = [
  6. 'register_dialect',
  7. 'unregister_dialect',
  8. 'get_dialect',
  9. 'list_dialects',
  10. 'field_size_limit',
  11. 'Dialect',
  12. 'excel',
  13. 'excel_tab',
  14. 'Sniffer',
  15. 'QUOTE_ALL',
  16. 'QUOTE_MINIMAL',
  17. 'QUOTE_NONNUMERIC',
  18. 'QUOTE_NONE',
  19. 'Error'
  20. ]
  21. __all__ = [
  22. 'reader',
  23. 'writer',
  24. 'DictReader',
  25. 'DictWriter',
  26. ] + pass_throughs
  27. for prop in pass_throughs:
  28. globals()[prop] = getattr(csv, prop)
  29. def _stringify(s, encoding, errors):
  30. if s is None:
  31. return ''
  32. if isinstance(s, unicode):
  33. return s.encode(encoding, errors)
  34. elif isinstance(s, numbers.Number):
  35. pass # let csv.QUOTE_NONNUMERIC do its thing.
  36. elif not isinstance(s, str):
  37. s = str(s)
  38. return s
  39. def _stringify_list(l, encoding, errors='strict'):
  40. try:
  41. return [_stringify(s, encoding, errors) for s in iter(l)]
  42. except TypeError as e:
  43. raise csv.Error(str(e))
  44. def _unicodify(s, encoding):
  45. if s is None:
  46. return None
  47. if isinstance(s, (unicode, int, float)):
  48. return s
  49. elif isinstance(s, str):
  50. return s.decode(encoding)
  51. return s
  52. class UnicodeWriter(object):
  53. """
  54. >>> import unicodecsv
  55. >>> from cStringIO import StringIO
  56. >>> f = StringIO()
  57. >>> w = unicodecsv.writer(f, encoding='utf-8')
  58. >>> w.writerow((u'é', u'ñ'))
  59. >>> f.seek(0)
  60. >>> r = unicodecsv.reader(f, encoding='utf-8')
  61. >>> row = r.next()
  62. >>> row[0] == u'é'
  63. True
  64. >>> row[1] == u'ñ'
  65. True
  66. """
  67. def __init__(self, f, dialect=csv.excel, encoding='utf-8', errors='strict',
  68. *args, **kwds):
  69. self.encoding = encoding
  70. self.writer = csv.writer(f, dialect, *args, **kwds)
  71. self.encoding_errors = errors
  72. def writerow(self, row):
  73. return self.writer.writerow(
  74. _stringify_list(row, self.encoding, self.encoding_errors))
  75. def writerows(self, rows):
  76. for row in rows:
  77. self.writerow(row)
  78. @property
  79. def dialect(self):
  80. return self.writer.dialect
  81. writer = UnicodeWriter
  82. class UnicodeReader(object):
  83. def __init__(self, f, dialect=None, encoding='utf-8', errors='strict',
  84. **kwds):
  85. format_params = ['delimiter', 'doublequote', 'escapechar',
  86. 'lineterminator', 'quotechar', 'quoting',
  87. 'skipinitialspace']
  88. if dialect is None:
  89. if not any([kwd_name in format_params
  90. for kwd_name in kwds.keys()]):
  91. dialect = csv.excel
  92. self.reader = csv.reader(f, dialect, **kwds)
  93. self.encoding = encoding
  94. self.encoding_errors = errors
  95. self._parse_numerics = bool(
  96. self.dialect.quoting & csv.QUOTE_NONNUMERIC)
  97. def next(self):
  98. row = self.reader.next()
  99. encoding = self.encoding
  100. encoding_errors = self.encoding_errors
  101. unicode_ = unicode
  102. if self._parse_numerics:
  103. float_ = float
  104. return [(value if isinstance(value, float_) else
  105. unicode_(value, encoding, encoding_errors))
  106. for value in row]
  107. else:
  108. return [unicode_(value, encoding, encoding_errors)
  109. for value in row]
  110. def __iter__(self):
  111. return self
  112. @property
  113. def dialect(self):
  114. return self.reader.dialect
  115. @property
  116. def line_num(self):
  117. return self.reader.line_num
  118. reader = UnicodeReader
  119. class DictWriter(csv.DictWriter):
  120. """
  121. >>> from cStringIO import StringIO
  122. >>> f = StringIO()
  123. >>> w = DictWriter(f, ['a', u'ñ', 'b'], restval=u'î')
  124. >>> w.writerow({'a':'1', u'ñ':'2'})
  125. >>> w.writerow({'a':'1', u'ñ':'2', 'b':u'ø'})
  126. >>> w.writerow({'a':u'é', u'ñ':'2'})
  127. >>> f.seek(0)
  128. >>> r = DictReader(f, fieldnames=['a', u'ñ'], restkey='r')
  129. >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'î']}
  130. True
  131. >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'\xc3\xb8']}
  132. True
  133. >>> r.next() == {'a': u'\xc3\xa9', u'ñ':'2', 'r': [u'\xc3\xae']}
  134. True
  135. """
  136. def __init__(self, csvfile, fieldnames, restval='',
  137. extrasaction='raise', dialect='excel', encoding='utf-8',
  138. errors='strict', *args, **kwds):
  139. self.encoding = encoding
  140. csv.DictWriter.__init__(self, csvfile, fieldnames, restval,
  141. extrasaction, dialect, *args, **kwds)
  142. self.writer = UnicodeWriter(csvfile, dialect, encoding=encoding,
  143. errors=errors, *args, **kwds)
  144. self.encoding_errors = errors
  145. def writeheader(self):
  146. header = dict(zip(self.fieldnames, self.fieldnames))
  147. self.writerow(header)
  148. class DictReader(csv.DictReader):
  149. """
  150. >>> from cStringIO import StringIO
  151. >>> f = StringIO()
  152. >>> w = DictWriter(f, fieldnames=['name', 'place'])
  153. >>> w.writerow({'name': 'Cary Grant', 'place': 'hollywood'})
  154. >>> w.writerow({'name': 'Nathan Brillstone', 'place': u'øLand'})
  155. >>> w.writerow({'name': u'Will ø. Unicoder', 'place': u'éSpandland'})
  156. >>> f.seek(0)
  157. >>> r = DictReader(f, fieldnames=['name', 'place'])
  158. >>> print r.next() == {'name': 'Cary Grant', 'place': 'hollywood'}
  159. True
  160. >>> print r.next() == {'name': 'Nathan Brillstone', 'place': u'øLand'}
  161. True
  162. >>> print r.next() == {'name': u'Will ø. Unicoder', 'place': u'éSpandland'}
  163. True
  164. """
  165. def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None,
  166. dialect='excel', encoding='utf-8', errors='strict', *args,
  167. **kwds):
  168. if fieldnames is not None:
  169. fieldnames = _stringify_list(fieldnames, encoding)
  170. csv.DictReader.__init__(self, csvfile, fieldnames, restkey, restval,
  171. dialect, *args, **kwds)
  172. self.reader = UnicodeReader(csvfile, dialect, encoding=encoding,
  173. errors=errors, *args, **kwds)
  174. if fieldnames is None and not hasattr(csv.DictReader, 'fieldnames'):
  175. # Python 2.5 fieldnames workaround.
  176. # See http://bugs.python.org/issue3436
  177. reader = UnicodeReader(csvfile, dialect, encoding=encoding,
  178. *args, **kwds)
  179. self.fieldnames = _stringify_list(reader.next(), reader.encoding)
  180. if self.fieldnames is not None:
  181. self.unicode_fieldnames = [_unicodify(f, encoding) for f in
  182. self.fieldnames]
  183. else:
  184. self.unicode_fieldnames = []
  185. self.unicode_restkey = _unicodify(restkey, encoding)
  186. def next(self):
  187. row = csv.DictReader.next(self)
  188. result = dict((uni_key, row[str_key]) for (str_key, uni_key) in
  189. izip(self.fieldnames, self.unicode_fieldnames))
  190. rest = row.get(self.restkey)
  191. if rest:
  192. result[self.unicode_restkey] = rest
  193. return result