- I have a list of dictionaries containing Unicode strings.
- `csv.DictWriter` can write a list of dictionaries to a CSV file.
- I want the CSV file to be encoded in UTF-8.
- The `csv` module cannot handle converting Unicode strings to UTF-8.
- The `csv` module documentation has an example for converting everything to UTF-8:
def utf_8_encoder(unicode_csv_data):
    """Yield each line of *unicode_csv_data* encoded as UTF-8.

    Args:
        unicode_csv_data: An iterable of Unicode strings (one per line).

    Yields:
        Each line encoded with ``line.encode('utf-8')``.
    """
    for line in unicode_csv_data:
        yield line.encode('utf-8')

# The csv documentation also has a UnicodeWriter class.
But... how do I make `DictWriter` work with these? Wouldn't they have to inject themselves in the middle of it, to catch the disassembled dictionaries and encode them before it writes them to the file? I don't get it.
If you are using Python 2.7 or later, use a dict comprehension to remap the dictionary to UTF-8 before passing it to `DictWriter`:
# coding: utf-8
# Python 2.7 example: write one dictionary of Unicode strings as UTF-8 CSV.
import csv

d = {'name': u'马克', 'pinyin': u'mǎkè'}

# Binary mode, because the values handed to the csv writer are already
# UTF-8 encoded byte strings. The `with` block closes the file even if
# one of the writes raises.
with open('out.csv', 'wb') as f:
    # BOM (optional... Excel needs it to open a UTF-8 file properly)
    f.write(u'\ufeff'.encode('utf8'))
    w = csv.DictWriter(f, sorted(d.keys()))
    w.writeheader()
    # Remap every value to UTF-8 before handing the row to DictWriter.
    w.writerow({k: v.encode('utf8') for k, v in d.items()})

# You can use the same idea to update UnicodeWriter to a DictUnicodeWriter:
# coding: utf-8
# Python 2.7 example: a DictWriter wrapper that accepts Unicode values.
import csv
import cStringIO
import codecs


class DictUnicodeWriter(object):
    """Write dictionaries of Unicode strings to a stream as encoded CSV.

    Rows are first written as UTF-8 into an in-memory queue by a regular
    ``csv.DictWriter``; the queued text is then decoded, re-encoded into
    the target ``encoding`` and written to the stream ``f``.

    Args:
        f: Target stream; must provide ``write()`` accepting the encoded data.
        fieldnames: Field names passed through to ``csv.DictWriter``.
        dialect: CSV dialect passed through to ``csv.DictWriter``.
        encoding: Name of the target encoding for the stream.
        **kwds: Extra keyword arguments passed through to ``csv.DictWriter``.
    """

    def __init__(self, f, fieldnames, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.DictWriter(self.queue, fieldnames, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def _flush_queue(self):
        """Move everything in the queue to the target stream, re-encoded."""
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and re-encode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty the queue
        self.queue.truncate(0)

    def writerow(self, d):
        """Write one dictionary ``d`` whose values are Unicode strings."""
        self.writer.writerow({k: v.encode("utf-8") for k, v in d.items()})
        self._flush_queue()

    def writerows(self, rows):
        """Write every dictionary in the iterable ``rows``."""
        for d in rows:
            self.writerow(d)

    def writeheader(self):
        """Write the header row to the target stream."""
        self.writer.writeheader()
        # Flush right away so the header reaches the stream (in the target
        # encoding) even if no data row is ever written afterwards.
        self._flush_queue()


d1 = {'name': u'马克', 'pinyin': u'mǎkè'}
d2 = {'name': u'美国', 'pinyin': u'měiguó'}

# The `with` block closes the file even if one of the writes raises.
with open('out.csv', 'wb') as f:
    # BOM (optional... Excel needs it to open a UTF-8 file properly)
    f.write(u'\ufeff'.encode('utf8'))
    w = DictUnicodeWriter(f, sorted(d1.keys()))
    w.writeheader()
    w.writerows([d1, d2])
Comments
Post a Comment