我构建了一个自定义迭代器,但它总是只能完整遍历文件一次。虽然在两次遍历之间对相关文件对象调用了 seek(0),但在第二次遍历时,第一次调用 next() 就抛出了 StopIteration。
以下是相关代码:
class MappedIterator(object):
    """
    Given an iterator of dicts or objects and an attribute mapping,
    make the items accessible via the desired interface.

    Currently it only produces dictionaries with string values. Can be
    made to support actual objects later on. Somehow... :D

    ``obj`` is either something exposing a ``next()`` method or an
    indexable sequence. ``mapping`` is an iterable of entries that expose
    ``attribute`` (the key read from each item) and ``mapped_name`` (the
    key written to the result).
    """

    def __init__(self, obj=None, mapping=None, *args, **kwargs):
        self._obj = obj
        # Build a fresh dict per instance: a ``{}`` default in the signature
        # would be shared by every instance created without a mapping.
        self._mapping = {} if mapping is None else mapping
        # Position used for the index-based fallback in next().
        self.cnt = 0

    def __iter__(self):
        """Return self; all loops over this object share one iteration state."""
        return self

    def reset(self):
        """Rewind the index counter to the first item."""
        self.cnt = 0

    def next(self):
        """Return the next item as a dict of mapped names to stripped strings.

        Raises:
            StopIteration: when fetching or mapping the item raises
                IndexError or StopIteration; reset() is called first.
        """
        try:
            try:
                item = self._obj.next()
            except AttributeError:
                # Fall back to positional access when calling next() raised
                # AttributeError (e.g. the source is a plain sequence).
                item = self._obj[self.cnt]
            # If no mapping is provided, an empty dict will be returned.
            mapped_obj = {}
            for mapped_attr in self._mapping:
                attr = mapped_attr.attribute
                new_attr = mapped_attr.mapped_name
                val = str(item.get(attr, '')).strip()  # get rid of whitespace
                # TODO: apply transformers...
                # Concatenating onto any existing value allows multi
                # attribute mapping, i.e. grouping several attributes into
                # one.
                mapped_obj[new_attr] = mapped_obj.get(new_attr, '') + val
            self.cnt += 1
            return mapped_obj
        except (IndexError, StopIteration):
            self.reset()
            raise StopIteration
class CSVMapper(MappedIterator):
    """MappedIterator over the rows produced by a CSV reader.

    Keeps a reference to the underlying file object so that reset() can
    rewind it.
    """

    def __init__(self, reader, mapping=None, *args, **kwargs):
        """Store the reader, the mapping and the optional ``file`` keyword."""
        self._reader = reader
        # Build a fresh dict per instance instead of sharing a mutable
        # default between instances.
        self._mapping = {} if mapping is None else mapping
        # ``file`` is optional: reset() already copes with a missing file,
        # so do not raise KeyError when the keyword is absent.
        self._file = kwargs.pop('file', None)
        super(CSVMapper, self).__init__(self._reader, self._mapping, *args, **kwargs)

    @classmethod
    def from_csv(cls, file, mapping, *args, **kwargs):
        """Build a CSVMapper that reads ``file`` through a DictReader."""
        # TODO: Parse kwargs for various DictReader kwargs.
        return cls(reader=DictReader(file), mapping=mapping, file=file)

    def __len__(self):
        """Return the reader's current ``line_num`` as an int.

        NOTE(review): per the csv docs ``line_num`` counts lines read so
        far, so this is presumably not the total row count -- confirm.
        """
        return int(self._reader.line_num)

    def reset(self):
        """Rewind the underlying file (if any) and reset the item counter.

        Only the file position is rewound; the reader object itself is left
        untouched.
        """
        if self._file:
            self._file.seek(0)
        super(CSVMapper, self).reset()
示例用法:
# Say this file has 2 rows + a header row.
with open('somefile.csv', 'rb') as csv_file:
    mapping = MyMappingClass()  # this isn't really relevant
    reader = CSVMapper.from_csv(csv_file, mapping)

    # First pass prints:
    # > 'John'
    # > 'Bob'
    for r in reader:
        print(r['name'])

    # This won't print anything
    for r in reader:
        print(r['name'])
2、解决方案
推荐以下解决方法:
方法一:
- 不建议依赖 seek(0) 来重置读取状态,而是在每次遍历时根据文件名重新打开文件。
- 不要在 __iter__() 方法中返回 self,因为这样一来对象就只有一份迭代状态(相当于只有一个迭代器实例),无法被独立地重复遍历。
- 在 __iter__() 方法中创建一个新对象,其中包含新初始化的 reader 对象和新打开的文件句柄,并从 __iter__() 返回这个新对象。
- 不要使用 MappedIterator 的 .next() 方法中的那个异常处理程序。
- __iter__() 方法应该返回一个可以可靠地迭代的对象。
修改后的代码示例:
class CSVMapper(object):
    """Re-iterable, optionally key-mapped view over a CSV source.

    Every call to ``__iter__`` opens the source again and returns a
    brand-new iterator object, so one CSVMapper can be looped over any
    number of times.

    Args:
        reader: Callable that takes the opened source and returns an
            iterator of rows (e.g. ``csv.reader``).
        fname: Whatever the opener understands; a file name when the
            default ``open`` is used.
        mapping: Optional dict used to rename the first column of
            two-column rows. Rows are returned unchanged when it is empty.
        **kwargs: ``open_with`` may supply an alternative opener; every
            other keyword argument is forwarded to that opener.
    """

    def __init__(self, reader, fname, mapping=None, **kwargs):
        self._reader = reader
        self._fname = fname
        # Build a fresh dict per instance instead of sharing a mutable
        # default between instances.
        self._mapping = {} if mapping is None else mapping
        self._kwargs = kwargs
        self.line_num = 0
        # Handle opened by __iter__; closed once iteration is exhausted.
        self._source = None

    def __iter__(self):
        """Open the source afresh and return a new, independent iterator."""
        cls = type(self)
        obj = cls(self._reader, self._fname, self._mapping, **self._kwargs)
        # ``open_with`` selects the opener and must not be passed on to it:
        # forwarding it makes e.g. ``iter(seq, open_with=iter)`` raise
        # TypeError.
        opener_kwargs = dict(self._kwargs)
        open_with = opener_kwargs.pop("open_with", None)
        if open_with is not None:
            f = open_with(self._fname, **opener_kwargs)
        else:
            f = open(self._fname, "rt")
        obj._source = f
        # "itr" is my standard abbreviation for an iterator instance
        obj.itr = obj._reader(f)
        return obj

    def next(self):
        """Return the next row, renaming its key when a mapping applies.

        Raises:
            StopIteration: when the underlying reader is exhausted; the
                opened source is closed first.
        """
        try:
            item = next(self.itr)
        except StopIteration:
            self._close_source()
            raise
        self.line_num += 1
        # If no mapping is provided, item is returned unchanged.
        if not self._mapping:
            return item  # csv.reader() returns a list of string values
        # We have a mapping, so rows are expected to be (key, value) pairs.
        key, value = item
        if key in self._mapping:
            return [self._mapping[key], value]
        return item

    # Python 3 spells the iterator protocol method ``__next__``.
    __next__ = next

    def _close_source(self):
        """Close the handle opened by __iter__ if it has a close() method."""
        source, self._source = self._source, None
        close = getattr(source, "close", None)
        if close is not None:
            close()
if __name__ == "__main__":
    import csv

    # An in-memory stand-in for a CSV file: ``open_with=iter`` turns the
    # list into a line iterator instead of opening a real file.
    lst_csv = [
        "foo, 0",
        "one, 1",
        "two, 2",
        "three, 3",
    ]
    mapping = {"foo": "bar"}
    m = CSVMapper(csv.reader, lst_csv, mapping, open_with=iter)
    for item in m:  # will print every item
        print(item)
    for item in m:  # will print every item again
        print(item)
方法二:
对于 DictReader:
# Rewind the file, then re-run the existing DictReader's initialiser on it
# with the same options it was created with.
reader_options = {"delimiter": ","}
f = open(filename, "rb")
d = csv.DictReader(f, **reader_options)
f.seek(0)
d.__init__(f, **reader_options)
对于 DictWriter:
# Empty the file, re-run the existing DictWriter's initialiser on it with
# the same options, then write a fresh header row and flush it out.
writer_options = {"fieldnames": fields, "delimiter": ","}
f = open(filename, "rb+")
d = csv.DictWriter(f, **writer_options)
f.seek(0)
f.truncate(0)
d.__init__(f, **writer_options)
d.writeheader()
f.flush()