如何让 Python 代码更加 Pythonic?

54 阅读2分钟

一位 PHP 开发者正在学习 Python。他有一段脚本,用于从 Redis 存储中获取推文,从中提取信息并将其转储到 CSV 文件中,以便使用 LOAD DATA LOCAL INFILE 从 MySQL 数据库导入。他希望能让代码更加 Pythonic,并寻求建议和改进。

解决方案

1. 使用更 Pythonic 的流程控制

  • 用 for i in range(20) 替换 i=0; while(i<=end):
  • 用 if not last: 替换 if not last or last == None:

2. 更简洁的 if 语句

  • 用 if j['id'] != last_id: 替换 if(j['id'] <> last_id):(<> 运算符早已被弃用,并在 Python 3 中被移除;条件两侧的括号也是多余的)

3. 使用更 Pythonic 的惯用写法(真值判断、批量写入等)

  • 用 if not data: 替换 if len(data) == 0:
  • 用 hash_str = hash.hexdigest() 替换 hash_str = str(hash.hexdigest())
  • 用 writer.writerows(data) 替换 for item in data: writer.writerow(item)
  • 用 with open('tmp/'+file_name, mode='ab') as ofile: 替换 ofile = open('tmp/'+file_name, mode='ab') ... ofile.close()

4. 使用 with 语句

  • 用 with open('tmp/'+file_name, mode='ab') as ofile: 替换 ofile = open('tmp/'+file_name, mode='ab') ... ofile.close()

5. 使用 print "Upload Error:", uploadr[0] 替换 print "Upload Error: "+uploadr[0](当 uploadr[0] 是整数状态码时,用 + 拼接字符串会抛出 TypeError;用逗号分隔则会自动把每个参数转换为字符串)

6. 使用更 Pythonic 的类和方法

  • 将代码组织成类和方法,使代码更具可读性和可维护性。

7. 运行 pylint 代码检查工具

  • 使用 pylint 代码检查工具检查代码,并根据其建议进行改进。

代码示例

def process_tweets():
  """Collect up to 20 tweets from the Redis store, export them and upload.

  Tweets are popped one at a time with ``red.pop_tweet()``.  Collection
  stops early when the store returns a falsy value, or when a tweet carries
  the same ID as the last tweet that was accepted.  Tweets for which
  ``TweetHandler.proc()`` returns a falsy value are skipped.

  When at least one tweet was collected, the batch is written and uploaded
  through ``CSVHandler`` and ``openanything.openAnything`` is then called on
  the ``tweets.php`` URL (presumably to trigger the server-side import --
  confirm against that script).

  Returns:
    False when no tweet was collected; otherwise None (implicitly).
  """
  data = []
  last_id = 0
  for _ in range(20):
    last = red.pop_tweet()
    if not last:
      break

    t = TweetHandler(last)
    t.cleanup()
    t.extract()

    # Look the ID up once; it is needed for both the duplicate check and
    # for remembering the last accepted tweet.
    tweet_id = t.get_tweet_id()
    if tweet_id == last_id:
      break

    tweet = t.proc()
    if tweet:
      # append() grows the list in place instead of copying it each time.
      data.append(tweet)
      last_id = tweet_id

    time.sleep(0.01)

  if not data:
    return False

  ch = CSVHandler(data)
  ch.pack_csv()
  ch.uploadr()

  source = "http://bot.tweelay.net/tweets.php"
  openanything.openAnything(
    source,
    etag=None,
    lastmodified=None,
    agent="Tweelay/%s (Redis)" % __version__
    )

class TweetHandler(object):
  """Clean a raw stored tweet, parse it and build the row tuple for it.

  Typical use is ``cleanup()``, then ``extract()``, then ``proc()`` and/or
  ``get_tweet_id()``.

  Instance state:
    json: raw payload handed to the constructor.
    tweet: cleaned text produced by ``cleanup()`` (None before that).
    tweet_id: initialised to 0; no method of this class updates it.
    j: object parsed by ``extract()`` (None before that).
  """
  def __init__(self, json):
    """Remember the raw payload; nothing is decoded or parsed yet."""
    self.json = json
    self.tweet = None
    self.tweet_id = 0
    self.j = None

  def cleanup(self):
    """Decode the raw payload and strip its wrapper, storing ``self.tweet``.

    A leading ``s:<digits>:"`` prefix and a trailing newline + quote(s) +
    ``;`` suffix are removed (this looks like a PHP-serialized string
    wrapper -- confirm against the producer).
    """
    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    raw = self.json
    # A payload that is already text must not be decoded again (doing so
    # raises TypeError); anything else is decoded as UTF-8.
    if not isinstance(raw, text_type):
      raw = text_type(raw, "utf-8")
    raw = re.sub('^s:[0-9]+:["]+', '', raw)
    self.tweet = re.sub('\n["]+;$', '', raw)

  def extract(self):
    """Parse the cleaned tweet text as JSON and store the result in ``self.j``."""
    self.j = simplejson.loads(self.tweet)

  def proc(self):
    """Return the row tuple for this tweet, or None when it cannot be built.

    A KeyError from ``build()`` means an expected field is missing.
    Payloads containing a 'delete' key are skipped silently; any other
    payload is printed as ``key=value`` pairs before None is returned.
    """
    try:
      return self.build()
    except KeyError:
      if 'delete' not in self.j:
        # Single parenthesised argument: prints the same text whether
        # ``print`` is a statement or a function.
        print(";".join(["%s=%s" % (k, v) for k, v in self.j.items()]))
      return None

  def build(self):
    """Return the row tuple built from the parsed tweet.

    Raises:
      KeyError: when any of the fields read below is missing.
    """
    user = self.j['user']
    return (
      user['id'],
      user['screen_name'].encode('utf-8'),
      self.j['text'].encode('utf-8'),
      self.j['id'],
      self.j['in_reply_to_status_id'],
      self.j['in_reply_to_user_id'],
      self.j['created_at'],
      __version__,
    )

  def get_tweet_id(self):
    """Return the tweet ID, the status ID of a delete payload, or None."""
    if 'id' in self.j:
      return self.j['id']

    if 'delete' in self.j:
      return self.j['delete']['status']['id']

    # Neither a tweet nor a delete payload.
    return None


class CSVHandler(object):
  """Write a batch of tweet rows to a CSV file under ``tmp/`` and upload it.

  The file name is generated once, when the handler is constructed, and is
  reused by ``pack_csv()`` and ``uploadr()``.
  """
  def __init__(self, data):
    """Store the rows and pick the file name for this batch.

    Args:
      data: sequence of rows; each row is handed to ``csv.writer``.
    """
    self.data = data
    self.file_name = self.gen_file_name()

  def gen_file_name(self):
    """Return ``<sha1 hex digest>.csv`` derived from the time and row count."""
    now = datetime.datetime.now()

    hashr = hashlib.sha1()
    # Feed bytes explicitly: both strings are pure ASCII, so the digest is
    # the same as hashing the native strings, and this also works where
    # hashlib rejects text.
    hashr.update(str(now).encode("utf-8"))
    hashr.update(str(len(self.data)).encode("utf-8"))

    return hashr.hexdigest() + '.csv'

  def _local_path(self):
    """Return the path of the CSV file inside the ``tmp/`` directory."""
    return 'tmp/' + self.file_name

  def pack_csv(self):
    """Append every row in ``self.data`` to the CSV file."""
    with open(self._local_path(), mode='ab') as ofile:
      writer = csv.writer(
        ofile,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL)
      writer.writerows(self.data)

  def uploadr(self):
    """Upload the CSV file and print the outcome.

    The first element of the ``upload.upload_file`` result is compared with
    200 (presumably an HTTP status code -- confirm against ``upload``).
    """
    url = "http://example.com/up.php?filename="+self.file_name
    result = upload.upload_file(url, self._local_path())
    status = result[0]
    if status == 200:
      print("Upload: 200 - (%d) %s" % (len(self.data), self.file_name))
      print("-------")
      # NOTE: the local CSV file is not removed after a successful upload;
      # files accumulate under tmp/ until cleaned up elsewhere.
    else:
      print("Upload Error: %s" % (status,))

def main():
  """Run ``process_tweets()`` forever, pausing one second between batches."""
  while True:
    process_tweets()
    time.sleep(1)


if __name__ == "__main__":
  main()