编写了一个 Python 脚本,该脚本不断从 Twitter 抓取数据并将其写入文件。希望该程序在每小时写入当前时间戳到文件中。代码如下所示:
#! /usr/bin/env python
import tweetstream
import simplejson
import urllib
import time
import datetime
import sched
class twit:
def __init__(self,uname,pswd,filepath):
self.uname=uname
self.password=pswd
self.filepath=open(filepath,"wb")
def main(self):
i=0
s = sched.scheduler(time.time, time.sleep)
output=self.filepath
#Grab every tweet using Streaming API
with tweetstream.TweetStream(self.uname, self.password) as stream:
for tweet in stream:
if tweet.has_key("text"):
try:
#Write tweet to file and print it to STDOUT
message=tweet['text']+ "\n"
output.write(message)
print tweet['user']['screen_name'] + ": " + tweet['text'], "\n"
################################
#Timestamp code
#Timestamps should be placed once every hour
s.enter(10, 1, t.timestamp, (s,))
s.run()
except KeyError:
pass
def timestamp(self,sc):
now = datetime.datetime.now()
current_time= now.strftime("%Y-%m-%d %H:%M")
print current_time
self.filepath.write(current_time+"\n")
if __name__=='__main__':
t=twit("rohanbk","cookie","tweets.txt")
t.main()
2、解决方案
为了解决这个问题,可以修改代码,使用 sched.scheduler() 模块来安排一次性任务。以下是修改后的代码:
import tweetstream
import simplejson
import urllib
import time
import datetime
import sched
class twit:
def __init__(self,uname,pswd,filepath):
self.uname=uname
self.password=pswd
self.filepath=open(filepath,"wb")
def main(self):
i=0
s = sched.scheduler(time.time, time.sleep)
output=self.filepath
#Grab every tweet using Streaming API
with tweetstream.TweetStream(self.uname, self.password) as stream:
for tweet in stream:
if tweet.has_key("text"):
try:
#Write tweet to file and print it to STDOUT
message=tweet['text']+ "\n"
output.write(message)
print tweet['user']['screen_name'] + ": " + tweet['text'], "\n"
################################
#Timestamp code
#Timestamps should be placed once every hour
s.enter(3600, 1, self.timestamp, (s,))
s.run()
except KeyError:
pass
def timestamp(self,sc):
now = datetime.datetime.now()
current_time= now.strftime("%Y-%m-%d %H:%M")
print current_time
self.filepath.write(current_time+"\n")
if __name__=='__main__':
t=twit("rohanbk","cookie","tweets.txt")
t.main()
在该代码中,使用 s.enter(3600, 1, self.timestamp, (s,)) 函数安排每小时运行一次 timestamp() 函数。这样,就可以每小时将时间戳写入文件,而无需不断检查时间。