import os
from flask import Flask, jsonify, render_template, request, url_for, send_from_directory
from werkzeug.utils import secure_filename
import json
import requests
from bs4 import BeautifulSoup
# True when the SERVERLESS environment variable is set to a non-empty value.
IS_SERVERLESS = bool(os.environ.get('SERVERLESS'))
print(IS_SERVERLESS)

# Flask takes the module's import name; the bare identifier `name` is not
# defined anywhere in this file.
app = Flask(__name__)
# Initialize the temporary upload directory.
def init_upload_dir():
    """Create the upload directory if needed and record it in ``app.config``.

    The directory is ``/tmp/uploads`` when ``IS_SERVERLESS`` is true,
    otherwise ``uploads`` under the current working directory. The chosen
    path is stored under the ``UPLOAD_DIR`` config key.
    """
    if IS_SERVERLESS:
        upload_dir = '/tmp/uploads'
    else:
        upload_dir = os.path.join(os.getcwd(), 'uploads')
    # exist_ok=True replaces the separate exists() check, so there is no
    # window in which another process can create the directory first.
    os.makedirs(upload_dir, exist_ok=True)
    app.config['UPLOAD_DIR'] = upload_dir


init_upload_dir()
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route("/runs2")
def runs():
    """Scrape the article list, fetch each article and upload it.

    For every entry matched on the listing page, the linked article page is
    fetched; its title, category, body text and the listing intro are passed
    to ``add_data``, and the ``requestId`` found in the reply is collected.
    An entry is logged and skipped (instead of aborting the whole run) when
    its markup lacks an expected element, when its page cannot be fetched,
    or when the upload reply carries no ``requestId``.

    Returns:
        str: JSON text with the keys ``code``, ``msg`` and ``data``, where
        ``data`` is the list of collected request ids.
    """
    post_selector = '.wrapper > .main > .wrap > .content > .block > .post'

    # Fetch the article list.
    # NOTE(review): this URL carries no scheme, and requests rejects
    # scheme-less URLs -- confirm whether http:// or https:// is intended.
    lists = get_html('www.54yk.com/', post_selector)
    data = []
    print('lists>>>>>', len(lists))
    for item in lists:
        link = item.find('a')
        intro = item.find(class_='intro')
        if link is None or intro is None:
            print('runs.skip>>>>> list entry has no link or intro')
            continue

        result = {
            'href': link.get('href'),
            'title': link.get_text(),
            'intro': intro.get_text()
        }
        print('item1>>>>>', result['href'])
        print('item2>>>>>', result['title'])
        print('item3>>>>>', result['intro'])
        if not result['href']:
            print('runs.skip>>>>> list entry link has no href')
            continue

        # Fetch the article content.
        try:
            contents = get_html(result['href'], post_selector)
        except requests.RequestException as err:
            print('runs.skip>>>>> fetch failed', result['href'], err)
            continue
        if not contents:
            print('runs.skip>>>>> no article body found', result['href'])
            continue

        post = contents[0]
        heading = post.find('h1')
        cate = post.find(class_='cate')
        cate_link = cate.find('a') if cate is not None else None
        single = post.find(class_='single')
        if heading is None or cate_link is None or single is None:
            print('runs.skip>>>>> article markup incomplete', result['href'])
            continue

        print("contents1>>>>>", heading.get_text())
        print("contents2>>>>>", cate_link.get_text())
        addData = {
            'title': heading.get_text(),
            'cate': cate_link.get_text(),
            'html': single.get_text(),
            'intro': result['intro']
        }

        # Add to the cloud database. A failed upload surfaces here either as
        # an exception from add_data or as a reply without data.requestId.
        try:
            postres = add_data(addData)
            request_id = postres['data']['requestId']
        except (TypeError, KeyError, ValueError) as err:
            print('runs.skip>>>>> upload failed', result['href'], err)
            continue
        data.append(request_id)

    res = {
        'code': 200,
        # Spelling kept as-is: clients may match on this exact value.
        'msg': 'sucess',
        'data': data
    }
    return json.dumps(res)
def get_html(url, xpath):
    """Fetch a page and return the elements matching a CSS selector.

    Args:
        url: Address of the page. When it carries no scheme, ``https://``
            is assumed (NOTE(review): confirm the target sites serve HTTPS).
        xpath: Despite the name, a CSS selector; it is passed to
            ``BeautifulSoup.select``.

    Returns:
        The result of ``soup.select(xpath)`` for the downloaded page.

    Exceptions raised by ``requests.get`` are not caught here and propagate
    to the caller.
    """
    # requests refuses URLs without a scheme, so add one when it is missing.
    if url.startswith('//'):
        url = 'https:' + url
    elif '://' not in url:
        url = 'https://' + url

    headers = {
        # "*/*" is the catch-all media range; a bare "/" is not a valid one.
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
    }
    strHtml = requests.get(url, headers=headers, timeout=30)
    # Decode the body as UTF-8 regardless of what the response declares.
    strHtml.encoding = 'utf-8'
    soup = BeautifulSoup(strHtml.text, 'html.parser')
    data = soup.select(xpath)
    print("data>>>>>>", data)
    return data
def add_data(data):
    """Send one record to the cloud endpoint and return the decoded reply.

    Args:
        data: JSON-serialisable object to upload.

    Returns:
        The reply decoded from JSON, or ``None`` when ``post_data`` returned
        nothing or the reply text was not valid JSON.
    """
    # NOTE(review): this URL has no scheme and ends in an ellipsis, so it
    # looks truncated -- restore the full endpoint address.
    url = 'ttmap-1gqwmmpb7b972901-1302805790.ap-shanghai.app.tcloudbase.com/add_jsonDat…'
    res = post_data(url, json.dumps(data))
    if res is None:
        # post_data returns None when the request raised; json.loads(None)
        # would fail with a TypeError.
        print("msgData.err>> no response received")
        return None

    # Convert the reply to JSON.
    try:
        msgData = json.loads(res)
    except ValueError as err:
        print("msgData.err>>", err)
        return None
    print("msgData>>", msgData)
    return msgData
# Send the request data.
def post_data(url, data):
    """POST ``data`` to ``url`` with a JSON content type.

    Returns the response text, or ``None`` (after printing the error) when
    anything raises while sending the request or reading the reply.
    """
    json_headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(url=url, data=data, headers=json_headers, timeout=100)
        body = response.text
        print("postData>>", body)
        return body
    except Exception as err:
        print('postData.err>>', err)
        return None
@app.route("/users", methods=['GET', 'POST'])
def users():
    """Demo users endpoint.

    POST echoes the submitted ``uid`` form field next to a fixed name.
    Any other method returns a fixed two-user list whose ``count`` is the
    ``limit`` query argument, or 2 when that argument is absent or empty.
    """
    if request.method != 'POST':
        limit = request.args.get('limit')
        listing = {
            'count': limit if limit else 2,
            'users': [{'name': 'test1'}, {'name': 'test2'}]
        }
        return jsonify(data=listing)

    print(request.form)
    user = {'uid': request.form.get('uid'), 'name': 'test1'}
    return jsonify(data=user)
@app.route("/users/<id>")
def get_user(id):
    """Return a fixed demo user for the id given in the URL path.

    The ``<id>` URL variable is what supplies the ``id`` argument; without
    it the view is called with no arguments. The value is currently unused.
    """
    return jsonify(data={'name': 'test1'})
# File upload example.
@app.route('/upload', methods=['POST'])
def upload():
    """Store the uploaded ``avatar`` file and return its download URL.

    Returns a JSON body ``{"data": {"upload": <url>}}`` on success, or
    ``{"data": {"error": <message>}}`` when no file was sent, no file was
    selected, or the file name is empty after sanitizing.
    """
    if 'avatar' not in request.files:
        return jsonify(data={"error": "No avatar file upload"})

    avatar = request.files['avatar']
    if not avatar.filename:
        return jsonify(data={"error": "No avatar file selected"})

    filename = secure_filename(avatar.filename)
    if not filename:
        # Sanitizing can strip every character; saving would then target
        # the upload directory itself.
        return jsonify(data={"error": "Invalid avatar file name"})

    filePath = os.path.join(app.config['UPLOAD_DIR'], filename)
    avatar.save(filePath)
    uploadUrl = url_for('uploaded_file', filename=filename)
    return jsonify(data={'upload': uploadUrl})
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve a previously uploaded file from the configured upload directory.

    The ``<filename>`` URL variable supplies the ``filename`` argument and
    lets ``url_for('uploaded_file', filename=...)`` build a matching path.
    """
    return send_from_directory(app.config['UPLOAD_DIR'], filename)
# Start the server: listen on port 9000, bound to address 0.0.0.0.
# Debug mode is on only when not running serverless.
# NOTE(review): this call runs whenever the module is loaded; consider an
# `if __name__ == '__main__':` guard if the module is ever imported.
app.run(host='0.0.0.0', port=9000, debug=not IS_SERVERLESS)