Comments (5)
import hashlib
import json
import time
import pymysql
import requests
from fake_useragent import UserAgent
from requests.exceptions import RequestException
import trip
# Shared generator of random User-Agent header values (read via ``ua.random``).
ua = UserAgent()
# Module-level requests session; none of the functions visible in this
# script reference it.
s = requests.session()
def getASCP(timestamp=None):
    """Build the ``as``/``cp`` value pair sent with the feed request.

    Both values are derived from a Unix timestamp: its upper-case hex form
    and the upper-case MD5 hex digest of its decimal string are interleaved
    character by character.

    Args:
        timestamp: Unix time in seconds to derive the pair from.  Defaults
            to the current time rounded to the nearest second, which is
            what callers passing no argument have always received.

    Returns:
        tuple: ``(AS, CP)`` strings.  When the hex form of the timestamp is
        not exactly 8 characters long, a fixed fallback pair is returned.
    """
    now = round(time.time()) if timestamp is None else int(timestamp)
    hex_time = hex(now).upper()[2:]  # strip the '0X' prefix
    if len(hex_time) != 8:
        return '479BB4B7254C150', '7E0AC8874BB0985'

    digest = hashlib.md5(str(now).encode(encoding='utf-8')).hexdigest().upper()
    head, tail = digest[:5], digest[-5:]
    # Local names are deliberately distinct from the module-level globals so
    # nothing at module scope is shadowed inside this function.
    as_middle = ''.join(d + h for d, h in zip(head, hex_time))
    cp_middle = ''.join(h + d for h, d in zip(hex_time[3:], tail))
    return 'A1' + as_middle + hex_time[-3:], hex_time[:3] + cp_middle + 'E1'
@trip.coroutine
def start_requests(maxtime=0):
    """Fetch one page of the Toutiao PC feed and return its decoded JSON.

    Args:
        maxtime: value sent as the ``max_behot_time`` query parameter;
            ``0`` requests the first page.

    Returns:
        The decoded JSON payload when it contains a ``data`` key, otherwise
        ``None`` (also ``None`` when the request raises
        ``RequestException``).
    """
    AS, CP = getASCP()
    headers = {'User-Agent': ua.random}
    feed_url = 'https://www.toutiao.com/api/pc/feed/'
    payloads = {'max_behot_time': maxtime, 'category': '__all__', 'utm_source': 'toutiao', 'widen': 1,
                'tadrequire': 'false', 'as': AS, 'cp': CP}
    try:
        response = yield trip.get(feed_url, params=payloads, headers=headers)
        response.encoding = "utf-8"
        feed = response.json()
    except RequestException as e:
        print('请求不成功', e)
        return None
    # The membership test must run on the decoded payload; the response
    # object itself has no ``keys()`` method.
    if 'data' in feed:
        return feed
    return None
async def parse_detail(response):
    """Store every non-ad feed item and keep following the ``next`` cursor.

    Args:
        response: decoded feed JSON (a dict with optional ``data`` and
            ``next`` keys), or ``None``.

    Returns:
        None.  Processing stops when a page is empty/``None`` or carries no
        ``next`` entry.
    """
    while response:
        # ``data`` may be absent or null; treat both as an empty page.
        for item in response.get('data') or []:
            # Keep the explicit comparison: items without the flag (None)
            # must not be treated as regular articles.
            if item.get('is_feed_ad') == False:  # noqa: E712
                result = {
                    'title': item.get('title'),
                    'tags': item.get('chinese_tag'),
                    'comments': item.get('comments_count'),
                }
                print(result)
                insert_mysql(result)
        next_page = response.get('next')
        if not next_page:
            break
        # Await the next page on the loop that is already running instead of
        # calling trip.run() again (which would try to start a second loop)
        # and instead of recursing once per page.
        response = await start_requests(maxtime=next_page.get('max_behot_time'))
def write_json(result, path='tt.txt'):
    """Append *result* to a text file as one JSON document per line.

    Args:
        result: JSON-serialisable object to store.
        path: destination file, opened in append mode as UTF-8.  Defaults
            to ``'tt.txt'``, the location used when no path is given.
    """
    with open(path, 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        f.write(json.dumps(result, ensure_ascii=False) + '\n')
def insert_mysql(result):
    """Write one scraped item into the ``lala`` table of the local MySQL DB.

    Args:
        result: mapping with ``title``, ``tags`` and ``comments`` keys.

    Errors are printed and the transaction is rolled back; nothing is
    raised to the caller for a failed insert.
    """
    conn = None
    try:
        conn = pymysql.Connect(host="127.0.0.1", port=3306, user='root', passwd='root', db='spider', charset='utf8')
        cursor = conn.cursor()
        sql_in = "replace into lala (title,tags,comments) values(%s,%s,%s)"
        cursor.execute(sql_in, (result['title'], result['tags'], result['comments']))
        conn.commit()
    except Exception as e:
        print(e)
        # ``conn`` is still None when the connect call itself failed.
        if conn is not None:
            conn.rollback()
    finally:
        # A connection is opened per call, so it must be closed per call.
        if conn is not None:
            conn.close()
def main():
    """Fetch the first feed page and process it inside a single loop run.

    ``trip.run`` is handed a callable.  The first page is obtained by
    yielding ``start_requests()`` inside that callable, so nothing depends
    on ``trip.run`` returning a value.
    """
    @trip.coroutine
    def crawl():
        response = yield start_requests()
        if response:
            yield parse_detail(response)

    trip.run(crawl)


if __name__ == '__main__':
    main()
from trip.
@kof0012 是我gzip处理的时候的问题,你更新一下版本(0.0.3)即可。
python -m pip install trip -U
from trip.
@littlecodersh 感谢回复,另外请问怎么在trip.run(fun)里写参数。实现trip.run(fun(args)),还是抓今日头条的json文件,想要递归回调(带参数),想了半天想不出来办法。。求教。
import trip
import hashlib
import json
import time
import pymysql
import requests
from fake_useragent import UserAgent
from requests.exceptions import RequestException
# Shared generator of random User-Agent header values (read via ``ua.random``).
ua = UserAgent()
# One MySQL connection and cursor, created when the module is loaded and
# reused by insert_mysql() for every row.
conn = pymysql.Connect(host="127.0.0.1", port=3306,
user='root', passwd='root', db='spider', charset='utf8')
cursor = conn.cursor()
def getASCP(timestamp=None):
    """Build the ``as``/``cp`` value pair sent with the feed request.

    Both values are derived from a Unix timestamp: its upper-case hex form
    and the upper-case MD5 hex digest of its decimal string are interleaved
    character by character.

    Args:
        timestamp: Unix time in seconds to derive the pair from.  Defaults
            to the current time rounded to the nearest second, which is
            what callers passing no argument have always received.

    Returns:
        tuple: ``(AS, CP)`` strings.  When the hex form of the timestamp is
        not exactly 8 characters long, a fixed fallback pair is returned.
    """
    now = round(time.time()) if timestamp is None else int(timestamp)
    hex_time = hex(now).upper()[2:]  # strip the '0X' prefix
    if len(hex_time) != 8:
        return '479BB4B7254C150', '7E0AC8874BB0985'

    digest = hashlib.md5(str(now).encode(encoding='utf-8')).hexdigest().upper()
    head, tail = digest[:5], digest[-5:]
    # Local names are deliberately distinct from the module-level globals so
    # nothing at module scope is shadowed inside this function.
    as_middle = ''.join(d + h for d, h in zip(head, hex_time))
    cp_middle = ''.join(h + d for h, d in zip(hex_time[3:], tail))
    return 'A1' + as_middle + hex_time[-3:], hex_time[:3] + cp_middle + 'E1'
@trip.coroutine
def start_requests(maxtime=0):
    """Fetch one page of the Toutiao PC feed.

    The raw response is also stored in the module-level ``r`` because other
    code in this script reads it from there.

    Args:
        maxtime: value sent as the ``max_behot_time`` query parameter;
            ``0`` requests the first page.

    Returns:
        The decoded JSON payload, or ``None`` when the request raises
        ``RequestException`` (in which case ``r`` is left untouched).
    """
    global r
    AS, CP = getASCP()
    headers = {'User-Agent': ua.random}
    feed_url = 'https://www.toutiao.com/api/pc/feed/'
    payloads = {'max_behot_time': maxtime, 'category': '__all__', 'utm_source': 'toutiao', 'widen': 1,
                'tadrequire': 'false', 'as': AS, 'cp': CP}
    try:
        r = yield trip.get(feed_url, params=payloads, headers=headers)
        res = r.json()
    except RequestException as e:
        print('请求不成功', e)
        return None
    # Hand the decoded payload back instead of discarding it.
    return res
def parse_detail(response):
    """Store every non-ad feed item and keep following the ``next`` cursor.

    Args:
        response: decoded feed JSON (a dict with optional ``data`` and
            ``next`` keys), or ``None``.

    Returns:
        None.  Processing stops when a page is empty/``None``, carries no
        ``next`` entry, or the follow-up request produced no new response.
    """
    from functools import partial  # local import keeps the block self-contained

    while response:
        # ``data`` may be absent or null; treat both as an empty page.
        for item in response.get('data') or []:
            # Keep the explicit comparison: items without the flag (None)
            # must not be treated as regular articles.
            if item.get('is_feed_ad') == False:  # noqa: E712
                source_url = item.get('source_url')
                result = {
                    'title': item.get('title'),
                    'tags': item.get('chinese_tag'),
                    'comments': item.get('comments_count'),
                    # Avoid str + None when an item has no source_url.
                    'url': 'https://www.toutiao.com' + source_url if source_url else None,
                }
                print(result)
                insert_mysql(result)
        next_page = response.get('next')
        if not next_page:
            return
        # start_requests is a coroutine: calling it directly only yields a
        # future, so it has to be driven by trip.run, with the argument
        # bound through partial.  The response is published in the
        # module-level ``r``.
        previous = globals().get('r')
        trip.run(partial(start_requests, maxtime=next_page.get('max_behot_time')))
        latest = globals().get('r')
        if latest is None or latest is previous:
            # The request failed and ``r`` was not refreshed; stop rather
            # than re-processing the same page forever.
            return
        response = latest.json()
def write_json(result, path='tt.txt'):
    """Append *result* to a text file as one JSON document per line.

    Args:
        result: JSON-serialisable object to store.
        path: destination file, opened in append mode as UTF-8.  Defaults
            to ``'tt.txt'``, the location used when no path is given.
    """
    with open(path, 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        f.write(json.dumps(result, ensure_ascii=False) + '\n')
def insert_mysql(result):
    """Upsert one scraped item into ``toutiaocomment`` via the shared cursor.

    Args:
        result: mapping with ``title``, ``tags``, ``comments`` and ``url``
            keys.

    Any exception is printed and followed by a rollback on the shared
    connection; nothing is re-raised from the insert itself.
    """
    statement = "insert into toutiaocomment(title,tags,comments,url) VALUES(%s,%s,%s,%s) ON DUPLICATE KEY UPDATE comments=VALUES(comments)"
    columns = ('title', 'tags', 'comments', 'url')
    try:
        # Build the row inside the try so a missing key is handled like any
        # other failure.
        row = tuple(result[name] for name in columns)
        cursor.execute(statement, row)
        conn.commit()
    except Exception as err:
        print(err)
        conn.rollback()
def main():
    """Fetch the first feed page and process it and every following page.

    ``start_requests`` publishes its response in the module-level ``r``;
    when the first request fails that name is never assigned, so it is
    looked up defensively instead of being read directly.
    """
    trip.run(start_requests)
    first = globals().get('r')
    if first is None:
        return
    parse_detail(first.json())


if __name__ == '__main__':
    main()
from trip.
from functools import partial
from trip.
@littlecodersh 多谢提醒,已经解决。
from trip.
Related Issues (17)
- Github 链接的文档URL错误 HOT 1
- how to use trip gracefully? HOT 6
- RuntimeError: IOLoop is already running HOT 5
- 大的网页应答, chunk方式出错。 trip 0.08 HOT 2
- AttributeError: 'HTTPHeaderDict' object has no attribute 'get_all' HOT 4
- 怎么跟asyncio的loop结合使用
- 下载图片时,有时成功,有时失败
- 无法获取trip.run的返回值
- 0.0.10 tarball missing
- 协程运行速度比多进程慢
- 依赖的request是版本是多少呢? HOT 2
- 有没有把文档英文化的计划呢? HOT 1
- 请问有没有和grequests的对比呢? HOT 1
- proxies参数被忽略 HOT 4
- The ability to use a proxy is one of the important features! When is the proxy planned? HOT 2
- python3 用await 提示错误 HOT 3
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization, using data art.
-
Game
Something interesting about games, to make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from trip.