# Standard library
import re
import urllib.error
import urllib.request

# Third-party
import bs4
import requests
import xlwt
#import urllib3.request
#import sqlite3_to_mysql
def main():
    """Entry point: crawl the Douban Top 250 movie list and (eventually) save it.

    Steps: 1) fetch the list pages via getData(); 2) parsing happens inside
    getData (still TODO); 3) save with savedata() once it is implemented.
    """
    baseurl = "https://movie.douban.com/top250?start="
    # 1. Crawl the data (10 pages x 25 movies)
    datalist = getData(baseurl)
    # Output path for the spreadsheet ("Douban Movies Top 250.xls")
    savepath = ".\\豆瓣电影Top250.xls"
    # 3. Save the data — savedata() is still a stub, so the call stays disabled
    # savedata(savepath)
def askURL(url):
    """Fetch one page and return its HTML decoded as UTF-8.

    Returns an empty string when the request fails (the HTTP status code /
    failure reason is printed, matching the script's print-based diagnostics).
    """
    head = {
        # Pretend to be a regular browser: the User-Agent tells the Douban
        # server what kind of client (and content level) is making the request;
        # the default urllib agent would be rejected.
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"
    }
    request = urllib.request.Request(url, headers=head)
    html = ""
    try:
        # `with` guarantees the HTTP response is closed even on decode errors.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        # HTTPError has .code; plain URLError only has .reason.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    # Bug fix: the original had `return html` commented out, so callers
    # always received None and the fetched page was discarded.
    return html
def getData(baseurl):
    """Crawl every page of the Top 250 list and return the parsed records.

    The list is paginated 25 movies per page, selected by the `start` query
    parameter appended to *baseurl*.
    """
    datalist = []
    for page in range(10):  # 10 pages x 25 movies = 250 entries
        url = baseurl + str(page * 25)
        html = askURL(url)  # raw page source for this page
        # 2. TODO: parse `html` (bs4/re) and append movie records to datalist
    return datalist
    # NOTE: the original had an unreachable print("baseurl") after the return;
    # it has been removed.
def savedata(savepath):
    """Save the scraped data to *savepath* as an .xls workbook (not implemented).

    The original def had no body at all, which is a syntax error; this stub
    keeps the file importable until the xlwt export is written.
    """
    # TODO: build a workbook with xlwt and write datalist rows to savepath
    pass
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# (removed: Discuz! forum page footer accidentally captured during extraction —
#  site navigation, timestamp, and copyright lines; not part of this script)