
Storing Crawled Data in MySQL

To connect Python to MySQL, use the pymysql library:

import pymysql



conn = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                       db='company', charset="utf8")

cur = conn.cursor()

sql = '''
'''                            # put the SQL statement to run here
employee = cur.execute(sql)    # returns the number of affected rows
conn.commit()
cur.close()
conn.close()

That is roughly all there is to the basic operations.
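
As a point of reference, here is a minimal end-to-end sketch of that workflow with an actual statement filled in. The employee table and its name/salary columns are made up purely for illustration; the point is the parameterized execute() followed by commit():

import pymysql

conn = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                       db='company', charset="utf8")
try:
    with conn.cursor() as cur:
        # "employee" and its columns are hypothetical, used only for this example
        cur.execute("insert into employee(name, salary) values (%s, %s)",
                    ("Alice", 8000))
    conn.commit()              # nothing is persisted until commit()
finally:
    conn.close()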
This time the crawl targets Taobao product listing pages.

I will skip the walkthrough and just post the code.


import requests
import re
import pymysql


def getHTMLtext(url):
    try:
        r = requests.get(url, timeout=100)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:
        return ""


def getpage(itl, html):
    try:
        plt = re.findall(r'"view_price":"[\d.]*"', html)
        nlt = re.findall(r'"raw_title":".*?"', html)
        for i in range(len(plt)):
            price = eval(plt[i].split(':')[1])   # strip the surrounding quotes
            title = eval(nlt[i].split(':')[1])
            itl.append([price, title])
    except:
        print("")


def printgoods(itl):
    tplt = "{:2}\t{:8}\t{:16}"
    print(tplt.format("序号", "价格", "商品名称"))   # index / price / product name

    count = 0
    conn = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                           db='company', charset="utf8")
    cur = conn.cursor()

    # create the target table (the except branch fires if it already exists)
    sqlc = '''
    create table coffee(
        id int(11) not null auto_increment primary key,
        name varchar(255) not null,
        price float not null) DEFAULT CHARSET=utf8;
    '''
    try:
        A = cur.execute(sqlc)
        conn.commit()
        print('成功')
    except:
        print("错误")

    for g in itl:
        count = count + 1
        b = tplt.format(count, g[0], g[1])

        # insert one row per crawled item, using a parameterized statement
        sqla = '''
        insert into coffee(name,price)
        values(%s,%s);
        '''
        try:
            B = cur.execute(sqla, (g[1], g[0]))
            conn.commit()
            print('成功')
        except:
            print("错误")

        # save_path = 'D:/taobao.txt'
        # f = open(save_path, 'a')
        # f.write(b + '\n')
        # f.close()

    conn.commit()
    cur.close()
    conn.close()


def main():
    goods = "咖啡"
    depth = 2
    start_url = 'https://s.taobao.com/search?q=' + goods
    List = []
    for i in range(depth):
        try:
            url = start_url + "&s=" + str(i * 44)
            html = getHTMLtext(url)
            getpage(List, html)
        except:
            continue

    printgoods(List)
    # savefiles(data)


main()
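
One thing worth noting about the script above: it commits once for every single row. pymysql also provides executemany(), which sends the whole batch in one call and commits once. A possible variant, not part of the original script and assuming the same coffee table, could look like this:

def save_goods(itl, conn):
    # itl holds [price, title] pairs, as produced by getpage() above
    rows = [(title, price) for price, title in itl]
    with conn.cursor() as cur:
        cur.executemany("insert into coffee(name, price) values (%s, %s)", rows)
    conn.commit()   # one commit for the whole batch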


As you can see, the data we need has been stored in the database.
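
If you want to confirm this from Python instead of the MySQL client, a quick query against the coffee table does the job (a sketch reusing the same connection settings as above):

import pymysql

conn = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                       db='company', charset="utf8")
with conn.cursor() as cur:
    cur.execute("select id, name, price from coffee order by id limit 5")
    for row in cur.fetchall():
        print(row)             # each row is an (id, name, price) tuple
conn.close()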
