python-pymysql-requests-beautifulsoup数据爬取mysql保存---java-mysql-spring_boot-jpa后台读取数据
python-爬虫# import urllib.request# import http.cookiejar# import pymysql# conn = pymysql.connect("localhost", "root", "123456", "test")# cursor = conn.cursor()#
·
python-爬虫
# import urllib.request
# import http.cookiejar
# import pymysql
# conn = pymysql.connect("localhost", "root", "123456", "test")
# cursor = conn.cursor()
# cursor.execute("DROP TABLE IF EXISTS employee")
# sql = """CREATE TABLE employee(first_name CHAR(20) NOT NULL,
# last_name CHAR(20),
# age INT,
# sex CHAR(1))"""
# cursor.execute(sql)
# sqlInsert = """INSERT INTO employee(first_name,last_name,age,sex) VALUES('李白','白居易',20,'男')"""
# try:
# cursor.execute(sqlInsert)
# cursor.execute(sqlInsert)
# conn.commit()
# except:
# conn.rollback()
# conn.close()
# 爬虫
# import requests
# from bs4 import BeautifulSoup
# import pymysql
#
# # 本地数据库
# sql_host = 'localhost'
# # 数据库的用户名
# sql_user = 'root'
# # 数据库密码
# sql_password = '123456'
# # 数据的名
# sql_name = 'test'
# SQL_INSERT = """INSERT INTO user_data(author,page,sex,age,vote,content) VALUES(%s,%s,%s,%s,%s,%s)"""
#
# def download_page(http_url):
# headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}
# call_back = requests.get(http_url, headers=headers)
# return call_back.text
#
#
# def get_page_content(html, page):
# conn = pymysql.connect(sql_host, sql_user, sql_password, sql_name)
# cursor = conn.cursor()
# soup = BeautifulSoup(html, 'html.parser')
# con = soup.find(id='content-left')
# con_list = con.find_all('div', class_='article')
# for item in con_list:
# author = item.find('h2').string
# content = item.find('div', class_='content').find('span').get_text()
# stats = item.find('div', class_='stats')
# vote = stats.find('span', class_='stats-vote').find('i', class_='number').get_text()
# comments = stats.find('span', class_='stats-comments').find('i', class_='number').string
# author_info = item.find('div', class_='articleGender')
# if author_info is not None:
# class_list = author_info['class']
# age = author_info.string
# if 'womenIcon' in class_list:
# sex = '女'
# elif 'manIcon' in class_list:
# sex = '男'
# else:
# sex = ''
# else:
# sex = ''
# age = ''
# # cursor.execute(SQL_INSERT, ("name","data","gg","sd","dd"))
# cursor.execute(SQL_INSERT,(author,page,sex,age,vote,content))
# conn.commit()
# # conn.close()
#
# def main():
# conn = pymysql.connect(sql_host, sql_user, sql_password, sql_name)
# cursor = conn.cursor()
# cursor.execute("""DELETE FROM user_data""")
# conn.commit()
# conn.close()
# for i in range(1, 14):
# http_url = 'https://qiushibaike.com/text/page/{}'.format(i)
# html = download_page(http_url)
# get_page_content(html,i)
#
#
# if __name__ == '__main__':
# main()
import requests
from bs4 import BeautifulSoup
import pymysql
# User-Agent sent with each request: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0
# Example of the page markup being parsed: <div class="articleGender manIcon">20</div>
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: on connection failures or
            when the timeout expires.
    """
    # A browser-like User-Agent is sent with the request.
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0'}
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def get_page_content(html, page):
    """Parse a listing page and print author, sex and age for each article.

    The function looks for the element with ``id='content-left'`` and, inside
    it, every ``<div class="article">``. For each article it reads the author
    name from the ``<h2>`` inside ``<div class="author">`` and, when present,
    the ``<div class="articleGender ...">`` whose text is the age and whose
    CSS classes (``manIcon`` / ``womenIcon``) encode the sex.

    Args:
        html: HTML source of the page.
        page: Page number supplied by the caller; not used by this function.

    Returns:
        None. One line per article is written to standard output. Pages that
        do not contain the expected container produce no output instead of
        raising ``AttributeError``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    container = soup.find(id='content-left')
    if container is None:
        # find() returns None when nothing matches; nothing to parse here.
        return

    for article in container.find_all('div', class_='article'):
        author_div = article.find('div', class_='author')
        if author_div is None:
            # Article without an author section: skip it rather than crash.
            continue

        heading = author_div.find('h2')
        author = heading.string if heading is not None else ''

        # Defaults used when the article carries no gender/age badge.
        age = ''
        sex = ''
        gender_div = author_div.find('div', class_='articleGender')
        if gender_div is not None:
            age = gender_div.string
            css_classes = gender_div['class']
            if 'manIcon' in css_classes:
                sex = '男'
            elif 'womenIcon' in css_classes:
                sex = '女'

        print(author, sex, age)
if __name__ == '__main__':
    # Script entry point: download one page and print what the parser finds.
    # NOTE(review): get_page_content searches for id='content-left'; confirm
    # that this URL actually serves a page with that element.
    get_page_content(get_html('https://www.baidu.com/'), 1)
java-spring_boot-mysql
数据类
/**
 * JPA entity holding one scraped record (author, page, sex, age, vote, content).
 *
 * Accessors are provided so that the object can be serialized as a bean
 * (e.g. to JSON by a REST controller); with private fields only, a
 * bean-based serializer finds no properties to write.
 */
@Entity
public class UserData {
    // Primary key of the record.
    @Id
    private int id;
    private String author;
    private String page;
    private String sex;
    private String age;
    private String vote;
    private String content;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getPage() {
        return page;
    }

    public void setPage(String page) {
        this.page = page;
    }

    public String getSex() {
        return sex;
    }

    public void setSex(String sex) {
        this.sex = sex;
    }

    public String getAge() {
        return age;
    }

    public void setAge(String age) {
        this.age = age;
    }

    public String getVote() {
        return vote;
    }

    public void setVote(String vote) {
        this.vote = vote;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }
}
/**
 * Spring Data repository for {@link UserData}.
 *
 * The second type argument is the type of the entity's identifier; the
 * {@code @Id} field of {@code UserData} is an {@code int}, so the repository
 * is keyed by {@code Integer} rather than {@code String}.
 */
@Repository
public interface User extends JpaRepository<UserData, Integer> { }
接口控制器
/**
 * REST controller exposing the stored {@link UserData} rows under {@code /name}.
 */
@RestController
@RequestMapping("/name")
public class Data {
    // Repository injected by Spring.
    @Autowired
    private User user;

    /**
     * Handles {@code /name/data}.
     *
     * @return every record returned by the repository's {@code findAll()}
     */
    @RequestMapping("/data")
    public List<UserData> name() {
        List<UserData> records = user.findAll();
        return records;
    }
}
更多推荐



所有评论(0)