first commit
This commit is contained in:
14
README.md
14
README.md
@@ -1,2 +1,14 @@
|
||||
# football-data
|
||||
# football_data
|
||||
统计一些足球数据
|
||||
|
||||
## 爬取足球赛录像
|
||||
|
||||
football_game.py
|
||||
|
||||
## 计算球员赛季正负值
|
||||
|
||||
在足球中引入正负值的概念,参考视频:<https://www.bilibili.com/video/BV1kYBBYEEW7/>
|
||||
|
||||
`football_data.py` 是对视频中计算方法的实现:首先爬取指定网页的表格数据,然后进行可视化。巴塞罗那 23/24 赛季球员正负值展示如下:
|
||||
|
||||

|
||||
|
||||
BIN
assets/image-20241125104039979.png
Normal file
BIN
assets/image-20241125104039979.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 82 KiB |
78
football_data.py
Normal file
78
football_data.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import requests
|
||||
from lxml import etree
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Page that is scraped for the per-player table. The query string is kept
# exactly as the original request sent it.
url = 'https://www.transfermarkt.com/manchester-city/leistungsdaten/verein/281/plus/1?reldata=%262023'
# Minimal User-Agent header sent with the request.
myheader = {'user-agent': 'Chrome'}
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Positions of the two dashed reference lines drawn on the scatter plot.
split_x = 30
split_y = 2.37


def parse_ppg(text):
    """Convert the text of a PPG cell into a float.

    Args:
        text: Raw cell text, or None when the row has no such cell.

    Returns:
        The value as a float, or None when the text is missing or is not a
        plain number (the row is then skipped by the caller).
    """
    if text is None:
        return None
    try:
        # float() instead of eval(): the text comes from a remote page and
        # must never be executed as Python code.
        return float(text.strip())
    except ValueError:
        return None


def parse_minutes(text):
    """Convert the text of a playing-time cell (e.g. "2.345'") into an int.

    Dots (used as thousands separators) and the surrounding minute marks
    are removed before conversion.

    Args:
        text: Raw cell text, or None when the row has no such cell.

    Returns:
        The number of minutes as an int, or None when the text is missing
        or is not a plain number.
    """
    if text is None:
        return None
    digits = text.strip().replace('.', '').strip("'")
    try:
        return int(digits)
    except ValueError:
        return None


def extract_player_rows(tree):
    """Collect name, PPG and playing time for every usable table row.

    Args:
        tree: Parsed lxml document of the scraped page.

    Returns:
        A list of dicts with the keys "name", "PPG" and "playing_time".
        Rows with a PPG of 0 (players who did not play) and rows where any
        of the three values is missing are left out.
    """
    player_data = []
    for player in tree.xpath('//tbody/tr'):
        # Player name is taken from the title attribute of the row's image.
        titles = player.xpath('.//img/@title')
        name = titles[0] if titles else None

        ppg_cells = player.xpath('.//td[contains(@class, "cp")]/text()')
        ppg = parse_ppg(ppg_cells[0] if ppg_cells else None)
        # Drop players who never appeared.
        if ppg == 0:
            continue

        time_cells = player.xpath('.//td[contains(@class, "rechts")]/text()')
        playing_time = parse_minutes(time_cells[0] if time_cells else None)

        # Skip the row when the name, PPG or playing time is missing.
        if name and ppg and playing_time:
            player_data.append({
                "name": name,
                "PPG": ppg,
                "playing_time": playing_time
            })
    return player_data


def main():
    """Download the page, print the player table and show the scatter plot."""
    response = requests.get(url, headers=myheader, timeout=REQUEST_TIMEOUT)
    # Fail on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    tree = etree.HTML(response.content)

    player_data = extract_player_rows(tree)
    if not player_data:
        # Without this guard the column access below fails with a KeyError.
        raise SystemExit('No player rows could be parsed from ' + url)

    # Store the data in a DataFrame.
    df = pd.DataFrame(player_data)

    # "Matches" column: playing time divided by 90, rounded to two decimals.
    df['Matches'] = (df['playing_time'] / 90).round(2)

    print(df)

    # Scatter plot of PPG against matches played.
    plt.figure(figsize=(8, 6))
    plt.scatter(df['Matches'], df['PPG'], color='blue', label='Player Data')

    # Annotate every point with the player's name.
    for matches, ppg, name in zip(df['Matches'], df['PPG'], df['name']):
        plt.text(matches, ppg, name, fontsize=9, ha='right', va='bottom')

    # Reference lines; the legend text is built from the actual positions so
    # the two cannot drift apart.
    plt.axhline(y=split_y, color='red', linestyle='--', linewidth=1.5,
                label=f'PPG Split ({split_y})')
    plt.axvline(x=split_x, color='green', linestyle='--', linewidth=1.5,
                label=f'Matches Split ({split_x})')

    # Title and axis labels.
    plt.title("Scatter Plot with Player Names and Splitting Lines", fontsize=14)
    plt.xlabel("Matches (Games Played)", fontsize=12)
    plt.ylabel("PPG (Points Per Game)", fontsize=12)

    plt.grid(alpha=0.3)
    plt.legend()
    plt.show()


if __name__ == '__main__':
    main()
|
||||
75
football_game.ipynb
Normal file
75
football_game.ipynb
Normal file
@@ -0,0 +1,75 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from lxml import etree\n",
|
||||
"\n",
|
||||
"key='AC米兰'\n",
|
||||
"\n",
|
||||
"url = f'https://www.zhibo8.com/schedule/finish_more.htm'\n",
|
||||
"myheader = {'user-agent': 'Chrome'}\n",
|
||||
"response = requests.get(url, headers=myheader)\n",
|
||||
"# response.encoding = 'utf-8'\n",
|
||||
"html = response.content\n",
|
||||
"element = etree.HTML(html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"https://news.zhibo8.com/zuqiu/2024-10-30/match1448348date2024vnative.htm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"span_elements = element.xpath('//li/span[@class=\"_teams\"]')\n",
|
||||
"for span in span_elements:\n",
|
||||
" team=span.text\n",
|
||||
" # print(team)\n",
|
||||
" if key in team:\n",
|
||||
" # print(team)\n",
|
||||
" url_jijin=span.xpath('../a')[-1].get('href')\n",
|
||||
" print(url_jijin)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
30
football_game.py
Normal file
30
football_game.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
||||
# Team name to search for. Only `span.text` is compared, so (as the original
# note says) only the team shown on the left of a fixture can be found.
key = '伊普斯维奇'

url = 'https://www.zhibo8.com/schedule/finish_more.htm'
myheader = {'user-agent': 'Chrome'}
# Root that site-relative links are resolved against.
SITE_ROOT = 'https://www.zhibo8.com'
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def build_luxiang_url(href):
    """Turn a link taken from the schedule page into a 'luxiang' URL.

    'jijin' in the link is replaced by 'luxiang' and the result is resolved
    against the site root, e.g.
    https://www.zhibo8.com/zuqiu/2024/1030-match1448348v-luxiang.htm

    Args:
        href: The href attribute of a link, relative or absolute.

    Returns:
        The absolute URL. Links that are already absolute keep their own
        host instead of getting the site root glued in front of them.
    """
    from urllib.parse import urljoin

    return urljoin(SITE_ROOT, href.replace('jijin', 'luxiang'))


def find_first_match(span_elements, team_key):
    """Return the first element whose text contains team_key.

    Args:
        span_elements: Iterable of elements exposing a `.text` attribute.
        team_key: Substring to look for.

    Returns:
        The first matching element, or None when nothing matches.
    """
    for span in span_elements:
        # `.text` is None for an element without leading text.
        if team_key in (span.text or ''):
            return span
    return None


def main():
    """Find the first listed match for `key` and print its links."""
    response = requests.get(url, headers=myheader, timeout=REQUEST_TIMEOUT)
    # Fail on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    # The raw bytes are handed to lxml, so setting response.encoding would
    # have no effect here.
    element = etree.HTML(response.content)

    # Locate the match (first hit wins).
    span = find_first_match(element.xpath('//li/span[@class="_teams"]'), key)
    if span is None:
        # Previously the last list entry was silently used in this case.
        raise SystemExit(f'No finished match found for {key!r}')

    # The links are siblings of the span inside the same list item.
    for a in span.xpath('../a'):
        url_jijin = a.get('href')
        if not url_jijin:
            continue
        print(url_jijin)
        print(build_luxiang_url(url_jijin))


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user