Files
football-data/football_data.py
2026-01-14 17:19:21 +08:00

79 lines
2.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
from lxml import etree
import pandas as pd
import matplotlib.pyplot as plt
url = f'https://www.transfermarkt.com/manchester-city/leistungsdaten/verein/281/plus/1?reldata=%262023'
myheader = {'user-agent': 'Chrome'}
response = requests.get(url, headers=myheader)
# response.encoding = 'utf-8'
html = response.content
tree = etree.HTML(html)
players = tree.xpath('//tbody/tr')
# 提取数据
player_data = []
for player in players:
# 提取球员名字
name = player.xpath('.//img/@title')
name = name[0] if name else None
# 提取 PPG
ppg = player.xpath('.//td[contains(@class, "cp")]/text()')
ppg = ppg[0] if ppg else None
ppg = eval(str(ppg))
# 去掉未出场的球员
if ppg == 0:
continue
# 提取出场时间
playing_time = player.xpath('.//td[contains(@class, "rechts")]/text()')
playing_time = playing_time[0].replace('.', '').strip("'") if playing_time else None
playing_time = eval(str(playing_time))
# 如果 PPG 或出场时间缺失,跳过该球员
if name and ppg and playing_time:
player_data.append({
"name": name,
"PPG": ppg,
"playing_time": playing_time
})
# 将数据存储到 DataFrame
df = pd.DataFrame(player_data)
# 添加 "Matches" 列,出场时间除以 90保留两位小数
df['Matches'] = (df['playing_time'] / 90).round(2)
# 输出结果
print(df)
# 绘制散点图
plt.figure(figsize=(8, 6))
plt.scatter(df['Matches'], df['PPG'], color='blue', label='Player Data')
# 添加球员名字标注
for i, row in df.iterrows():
plt.text(row['Matches'], row['PPG'], row['name'], fontsize=9, ha='right', va='bottom')
# 添加分割线
split_x = 30
split_y = 2.37
plt.axhline(y=split_y, color='red', linestyle='--', linewidth=1.5, label='PPG Split (2.13)')
plt.axvline(x=split_x, color='green', linestyle='--', linewidth=1.5, label='Matches Split (30)')
# 设置标题和标签
plt.title("Scatter Plot with Player Names and Splitting Lines", fontsize=14)
plt.xlabel("Matches (Games Played)", fontsize=12)
plt.ylabel("PPG (Points Per Game)", fontsize=12)
# 添加网格
plt.grid(alpha=0.3)
# 添加图例
plt.legend()
# 显示图形
plt.show()