first commit

2026-01-14 17:19:21 +08:00
parent 10cee828d7
commit 88e5bb418a
5 changed files with 196 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -1,2 +1,14 @@
-# football-data
+# football_data
+统计一些足球数据

+## 爬取足球赛录像
+
+football_game.py：在直播吧（zhibo8）的赛程页面中按球队名搜索比赛，并输出对应的集锦和录像链接。
+
+## 计算球员赛季正负值
+
+足球引入正负值的概念，参考视频：<https://www.bilibili.com/video/BV1kYBBYEEW7/>
+
+football_data.py是对视频中计算方法的实现，首先爬取指定网页的表格数据，然后进行可视化。巴塞罗那23/24赛季球员正负值展示如下：
+
+![image-20241125104039979](./assets/image-20241125104039979.png)
--- a/assets/image-20241125104039979.png
+++ b/assets/image-20241125104039979.png
--- a/football_data.py
+++ b/football_data.py
@@ -0,0 +1,78 @@
+import requests
+from lxml import etree
+import pandas as pd
+import matplotlib.pyplot as plt
+
+url = f'https://www.transfermarkt.com/manchester-city/leistungsdaten/verein/281/plus/1?reldata=%262023'
+myheader = {'user-agent': 'Chrome'}
+response = requests.get(url, headers=myheader)
+# response.encoding = 'utf-8'
+html = response.content
+tree = etree.HTML(html)
+
+players = tree.xpath('//tbody/tr')
+
+# 提取数据
+player_data = []
+for player in players:
+    # 提取球员名字
+    name = player.xpath('.//img/@title')
+    name = name[0] if name else None
+
+    # 提取 PPG
+    ppg = player.xpath('.//td[contains(@class, "cp")]/text()')
+    ppg = ppg[0] if ppg else None
+    ppg = eval(str(ppg))
+    # 去掉未出场的球员
+    if ppg == 0:
+        continue
+
+    # 提取出场时间
+    playing_time = player.xpath('.//td[contains(@class, "rechts")]/text()')
+    playing_time = playing_time[0].replace('.', '').strip("'") if playing_time else None
+    playing_time = eval(str(playing_time))
+
+    # 如果 PPG 或出场时间缺失，跳过该球员
+    if name and ppg and playing_time:
+        player_data.append({
+            "name": name,
+            "PPG": ppg,
+            "playing_time": playing_time
+        })
+
+# 将数据存储到 DataFrame
+df = pd.DataFrame(player_data)
+
+# 添加 "Matches" 列，出场时间除以 90，保留两位小数
+df['Matches'] = (df['playing_time'] / 90).round(2)
+
+# 输出结果
+print(df)
+
+# 绘制散点图
+plt.figure(figsize=(8, 6))
+plt.scatter(df['Matches'], df['PPG'], color='blue', label='Player Data')
+
+# 添加球员名字标注
+for i, row in df.iterrows():
+    plt.text(row['Matches'], row['PPG'], row['name'], fontsize=9, ha='right', va='bottom')
+
+# 添加分割线
+split_x = 30
+split_y = 2.37
+plt.axhline(y=split_y, color='red', linestyle='--', linewidth=1.5, label='PPG Split (2.13)')
+plt.axvline(x=split_x, color='green', linestyle='--', linewidth=1.5, label='Matches Split (30)')
+
+# 设置标题和标签
+plt.title("Scatter Plot with Player Names and Splitting Lines", fontsize=14)
+plt.xlabel("Matches (Games Played)", fontsize=12)
+plt.ylabel("PPG (Points Per Game)", fontsize=12)
+
+# 添加网格
+plt.grid(alpha=0.3)
+
+# 添加图例
+plt.legend()
+
+# 显示图形
+plt.show()
--- a/football_game.ipynb
+++ b/football_game.ipynb
@@ -0,0 +1,75 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "from lxml import etree\n",
+    "\n",
+    "key='AC米兰'\n",
+    "\n",
+    "url = f'https://www.zhibo8.com/schedule/finish_more.htm'\n",
+    "myheader = {'user-agent': 'Chrome'}\n",
+    "response = requests.get(url, headers=myheader)\n",
+    "# response.encoding = 'utf-8'\n",
+    "html = response.content\n",
+    "element = etree.HTML(html)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "https://news.zhibo8.com/zuqiu/2024-10-30/match1448348date2024vnative.htm\n"
+     ]
+    }
+   ],
+   "source": [
+    "span_elements = element.xpath('//li/span[@class=\"_teams\"]')\n",
+    "for span in span_elements:\n",
+    "    team=span.text\n",
+    "    # print(team)\n",
+    "    if key in team:\n",
+    "        # print(team)\n",
+    "        url_jijin=span.xpath('../a')[-1].get('href')\n",
+    "        print(url_jijin)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/football_game.py
+++ b/football_game.py
@@ -0,0 +1,30 @@
+from urllib.parse import urljoin
+
+import requests
+from lxml import etree
+
+# 只能搜索左边的队伍
+key = '伊普斯维奇'
+
+url = f'https://www.zhibo8.com/schedule/finish_more.htm'
+myheader = {'user-agent': 'Chrome'}
+response = requests.get(url, headers=myheader)
+response.encoding = 'utf-8'
+html = response.content
+element = etree.HTML(html)
+
+# 搜索定位（取第一个）
+span_elements = element.xpath('//li/span[@class="_teams"]')
+for span in span_elements:
+    team = span.text
+    # print(team)
+    if key in team:
+        break
+
+a_elements = span.xpath('../a')
+for a in a_elements:
+    url_jijin = a.get('href')
+    url_luxiang = 'https://www.zhibo8.com' + \
+        url_jijin.replace('jijin', 'luxiang')
+    print(url_jijin)
+    print(url_luxiang)
+
+# https://www.zhibo8.com/zuqiu/2024/1030-match1448348v-luxiang.htm