Soccer analysis example¶
This tutorial uses positional data extracted from video footage of soccer games, published at https://github.com/Friends-of-Tracking-Data-FoTD/Last-Row
InΒ [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import shapely as shp
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
from holoviews import opts, dim
from os.path import exists
from urllib.request import urlretrieve
import warnings
# Suppress all warnings from here on (presumably to keep the rendered tutorial output uncluttered).
warnings.filterwarnings("ignore")
# Keyword arguments shared by the hvplot calls in this notebook
# (they are unpacked with ** into each call).
hvplot_defaults = dict(
    line_width=5,
    frame_height=350,
    frame_width=700,
    colorbar=True,
    tiles=None,
    geo=False,
)
# Print the MovingPandas version together with system and dependency details.
mpd.show_versions()
MovingPandas 0.20.0 SYSTEM INFO ----------- python : 3.10.15 | packaged by conda-forge | (main, Oct 16 2024, 01:15:49) [MSC v.1941 64 bit (AMD64)] executable : c:\Users\Agarkovam\AppData\Local\miniforge3\envs\mpd-ex\python.exe machine : Windows-10-10.0.19045-SP0 GEOS, GDAL, PROJ INFO --------------------- GEOS : None GEOS lib : None GDAL : None GDAL data dir: None PROJ : 9.5.0 PROJ data dir: C:\Users\Agarkovam\AppData\Local\miniforge3\envs\mpd-ex\Library\share\proj PYTHON DEPENDENCIES ------------------- geopandas : 1.0.1 pandas : 2.2.3 fiona : None numpy : 1.23.1 shapely : 2.0.6 pyproj : 3.7.0 matplotlib : 3.9.2 mapclassify: 2.8.1 geopy : 2.4.1 holoviews : 1.20.0 hvplot : 0.11.1 geoviews : 1.13.0 stonesoup : 1.4
Loading soccer dataset from GitHub¶
InΒ [2]:
def get_file_from_url(url):
    """Return the local file name for ``url``, downloading it first if needed.

    The local name is the last ``/``-separated segment of ``url`` and is
    resolved relative to the current working directory. If a file with that
    name already exists, nothing is downloaded.
    """
    local_name = url.rsplit("/", 1)[-1]
    if exists(local_name):
        # Already cached by an earlier run.
        return local_name
    urlretrieve(url, local_name)
    return local_name
def get_df_from_gh_url(url):
    """Fetch the CSV at ``url`` (reusing a local copy if present) and load it as a DataFrame."""
    return pd.read_csv(get_file_from_url(url))
InΒ [3]:
input_file = "https://raw.githubusercontent.com/Friends-of-Tracking-Data-FoTD/Last-Row/master/datasets/positional_data/liverpool_2019.csv"
# "Unnamed: 0" is presumably a leftover row index from the CSV export; it is not needed.
df = get_df_from_gh_url(input_file).drop(columns=["Unnamed: 0"])
print(f"Number of records: {len(df)}")
Number of records: 74936
InΒ [4]:
# Preview the first rows to inspect the loaded columns.
df.head()
Out[4]:
bgcolor | dx | dy | edgecolor | frame | play | player | player_num | team | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | 0.000000 | 0.000000 | NaN | 0 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 46.394558 | 11.134454 | 0.0 |
1 | NaN | 0.185745 | 1.217580 | NaN | 1 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 46.580302 | 12.352034 | 0.0 |
2 | NaN | 0.178659 | 1.171133 | NaN | 2 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 46.758961 | 13.523166 | 0.0 |
3 | NaN | 0.171573 | 1.124685 | NaN | 3 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 46.930535 | 14.647852 | 0.0 |
4 | NaN | 0.164488 | 1.078238 | NaN | 4 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 47.095022 | 15.726090 | 0.0 |
From the metadata:
- play: the scoreline after the goal. The team who scored the goal is the one next to the brackets.
- frame: the frame number for the current location. Data provided has 20 frames per second.
- player: the id of the player. The id is consistent within a play but not between plays.
- player_num: the player jersey number. This number is the official one, and did not change for Liverpool in 2019. You can check the corresponding names at this wikipedia link.
- x, y: coordinates for the player/ball. Pitch coordinates go from 0 to 100 on each axis.
- dx, dy: change in (x,y) coordinates from last frame to current frame
- z: height, from 0 to 1.5 (only filled for the ball)
- bgcolor: the main color for the team (used as background color)
- edgecolor: the secondary color (used as edge color)
And according to https://en.wikipedia.org/wiki/Football_pitch
the preferred size for many professional teams' stadiums is 105 by 68 metres
InΒ [5]:
# Distinct play labels in order of first appearance; a play's position in this
# list is used to look the play up by number.
plays = df["play"].unique().tolist()
def to_timestamp(row):
    """Build a synthetic timestamp for one tracking record.

    Every play is placed on its own calendar day: the play at index 0 of the
    module-level ``plays`` list starts at 2019-01-01 12:00:00, index 1 at
    2019-01-02 12:00:00, and so on. The frame number is then added as an
    offset, assuming 20 frames per second (50 ms per frame).

    Parameters
    ----------
    row : object exposing ``play`` and ``frame`` attributes
        For example a DataFrame row passed by ``df.apply(..., axis=1)``.

    Returns
    -------
    datetime
        Start time of the play plus the frame offset.

    Raises
    ------
    ValueError
        If ``row.play`` is not contained in ``plays``.
    """
    # plays to date: offset from January 1st by whole days instead of using the
    # index as day-of-month, so more than 31 plays roll over into the next month
    # rather than raising ValueError.
    play_offset = timedelta(days=plays.index(row.play))
    start_time = datetime(2019, 1, 1, 12, 0, 0) + play_offset
    # frames to time
    td = timedelta(milliseconds=1000 / 20 * row.frame)
    return start_time + td
# frame: the frame number for the current location. Data provided has 20 frames per second.
# Turn (play, frame) into a timestamp and use it as the index.
df["time"] = df.apply(to_timestamp, axis=1)
df = df.set_index("time")

# The preferred size for many professional teams' stadiums is 105 by 68 metres,
# according to https://en.wikipedia.org/wiki/Football_pitch
pitch_length, pitch_width = 105, 68

# Source coordinates run from 0 to 100 on each axis; rescale them to metres.
df["x"] = df["x"].div(100).mul(pitch_length)
df["y"] = df["y"].div(100).mul(pitch_width)
df
Out[5]:
bgcolor | dx | dy | edgecolor | frame | play | player | player_num | team | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
time | ||||||||||||
2019-01-01 12:00:00.000 | NaN | 0.000000 | 0.000000 | NaN | 0 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 48.714286 | 7.571429 | 0.0 |
2019-01-01 12:00:00.050 | NaN | 0.185745 | 1.217580 | NaN | 1 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 48.909318 | 8.399383 | 0.0 |
2019-01-01 12:00:00.100 | NaN | 0.178659 | 1.171133 | NaN | 2 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 49.096909 | 9.195753 | 0.0 |
2019-01-01 12:00:00.150 | NaN | 0.171573 | 1.124685 | NaN | 3 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 49.277061 | 9.960539 | 0.0 |
2019-01-01 12:00:00.200 | NaN | 0.164488 | 1.078238 | NaN | 4 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 49.449774 | 10.693741 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2019-01-19 12:00:06.000 | blue | 0.000000 | 0.000000 | white | 120 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
2019-01-19 12:00:06.050 | blue | 0.000000 | 0.000000 | white | 121 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
2019-01-19 12:00:06.100 | blue | 0.000000 | 0.000000 | white | 122 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
2019-01-19 12:00:06.150 | blue | 0.000000 | 0.000000 | white | 123 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
2019-01-19 12:00:06.200 | blue | 0.000000 | 0.000000 | white | 124 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
74936 rows × 12 columns
InΒ [6]:
# Number of records per team value, shown as a bar chart.
df["team"].value_counts().plot.bar(title="team", figsize=(15, 3))
Out[6]:
<Axes: title={'center': 'team'}, xlabel='team'>
InΒ [7]:
# Number of records per jersey number, shown as a bar chart.
df["player_num"].value_counts().plot.bar(title="player_num", figsize=(15, 3))
Out[7]:
<Axes: title={'center': 'player_num'}, xlabel='player_num'>
InΒ [8]:
# Store the identifier-like columns as ordered categoricals.
for column in ("team", "player", "player_num"):
    df[column] = df[column].astype("category").cat.as_ordered()
Finally, let's create trajectories:
Trajectories¶
InΒ [9]:
%%time
CRS = None
tc = mpd.TrajectoryCollection(df, "player", x="x", y="y", crs=CRS)
mpd.TemporalSplitter(tc).split(mode="day")
print(f"Finished creating {len(tc)} trajectories")
Finished creating 364 trajectories CPU times: total: 6.41 s Wall time: 6.43 s
InΒ [10]:
# Pitch outline as a closed ring: the first corner is repeated at the end.
pitch_corners = [
    (0, 0),
    (0, pitch_width),
    (pitch_length, pitch_width),
    (pitch_length, 0),
]
pitch = Polygon(pitch_corners + pitch_corners[:1])

# Wrap the polygon in a one-row GeoDataFrame so it can be drawn with hvplot
# and overlaid with the trajectory plots.
pitch_frame = pd.DataFrame([{"geometry": pitch, "id": 1}])
plotted_pitch = GeoDataFrame(pitch_frame, crs=CRS).hvplot(color="white", alpha=0.5)
InΒ [11]:
# Overlay the trajectories selected by player_num == 20 on the pitch outline.
plotted_pitch * tc.filter("player_num", 20).hvplot(**hvplot_defaults)
Out[11]:
Plays¶
InΒ [12]:
# Index into `plays` of the play to analyse.
PLAY = 2
selected_play = plays[PLAY]
title = f"Play {PLAY} {selected_play}"
# Keep only the trajectories that belong to the selected play.
play_trajs = tc.filter("play", selected_play)
play_trajs
Out[12]:
TrajectoryCollection with 20 trajectories
InΒ [13]:
# Static plot of the selected play, coloured by the "team" column (attack vs. defense).
play_trajs.plot(column="team", colormap={"attack": "hotpink", "defense": "turquoise"})
Out[13]:
<Axes: >
InΒ [14]:
# Thin out the 20 fps tracks using a minimum time delta of half a second.
min_time_step = timedelta(seconds=0.5)
generalizer = mpd.MinTimeDeltaGeneralizer(play_trajs)
generalized = generalizer.generalize(tolerance=min_time_step)
InΒ [15]:
# add_speed() writes a "speed" column into the trajectories and refuses to
# replace an existing one by default; overwrite=True lets this cell be re-run
# without raising because the column is already there.
generalized.add_speed(overwrite=True)
Out[15]:
TrajectoryCollection with 20 trajectories
InΒ [16]:
# Interactive plot of the generalized play, coloured by the computed speed.
speed_plot_options = dict(title=title, c="speed", hover_cols=["player", "team"])
generalized.hvplot(**speed_plot_options, **hvplot_defaults)
Out[16]:
InΒ [17]:
# Same speed-coloured plot with an explicit colour map, drawn over the pitch outline.
speed_lines = generalized.hvplot(
    title=title,
    c="speed",
    hover_cols=["player"],
    cmap="Viridis",
    **hvplot_defaults,
)
plotted_pitch * speed_lines
Out[17]:
InΒ [18]:
# Fetch the pitch background image (skipped if it is already on disk).
get_file_from_url(
    "https://github.com/movingpandas/movingpandas/raw/main/tutorials/data/soccer_field.png"
)

# Stretch the image over the pitch extent so it lines up with the trajectory coordinates.
pitch_bounds = (0, 0, pitch_length, pitch_width)
pitch_img = hv.RGB.load_image("soccer_field.png", bounds=pitch_bounds)

# Trajectories coloured by team.
team_colors = {"attack": "limegreen", "defense": "purple"}
team_lines = generalized.hvplot(
    title=title,
    c="team",
    colormap=team_colors,
    hover_cols=["team"],
    **hvplot_defaults,
)

# Mark where each trajectory starts.
start_markers = generalized.get_start_locations().hvplot(label="start", color="orange")

pitch_img * team_lines * start_markers
Out[18]: