import numpy as np
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import shapely as shp
import hvplot.pandas
import matplotlib.pyplot as plt
import folium

from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
from holoviews import opts, dim
from os.path import exists
from urllib.request import urlretrieve

import warnings

warnings.filterwarnings("ignore")

plot_defaults = {"linewidth": 5, "capstyle": "round", "figsize": (9, 3), "legend": True}
opts.defaults(
    opts.Overlay(active_tools=["wheel_zoom"], frame_width=500, frame_height=400)
)
hvplot_defaults = {"tiles": None, "cmap": "Viridis", "colorbar": True}

mpd.show_versions()

MovingPandas 0.20.0

SYSTEM INFO
-----------
python     : 3.10.15 | packaged by conda-forge | (main, Oct 16 2024, 01:15:49) [MSC v.1941 64 bit (AMD64)]
executable : c:\Users\Agarkovam\AppData\Local\miniforge3\envs\mpd-ex\python.exe
machine    : Windows-10-10.0.19045-SP0

GEOS, GDAL, PROJ INFO
---------------------
GEOS       : None
GEOS lib   : None
GDAL       : None
GDAL data dir: None
PROJ       : 9.5.0
PROJ data dir: C:\Users\Agarkovam\AppData\Local\miniforge3\envs\mpd-ex\Library\share\proj

PYTHON DEPENDENCIES
-------------------
geopandas  : 1.0.1
pandas     : 2.2.3
fiona      : None
numpy      : 1.23.1
shapely    : 2.0.6
pyproj     : 3.7.0
matplotlib : 3.9.2
mapclassify: 2.8.1
geopy      : 2.4.1
holoviews  : 1.20.0
hvplot     : 0.11.1
geoviews   : 1.13.0
stonesoup  : 1.4

%%time
df = read_file("../data/ais.gpkg")
print(f"Finished reading {len(df)}")

Finished reading 84702
CPU times: total: 922 ms
Wall time: 925 ms

df.head()

df.plot()

<Axes: >

df["SOG"].hist(bins=100, figsize=(15, 3))

<Axes: >

print(f"Original size: {len(df)} rows")
df = df[df.SOG > 0]
print(f"Reduced to {len(df)} rows after removing 0 speed records")
df["SOG"].hist(bins=100, figsize=(15, 3))

Original size: 84702 rows
Reduced to 33593 rows after removing 0 speed records

<Axes: >

df["ShipType"].value_counts().plot(kind="bar", figsize=(15, 3))

<Axes: xlabel='ShipType'>

%%time
df["t"] = pd.to_datetime(df["Timestamp"], format="%d/%m/%Y %H:%M:%S")
traj_collection = mpd.TrajectoryCollection(df, "MMSI", t="t", min_length=100)
print(f"Finished creating {len(traj_collection)} trajectories")

Finished creating 77 trajectories
CPU times: total: 4.31 s
Wall time: 4.32 s

traj_collection = mpd.MinTimeDeltaGeneralizer(traj_collection).generalize(
    tolerance=timedelta(minutes=1)
)

shiptype_to_color = {
    "Passenger": "blue",
    "HSC": "green",
    "Tanker": "red",
    "Cargo": "orange",
    "Sailing": "grey",
    "Other": "grey",
    "Tug": "grey",
    "SAR": "grey",
    "Undefined": "grey",
    "Pleasure": "grey",
    "Dredging": "grey",
    "Law enforcement": "grey",
    "Pilot": "grey",
    "Fishing": "grey",
    "Diving": "grey",
    "Spare 2": "grey",
}
traj_collection.plot(
    column="ShipType", column_to_color=shiptype_to_color, linewidth=1, capstyle="round"
)

<Axes: >

passenger = traj_collection.filter("ShipType", "Passenger")
passenger.hvplot(
    title="Passenger ferries", line_width=2, frame_width=700, frame_height=500
)

passenger.explore(
    column="MMSI",
    cmap="turbo",
    tiles="CartoDB positron",
    tooltip="Name",
    popup=True,
    legend=False,
)

my_traj = traj_collection.trajectories[0]
my_traj.df.head()

my_traj.df.tail()

my_traj.hvplot(
    title=f"Trajectory {my_traj.id}",
    frame_width=700,
    frame_height=500,
    line_width=5.0,
    c="NavStatus",
    cmap="Dark2",
)

my_traj.explore(
    column="SOG", cmap="plasma", tiles="CartoDB positron", style_kwds={"weight": 5}
)

area_of_interest = Polygon(
    [
        (11.89935, 57.69270),
        (11.90161, 57.68902),
        (11.90334, 57.68967),
        (11.90104, 57.69354),
        (11.89935, 57.69270),
    ]
)

intersecting = traj_collection.get_intersecting(area_of_interest)
print(f"Found {len(intersecting)} intersections")

Found 20 intersections

bridge_traj = intersecting.trajectories[0]
bridge_traj.hvplot(
    title=f"Trajectory {bridge_traj.id}",
    frame_width=700,
    frame_height=500,
    line_width=5.0,
    c="NavStatus",
    cmap="Dark2",
)

bridge_traj.explore(color="red", tiles="CartoDB positron", style_kwds={"weight": 5})

bridge_traj.df.head()

trips = mpd.ObservationGapSplitter(passenger).split(gap=timedelta(minutes=5))
print(
    f"Extracted {len(trips)} individual trips from {len(passenger)} continuous vessel tracks"
)

Extracted 90 individual trips from 14 continuous vessel tracks

trips.hvplot(
    title="Passenger ferry trips", line_width=2, frame_width=700, frame_height=500
)

trips.explore(
    column="MMSI",
    cmap="hsv",
    tiles="CartoDB positron",
    tooltip="Name",
    popup=True,
    legend=False,
)

origins = trips.get_start_locations()
origins.hvplot(
    title="Trip origins by ship type",
    c="Name",
    geo=True,
    tiles="OSM",
    frame_width=700,
    frame_height=500,
)

origins_gdf = gpd.GeoDataFrame(origins, geometry="geometry", crs=4326)

origins_gdf.explore(column="Name", tiles="CartoDB positron", marker_kwds={"radius": 5})

origins.hvplot(
    title="Origins by speed",
    c="SOG",
    geo=True,
    tiles="OSM",
    frame_width=700,
    frame_height=500,
)

origins_gdf.explore(
    column="SOG", tiles="CartoDB positron", legend=False, marker_kwds={"radius": 5}
)

trips = mpd.ObservationGapSplitter(traj_collection).split(gap=timedelta(minutes=5))
area_of_interest = Polygon(
    [
        (11.86815, 57.68273),
        (11.86992, 57.68047),
        (11.87419, 57.68140),
        (11.87288, 57.68348),
        (11.86815, 57.68273),
    ]
)

departures = [
    traj
    for traj in trips
    if traj.get_start_location().intersects(area_of_interest)
    and traj.get_length() > 100
]
print(f"Found {len(departures)} departures")

Found 12 departures

tc = mpd.TrajectoryCollection(departures)
tc.hvplot(
    title=f"Ships departing from Sjöfartsverket",
    line_width=3,
    frame_width=700,
    frame_height=500,
    hover_cols=["Name"],
)

for traj in departures:
    print(
        f"{traj.df['ShipType'].iloc[0]} vessel '{traj.df['Name'].iloc[0]}' departed at {traj.get_start_time()}"
    )

Law enforcement vessel 'KBV 010' departed at 2017-07-05 10:36:03
Law enforcement vessel 'KBV 010' departed at 2017-07-05 14:33:02
Law enforcement vessel 'KBV 048' departed at 2017-07-05 10:20:44
Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 01:21:07
Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 04:15:04
Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 06:58:56
Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 08:45:08
Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 12:02:18
Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 13:34:42
Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 22:32:47
Pilot vessel 'PILOT 218 SE' departed at 2017-07-05 09:27:24
Pilot vessel 'PILOT 218 SE' departed at 2017-07-05 16:10:29

arrivals = [
    traj
    for traj in trips
    if traj.get_end_location().intersects(area_of_interest) and traj.get_length() > 100
]
print(f"Found {len(arrivals)} arrivals")

for traj in arrivals:
    print(
        f"{traj.df['ShipType'].iloc[0]} vessel '{traj.df['Name'].iloc[0]}' arrived at {traj.get_end_time()}"
    )

Found 12 arrivals
Law enforcement vessel 'KBV 010' arrived at 2017-07-05 10:51:03
Law enforcement vessel 'KBV 048' arrived at 2017-07-05 10:26:44
Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 01:36:56
Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 04:45:36
Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 08:16:46
Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 08:54:34
Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 13:06:37
Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 16:44:06
Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 23:58:49
Pilot vessel 'PILOT 218 SE' arrived at 2017-07-05 10:07:23
Pilot vessel 'PILOT 218 SE' arrived at 2017-07-05 17:46:12
Tanker vessel 'DANA' arrived at 2017-07-05 08:35:42

tc = mpd.TrajectoryCollection(arrivals)
tc.hvplot(
    title=f"Ships arriving in Sjöfartsverket",
    line_width=3,
    frame_width=700,
    frame_height=500,
    hover_cols=["Name"],
)

from sklearn.cluster import DBSCAN
from geopy.distance import great_circle
from shapely.geometry import MultiPoint

origins = trips.get_start_locations()
origins["lat"] = origins.geometry.y
origins["lon"] = origins.geometry.x
matrix = origins[["lat", "lon"]].values

kms_per_radian = 6371.0088
epsilon = 0.1 / kms_per_radian

db = DBSCAN(eps=epsilon, min_samples=1, algorithm="ball_tree", metric="haversine").fit(
    np.radians(matrix)
)
cluster_labels = db.labels_
num_clusters = len(set(cluster_labels))
clusters = pd.Series([matrix[cluster_labels == n] for n in range(num_clusters)])
print(f"Number of clusters: {num_clusters}")

Number of clusters: 69

origins["cluster"] = cluster_labels

def get_centermost_point(cluster):
    centroid = (MultiPoint(cluster).centroid.x, MultiPoint(cluster).centroid.y)
    centermost_point = min(cluster, key=lambda point: great_circle(point, centroid).m)
    return Point(tuple(centermost_point)[1], tuple(centermost_point)[0])


centermost_points = clusters.map(get_centermost_point)

origins.hvplot(
    title="Clustered origins",
    c="cluster",
    geo=True,
    tiles="OSM",
    cmap="glasbey_dark",
    frame_width=700,
    frame_height=500,
)

origins_by_cluster = pd.DataFrame(origins).groupby(["cluster"])
summary = origins_by_cluster["ShipType"].unique().to_frame(name="types")
summary["n"] = origins_by_cluster.size()
summary["sog"] = origins_by_cluster["SOG"].mean()
summary["geometry"] = centermost_points
summary = summary[summary["n"] > 1].sort_values(by="n", ascending=False)
summary.head()

cluster_of_interest_id = 28
origins[origins["cluster"] == cluster_of_interest_id].hvplot(
    title=f"Cluster {cluster_of_interest_id}",
    c="ShipType",
    geo=True,
    tiles="OSM",
    frame_width=700,
    frame_height=500,
)

(
    trips.hvplot(
        title="Origin clusters by speed",
        color="gray",
        line_width=1,
        frame_width=700,
        frame_height=500,
    )
    * GeoDataFrame(summary, crs=4326).hvplot(
        c="sog", size=np.sqrt(dim("n")) * 3, geo=True, cmap="RdYlGn"
    )
)

summary_gdf = gpd.GeoDataFrame(summary, crs=4326)

m = trips.explore(name="Trips", style_kwds={"weight": 1})

summary_gdf.explore(
    m=m,
    column="sog",
    legend=False,
    style_kwds={"style_function": lambda x: {"radius": x["properties"]["n"]}},
    name="Clusters",
)

folium.TileLayer("CartoDB positron").add_to(m)
folium.LayerControl().add_to(m)

m

	Timestamp	MMSI	NavStatus	COG	Name	ShipType	geometry
0	05/07/2017 00:00:03	219632000	Under way using engine	270.4	None	Undefined	POINT (11.85958 57.68817)
1	05/07/2017 00:00:05	265650970	Under way using engine	0.5	None	Undefined	POINT (11.84175 57.6615)
2	05/07/2017 00:00:06	265503900	Under way using engine	0.0	None	Undefined	POINT (11.9065 57.69077)
3	05/07/2017 00:00:14	219632000	Under way using engine	188.4	None	Undefined	POINT (11.85958 57.68817)
4	05/07/2017 00:00:19	265519650	Under way using engine	357.2	None	Undefined	POINT (11.87192 57.68233)

	Timestamp	MMSI	NavStatus	SOG	COG	Name	ShipType	geometry
t
2017-07-05 17:32:18	05/07/2017 17:32:18	210035000	Under way using engine	9.8	52.8	NORDIC HAMBURG	Cargo	POINT (11.80462 57.67612)
2017-07-05 17:33:18	05/07/2017 17:33:18	210035000	Under way using engine	9.5	58.9	NORDIC HAMBURG	Cargo	POINT (11.80875 57.67773)
2017-07-05 17:34:18	05/07/2017 17:34:18	210035000	Under way using engine	9.3	70.5	NORDIC HAMBURG	Cargo	POINT (11.81311 57.67879)
2017-07-05 17:35:28	05/07/2017 17:35:28	210035000	Under way using engine	9.5	71.1	NORDIC HAMBURG	Cargo	POINT (11.81855 57.67968)
2017-07-05 17:36:28	05/07/2017 17:36:28	210035000	Under way using engine	9.4	71.3	NORDIC HAMBURG	Cargo	POINT (11.82334 57.68044)

	Timestamp	MMSI	NavStatus	SOG	COG	Name	ShipType	geometry
t
2017-07-05 22:47:34	05/07/2017 22:47:34	210035000	Moored	0.1	276.0	NORDIC HAMBURG	Cargo	POINT (11.84571 57.68958)
2017-07-05 23:08:44	05/07/2017 23:08:44	210035000	Moored	0.1	96.0	NORDIC HAMBURG	Cargo	POINT (11.84571 57.68958)
2017-07-05 23:09:54	05/07/2017 23:09:54	210035000	Moored	0.1	96.0	NORDIC HAMBURG	Cargo	POINT (11.84571 57.68958)
2017-07-05 23:11:45	05/07/2017 23:11:45	210035000	Moored	0.1	96.0	NORDIC HAMBURG	Cargo	POINT (11.8457 57.68958)
2017-07-05 23:35:58	05/07/2017 23:35:58	210035000	Moored	0.1	276.0	NORDIC HAMBURG	Cargo	POINT (11.84571 57.68958)

	Timestamp	MMSI	NavStatus	SOG	COG	Name	ShipType	geometry
t
2017-07-05 01:21:57	05/07/2017 01:21:57	219011922	Under way using engine	8.8	227.7	SCANDINAVIA	Tanker	POINT (11.9123 57.69634)
2017-07-05 01:23:06	05/07/2017 01:23:06	219011922	Under way using engine	8.7	227.5	SCANDINAVIA	Tanker	POINT (11.90842 57.6944)
2017-07-05 01:24:06	05/07/2017 01:24:06	219011922	Under way using engine	8.7	227.0	SCANDINAVIA	Tanker	POINT (11.90515 57.69275)
2017-07-05 01:25:06	05/07/2017 01:25:06	219011922	Under way using engine	8.6	238.8	SCANDINAVIA	Tanker	POINT (11.90161 57.69129)
2017-07-05 01:26:08	05/07/2017 01:26:08	219011922	Under way using engine	8.5	245.4	SCANDINAVIA	Tanker	POINT (11.89763 57.69015)

	types	n	sog	geometry
cluster
5	[Tanker, Passenger, Undefined, Fishing, Cargo]	52	9.217308	POINT (11.911787 57.69663)
28	[Passenger, Undefined, HSC]	47	0.804255	POINT (11.84232 57.661593)
0	[Cargo, Tanker, Tug, Passenger]	28	11.946429	POINT (11.80495 57.676108)
27	[Passenger, Undefined, HSC]	24	15.9875	POINT (11.819332 57.648027)
11	[SAR, Passenger]	19	10.736842	POINT (11.804653 57.654408)

Ship data analysis example¶

Loading sample AIS data¶

Plotting trajectories¶

Visualizing trajectory properties¶

Finding ships passing under Älvsborgsbron bridge¶

Identifying trip origins and destinations¶

Finding ships that depart from Sjöfartsverket (Maritime Administration)¶

Clustering origins¶

Continue exploring MovingPandas¶