Horse collar data exploration¶
This notebook presents a systematic movement data exploration workflow. The workflow consists of five main steps:
- Establishing an overview by visualizing raw input data records
- Putting records in context by exploring information from consecutive movement data records (such as time between records, speed, and direction)
- Extracting trajectories, locations & events by dividing the raw continuous tracks into individual trajectories, locations, and events
- Exploring patterns in trajectory and event data by looking at groups of the trajectories or events
- Analyzing outliers and how they may challenge preconceived assumptions about the dataset's characteristics
The workflow is demonstrated using horse collar tracking data provided by Prof. Lene Fischer (University of Copenhagen) and the Center for Technology & Environment of Guldborgsund Municipality in Denmark but should be generic enough to be applied to other tracking datasets.
import numpy as np
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import shapely as shp
import hvplot.pandas
import matplotlib.pyplot as plt
from pyproj import CRS
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
from holoviews import opts, dim, Layout
from os.path import exists
from urllib.request import urlretrieve
import warnings
warnings.filterwarnings("ignore")
plot_defaults = {"linewidth": 5, "capstyle": "round", "figsize": (9, 3), "legend": True}
opts.defaults(
opts.Overlay(active_tools=["wheel_zoom"], frame_width=300, frame_height=400)
)
hvplot_defaults = {"tiles": None, "cmap": "Viridis", "colorbar": True}
mpd.show_versions()
MovingPandas 0.20.0

SYSTEM INFO
-----------
python     : 3.10.15 | packaged by conda-forge | (main, Oct 16 2024, 01:15:49) [MSC v.1941 64 bit (AMD64)]
executable : c:\Users\Agarkovam\AppData\Local\miniforge3\envs\mpd-ex\python.exe
machine    : Windows-10-10.0.19045-SP0

GEOS, GDAL, PROJ INFO
---------------------
GEOS         : None
GEOS lib     : None
GDAL         : None
GDAL data dir: None
PROJ         : 9.5.0
PROJ data dir: C:\Users\Agarkovam\AppData\Local\miniforge3\envs\mpd-ex\Library\share\proj

PYTHON DEPENDENCIES
-------------------
geopandas  : 1.0.1
pandas     : 2.2.3
fiona      : None
numpy      : 1.23.1
shapely    : 2.0.6
pyproj     : 3.7.0
matplotlib : 3.9.2
mapclassify: 2.8.1
geopy      : 2.4.1
holoviews  : 1.20.0
hvplot     : 0.11.1
geoviews   : 1.13.0
stonesoup  : 1.4
mpd.__version__
'0.20.0'
Raw data import¶
df = read_file("../data/horse_collar.gpkg")
df["t"] = pd.to_datetime(df["timestamp"])
df = df.set_index("t").tz_localize(None)
print("This dataset contains {} records.\nThe first lines are:".format(len(df)))
df.head()
This dataset contains 17517 records. The first lines are:
t | No | CollarID | UTC_Date | UTC_Time | LMT_Date | LMT_Time | Origin | SCTS_Date | SCTS_Time | Latitude [?] | ... | C/N_10 | Sat_11 | C/N_11 | Mort. Status | Temp [?C] | Activity | Easting | Northing | timestamp | geometry
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
2018-11-14 12:01:08 | 299 | 30788 | 14-11-2018 | 12:01:08 | 14-11-2018 | 12:01:08 | GSM | 14-11-2018 | 14:32:31 | 547433308 | ... | None | None | None | None | 22.0 | None | 32.687.757.575 | 6.070.134.340 | 2018-11-14 12:01:08 | POINT (687757.574 6070134.334) |
2018-11-14 12:15:09 | 300 | 30788 | 14-11-2018 | 12:15:09 | 14-11-2018 | 12:15:09 | GSM | 14-11-2018 | 14:32:31 | 546768844 | ... | None | None | None | None | 22.0 | None | 32.687.671.089 | 6.062.727.428 | 2018-11-14 12:15:09 | POINT (687671.088 6062727.428) |
2018-11-14 12:30:08 | 301 | 30788 | 14-11-2018 | 12:30:08 | 14-11-2018 | 12:30:08 | GSM | 14-11-2018 | 14:32:31 | 546270178 | ... | None | None | None | None | 21.0 | None | 32.690.932.613 | 6.057.307.713 | 2018-11-14 12:30:08 | POINT (690932.614 6057307.716) |
2018-11-14 13:00:33 | 302 | 30788 | 14-11-2018 | 13:00:33 | 14-11-2018 | 13:00:33 | GSM | 14-11-2018 | 14:32:31 | 546258926 | ... | None | None | None | None | 17.0 | None | 32.690.669.041 | 6.057.171.253 | 2018-11-14 13:00:33 | POINT (690669.038 6057171.248) |
2018-11-14 13:30:09 | 303 | 30788 | 14-11-2018 | 13:30:09 | 14-11-2018 | 13:30:09 | GSM | 14-11-2018 | 14:32:31 | 546261709 | ... | None | None | None | None | 17.0 | None | 32.690.706.054 | 6.057.203.820 | 2018-11-14 13:30:09 | POINT (690706.056 6057203.814) |
5 rows × 48 columns
df.columns
Index(['No', 'CollarID', 'UTC_Date', 'UTC_Time', 'LMT_Date', 'LMT_Time', 'Origin', 'SCTS_Date', 'SCTS_Time', 'Latitude [?]', 'Longitude [?]', 'lat', 'long', 'FixType', 'Main [V]', 'Beacon [V]', 'Sats', 'Sat', 'C/N', 'Sat_1', 'C/N_1', 'Sat_2', 'C/N_2', 'Sat_3', 'C/N_3', 'Sat_4', 'C/N_4', 'Sat_5', 'C/N_5', 'Sat_6', 'C/N_6', 'Sat_7', 'C/N_7', 'Sat_8', 'C/N_8', 'Sat_9', 'C/N_9', 'Sat_10', 'C/N_10', 'Sat_11', 'C/N_11', 'Mort. Status', 'Temp [?C]', 'Activity', 'Easting', 'Northing', 'timestamp', 'geometry'], dtype='object')
df = df.drop(
columns=[
"LMT_Date",
"LMT_Time",
"Origin",
"SCTS_Date",
"SCTS_Time",
"Latitude [?]",
"Longitude [?]",
"FixType",
"Main [V]",
"Beacon [V]",
"Sats",
"Sat",
"C/N",
"Sat_1",
"C/N_1",
"Sat_2",
"C/N_2",
"Sat_3",
"C/N_3",
"Sat_4",
"C/N_4",
"Sat_5",
"C/N_5",
"Sat_6",
"C/N_6",
"Sat_7",
"C/N_7",
"Sat_8",
"C/N_8",
"Sat_9",
"C/N_9",
"Sat_10",
"C/N_10",
"Sat_11",
"C/N_11",
"Easting",
"Northing",
],
axis=1,
)
df.head()
t | No | CollarID | UTC_Date | UTC_Time | lat | long | Mort. Status | Temp [?C] | Activity | timestamp | geometry
---|---|---|---|---|---|---|---|---|---|---|---
2018-11-14 12:01:08 | 299 | 30788 | 14-11-2018 | 12:01:08 | 54.743331 | 11.916987 | None | 22.0 | None | 2018-11-14 12:01:08 | POINT (687757.574 6070134.334) |
2018-11-14 12:15:09 | 300 | 30788 | 14-11-2018 | 12:15:09 | 54.676884 | 11.910876 | None | 22.0 | None | 2018-11-14 12:15:09 | POINT (687671.088 6062727.428) |
2018-11-14 12:30:08 | 301 | 30788 | 14-11-2018 | 12:30:08 | 54.627018 | 11.957852 | None | 21.0 | None | 2018-11-14 12:30:08 | POINT (690932.614 6057307.716) |
2018-11-14 13:00:33 | 302 | 30788 | 14-11-2018 | 13:00:33 | 54.625893 | 11.953686 | None | 17.0 | None | 2018-11-14 13:00:33 | POINT (690669.038 6057171.248) |
2018-11-14 13:30:09 | 303 | 30788 | 14-11-2018 | 13:30:09 | 54.626171 | 11.954280 | None | 17.0 | None | 2018-11-14 13:30:09 | POINT (690706.056 6057203.814) |
collar_id = df["CollarID"].unique()[0]
print("There is only one collar with ID {}.".format(collar_id))
There is only one collar with ID 30788.
df["Activity"].unique()
array([None], dtype=object)
original_crs = df.crs
original_crs
<Projected CRS: EPSG:25832>
Name: ETRS89 / UTM zone 32N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Europe between 6°E and 12°E: Austria; Denmark - onshore and offshore; Germany - onshore and offshore; Italy - onshore and offshore; Norway including Svalbard - onshore and offshore; Spain - offshore.
- bounds: (6.0, 36.53, 12.01, 84.01)
Coordinate Operation:
- name: UTM zone 32N
- method: Transverse Mercator
Datum: European Terrestrial Reference System 1989 ensemble
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich
1. Establishing an overview¶
The first step in our proposed EDA workflow can be performed directly on raw input data since it does not require temporally ordered data. It is therefore suitable as a first exploratory step when dealing with new data.
Q1.1 Geographic extent: Is the geographical extent as expected and are there holes in the spatial coverage?¶
df.to_crs("EPSG:4326").hvplot(
title="Geographic extent of the dataset", geo=True, tiles="OSM"
)
The main area (the horse's pasture?) is located south of Nykobing Strandhuse.
However, we also find two records on the road northwest of the main area. Both points were recorded on 2018-11-14, the first day of the dataset.
pd.DataFrame(df).sort_values("lat").tail(2)
t | No | CollarID | UTC_Date | UTC_Time | lat | long | Mort. Status | Temp [?C] | Activity | timestamp | geometry
---|---|---|---|---|---|---|---|---|---|---|---
2018-11-14 12:15:09 | 300 | 30788 | 14-11-2018 | 12:15:09 | 54.676884 | 11.910876 | None | 22.0 | None | 2018-11-14 12:15:09 | POINT (687671.088 6062727.428) |
2018-11-14 12:01:08 | 299 | 30788 | 14-11-2018 | 12:01:08 | 54.743331 | 11.916987 | None | 22.0 | None | 2018-11-14 12:01:08 | POINT (687757.574 6070134.334) |
A potential hypothesis for the origin of these two records is that the horse (or the collar) was transported on 2018-11-14, taking the road from Nykobing Falster south to the pasture.
If we remove these first two records from the dataset, the remainder of the records are located in a small area:
df = df[2:].to_crs("EPSG:4326")
(
df.hvplot(title="OSM showing paths and fences", size=2, geo=True, tiles="OSM")
+ df.hvplot(
title="Imagery showing land cover details",
size=2,
color="red",
geo=True,
tiles="EsriImagery",
)
)
It looks like the horse generally avoids areas without green vegetation: the point patterns in these areas are sparser than elsewhere.
temp = df.to_crs(CRS(25832))
temp["geometry"] = temp["geometry"].buffer(5)
total_area = temp.dissolve(by="CollarID").area
total_area = total_area[collar_id] / 10000
print("The total area covered by the data is: {:,.2f} ha".format(total_area))
The total area covered by the data is: 65.19 ha
Q1.2 Temporal extent: Is the temporal extent as expected and are there holes in the temporal coverage?¶
print(
"The dataset covers the time between {} and {}.".format(
df.index.min(), df.index.max()
)
)
The dataset covers the time between 2018-11-14 12:30:08 and 2019-11-07 05:30:10.
print("That's {}".format(df.index.max() - df.index.min()))
That's 357 days 17:00:02
df["No"].resample("1d").count().hvplot(title="Number of records per day")
On most days, there are 48 (±1) records. However, some days have more records (in November 2018 and later between May and August 2019).
There is one gap: on 2019-10-18 there are no records in the dataset, and the previous day contains only 37 records, the following day only 27.
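A gap like this can also be found programmatically by resampling to daily counts and listing days with zero records. A minimal sketch using a small synthetic half-hourly index (the collar data itself is not needed to illustrate the idea):

```python
import pandas as pd

# synthetic example: half-hourly records with one full day missing
idx = pd.date_range("2019-10-16", "2019-10-20", freq="30min")
idx = idx[(idx < "2019-10-18") | (idx >= "2019-10-19")]  # drop all of 2019-10-18

# daily record counts, as in the plot above
counts = pd.Series(1, index=idx).resample("1d").count()
missing_days = counts[counts == 0].index
```

Here `missing_days` contains exactly the dropped day, `2019-10-18`.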
Q1.3 Spatio-temporal gaps: Does the geographic extent vary over time or do holes appear during certain times?¶
Considering that the dataset covers a whole year, it may be worthwhile to look at the individual months using small multiples map plots, for example:
df["Y-M"] = df.index.to_period("M")
Layout(
[
df[df["Y-M"] == i].hvplot(title=str(i), size=2, geo=True, tiles="OSM")
for i in df["Y-M"].unique()
]
)
The largest change between months seems to be that the southernmost part of the pasture wasn't used in August and September 2019.
2. Putting records in context¶
The second exploration step puts movement records in their temporal and geographic context. The exploration includes information based on consecutive movement data records, such as time between records (sampling intervals), speed, and direction. Therefore, this step requires temporally ordered data.
Q2.1 Sampling intervals: Is the data sampled at regular or irregular intervals?¶
For example, tracking data of migratory animals is expected to exhibit seasonal changes. In vehicle tracking systems, however, such changes may indicate issues with data collection.
t = df.reset_index().t
df = df.assign(delta_t=t.diff().values)
df["delta_t"] = df["delta_t"].dt.total_seconds() / 60
pd.DataFrame(df).hvplot.hist(
"delta_t",
title="Histogram of intervals between consecutive records (in minutes)",
bins=60,
bin_range=(0, 60),
)
The time delta between consecutive records is usually around 30 minutes.
However, it seems that sometimes the interval has been decreased to around 15 minutes. This would explain why some days have more than the usual 48 records.
Q2.2 Speed values: Are there any unrealistic movements?¶
For example: Does the data contain unattainable speeds?
tc = mpd.TrajectoryCollection(df, "CollarID")
traj = tc.trajectories[0]
traj.add_speed()
max_speed = traj.df.speed.max()
print(
"The highest computed speed is {:,.2f} m/s ({:,.2f} km/h)".format(
max_speed, max_speed * 3600 / 1000
)
)
The highest computed speed is 0.82 m/s (2.94 km/h)
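Speeds like this are derived from the distance and time between consecutive fixes. A pure-Python sketch of the idea, using the haversine great-circle distance and two made-up fixes 30 minutes apart (hypothetical coordinates, not actual collar records):

```python
from math import radians, sin, cos, asin, sqrt

def haversine_m(lat1, lon1, lat2, lon2, r=6371000):
    """Great-circle distance between two lat/lon points in metres."""
    dlat, dlon = radians(lat2 - lat1), radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    return 2 * r * asin(sqrt(a))

# two hypothetical fixes recorded 30 minutes apart
dist = haversine_m(54.6260, 11.9540, 54.6270, 11.9560)  # ~170 m
speed = dist / (30 * 60)  # metres per second
```

At this latitude the two fixes are roughly 170 m apart, giving a speed of about 0.09 m/s — a plausible grazing pace, well below the 0.82 m/s maximum computed above.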
Q2.3 Movement patterns: Are there any patterns in movement direction or speed?¶
pd.DataFrame(traj.df).hvplot.hist(
"speed", title="Histogram of speeds (in meters per second)", bins=90
)
The speed distribution shows no surprising patterns.
traj.add_direction(overwrite=True)
pd.DataFrame(traj.df).hvplot.hist("direction", title="Histogram of directions", bins=90)
There is some variation in movement directions but no directions stand out in the histogram.
Let's look at spatial patterns of direction and speed!
Q2.4 Temporal context: Does the movement make sense in its temporal context?¶
For example: Do nocturnal animal tracks show movement at night?
pd.DataFrame(traj.df).hvplot.heatmap(
title="Mean speed by hour of day and month of year",
x="t.hour",
y="t.month",
C="speed",
reduce_function=np.mean,
)
The movement speed by hour of day shows a clear pattern throughout the year with earlier and longer fast movements during the summer months and later and slower movements during the winter months.
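The aggregation behind such a heatmap is an ordinary group-by: the mean speed per (month, hour) cell. A sketch with a tiny synthetic table (hypothetical values, not the collar data):

```python
import pandas as pd

records = pd.DataFrame(
    {
        "hour": [6, 6, 18, 18],
        "month": [1, 1, 7, 7],
        "speed": [0.05, 0.15, 0.30, 0.50],
    }
)
# mean speed per (month, hour) cell, as plotted in the heatmap
cell_means = records.pivot_table(
    index="month", columns="hour", values="speed", aggfunc="mean"
)
```

Each heatmap cell is simply one entry of this pivot table, e.g. the January 6:00 cell averages 0.05 and 0.15 to 0.10 m/s.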
Temperature context¶
In addition to time, the dataset also contains temperature information for each record:
traj.df["n"] = 1
pd.DataFrame(traj.df).hvplot.heatmap(
title="Record count by temperature and month of year",
x="Temp [?C]",
y="t.month",
C="n",
reduce_function=np.sum,
)
pd.DataFrame(traj.df).hvplot.heatmap(
title="Mean speed by temperature and month of year",
x="Temp [?C]",
y="t.month",
C="speed",
reduce_function=np.mean,
)
Q2.5 Geographic context: Does the movement make sense in its geographic context?¶
For example: Do vessels follow traffic separation schemes defined in maritime maps? Are there any ship trajectories crossing land?
traj.df["dir_class"] = ((traj.df["direction"] - 22.5) / 45).round(0)
temp = traj.df
Layout(
[
temp[temp["dir_class"] == i].hvplot(
geo=True,
tiles="OSM",
size=2,
width=300,
height=300,
title=str(int(i * 45)) + "°",
)
for i in sorted(temp["dir_class"].unique())
]
)
There are no obvious spatial movement direction patterns.
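The `dir_class` expression above bins compass directions into eight 45° sectors: `round((d - 22.5) / 45)` yields class k for directions in [45·k, 45·(k+1)), so the class label times 45 is the lower edge of the sector, matching the plot titles. A pure-Python illustration (exact multiples of 45° are subject to round-half-to-even behaviour):

```python
def dir_class(d):
    # same binning as the notebook: class k covers directions [45*k, 45*(k+1))
    return round((d - 22.5) / 45)

classes = {d: dir_class(d) for d in (10, 100, 300)}
```

For example, 10° falls into class 0 (the 0° sector), 100° into class 2 (90°), and 300° into class 6 (270°).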
traj.df["speed_class"] = (traj.df["speed"] * 2).round(1)
temp = traj.df
plots = []
for i in sorted(temp["speed_class"].unique()):
filtered = temp[temp["speed_class"] == i]
if len(filtered) < 10:
continue
plots.append(
filtered.hvplot(
geo=True,
tiles="EsriImagery",
color="red",
size=2,
width=300,
height=300,
title=str(i / 2),
)
)
Layout(plots)
Low-speed records (classes 0.0 and 0.05 m/s) are distributed over the whole area, with many points on the outline (fence?) of the area.
Medium-speed records (classes 0.1 and 0.15 m/s) seem to be more common along paths and channels.
3. Extracting trajectories & locations / events¶
The third exploration step looks at individual trajectories. It therefore requires that the continuous tracks are split into individual trajectories. Analysis results depend on how the continuous streams are divided into trajectories, locations, and events.
3.1 Trajectory lines: Do the trajectory lines look plausible or are there indications of out-of-sequence positions or other unrealistic location jumps?¶
tc.hvplot()
tc.to_traj_gdf().explore(color="orange", tiles="CartoDB positron")
Due to the 30 minute reporting interval, the trajectories are rather sparse.
The trajectories mostly stay within the (fenced?) area. However, there are a few cases of positions outside the area.
Movement during week #1¶
daily = mpd.TemporalSplitter(tc).split(mode="day")
Layout(
[
daily.trajectories[i].hvplot(
title=daily.trajectories[i].id, c="speed", line_width=2, cmap="RdYlBu"
)
for i in range(0, 7)
]
)
3.2 Home/depot locations: Do day trajectories start and end at the same home (for human and animal movement) or depot (for logistics applications) location?¶
daily_starts = daily.get_start_locations()
daily_starts.set_index(pd.to_datetime(daily_starts["timestamp"]), inplace=True)
daily_starts["month"] = daily_starts.index.month
daily_starts.hvplot(c="month", geo=True, tiles="EsriImagery", cmap="autumn")
There is no clear preference for a certain home location where the horse would tend to spend the night.
Instead of splitting by date, we can also specify a minimum movement speed and then split the continuous observation when this minimum speed is not reached for a certain time:
moving = mpd.TrajectoryCollection(traj.df[traj.df["speed"] > 0.05], "CollarID")
moving = mpd.ObservationGapSplitter(moving).split(gap=timedelta(minutes=70))
moving.get_start_locations().hvplot(
c="month", geo=True, tiles="EsriImagery", color="red"
)
3.3 Trajectory length¶
daily_lengths = [traj.get_length() for traj in daily]
daily_t = [traj.get_start_time() for traj in daily]
daily_lengths = pd.DataFrame(daily_lengths, index=daily_t, columns=["length"])
daily_lengths.hvplot(title="Daily trajectory length")
The length of the daily trajectories varies between 1.6 and 6.2 km. (It is worth noting that this has to be considered a lower bound of the movement due to the sparseness of the tracking data.)
The seasonal trends agree well with the previously discovered seasonal movement speed patterns: winter trajectories tend to be shorter than summer trajectories.
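The lower-bound effect is easy to demonstrate: summing straight-line segments between sparse fixes cuts the corners of the true path. A small sketch with a synthetic zigzag track:

```python
from math import hypot

def path_length(pts):
    # total length of the polyline through pts
    return sum(hypot(x2 - x1, y2 - y1) for (x1, y1), (x2, y2) in zip(pts, pts[1:]))

fine = [(i, i % 2) for i in range(20)]  # zigzag track sampled at every vertex
coarse = fine[::4]                       # keep only every 4th fix (sparser sampling)
```

The coarse polyline runs straight through the zigzag, so `path_length(coarse)` (16.0) is far below `path_length(fine)` (about 26.9): sparsely sampled trajectory lengths can only underestimate the distance actually travelled.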
3.4 Covered area¶
Method 1: Convex hulls around trajectory¶
daily_areas = [
(traj.id, traj.to_crs(CRS(25832)).to_linestring().convex_hull.area / 10000)
for traj in daily
]
daily_areas = pd.DataFrame(daily_areas, index=daily_t, columns=["id", "area"])
daily_areas.hvplot(title="Daily covered area [ha]", y="area")
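For context, here is what the convex-hull area computation boils down to, sketched in pure Python (Andrew's monotone chain hull plus the shoelace formula) on a handful of made-up planar points in metres:

```python
def convex_hull(points):
    """Andrew's monotone chain; returns hull vertices in counter-clockwise order."""
    pts = sorted(set(points))
    if len(pts) <= 2:
        return pts

    def cross(o, a, b):
        return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])

    lower, upper = [], []
    for p in pts:
        while len(lower) >= 2 and cross(lower[-2], lower[-1], p) <= 0:
            lower.pop()
        lower.append(p)
    for p in reversed(pts):
        while len(upper) >= 2 and cross(upper[-2], upper[-1], p) <= 0:
            upper.pop()
        upper.append(p)
    return lower[:-1] + upper[:-1]

def shoelace_area(poly):
    """Area of a simple polygon given its vertices in order."""
    return abs(
        sum(x1 * y2 - x2 * y1 for (x1, y1), (x2, y2) in zip(poly, poly[1:] + poly[:1]))
    ) / 2

# hypothetical fixes: four corners of a 100 m square plus an interior point
hull = convex_hull([(0, 0), (100, 0), (100, 100), (0, 100), (50, 50)])
area_ha = shoelace_area(hull) / 10000  # m² to hectares
```

The interior point is discarded by the hull, and the 100 m × 100 m square yields an area of exactly 1 ha — the same quantity (hull area in hectares) that the MovingPandas one-liner above computes per daily trajectory.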
Method 2: Buffered trajectory¶
daily_areas = [
(traj.id, traj.to_crs(CRS(25832)).to_linestring().buffer(15).area / 10000)
for traj in daily
]
daily_areas = pd.DataFrame(daily_areas, index=daily_t, columns=["id", "area"])
daily_areas.hvplot(title="Daily covered area [ha]", y="area")
The ten smallest areas are:
daily_areas.sort_values(by="area")[:10]
t | id | area
---|---|---
2019-11-07 00:00:09 | 30788_2019-11-07 00:00:00 | 0.666244 |
2018-11-19 00:00:39 | 30788_2018-11-19 00:00:00 | 1.245522 |
2018-11-14 12:30:08 | 30788_2018-11-14 00:00:00 | 2.039815 |
2019-05-12 00:00:39 | 30788_2019-05-12 00:00:00 | 2.856620 |
2019-05-13 00:00:08 | 30788_2019-05-13 00:00:00 | 3.282696 |
2019-02-04 00:00:09 | 30788_2019-02-04 00:00:00 | 3.922588 |
2019-02-10 00:00:40 | 30788_2019-02-10 00:00:00 | 4.179243 |
2019-02-13 00:00:39 | 30788_2019-02-13 00:00:00 | 4.328685 |
2019-03-07 00:00:09 | 30788_2019-03-07 00:00:00 | 4.336160 |
2019-02-22 00:00:10 | 30788_2019-02-22 00:00:00 | 4.380967 |
The days with the smallest covered areas include the first and the last observation day (since they are only partially recorded). We can remove those:
daily_areas = daily_areas.drop(datetime(2018, 11, 14, 12, 30, 8))
daily_areas = daily_areas.drop(datetime(2019, 11, 7, 0, 0, 9))
The smallest area for a complete day was observed on 2018-11-19 with only 1.2 ha:
Layout(
[
daily.get_trajectory(i).hvplot(
title=i, c="speed", line_width=2, cmap="RdYlBu", width=300, height=300
)
for i in daily_areas.sort_values(by="area")[:3].id
]
)
3.5 Stop detection¶
Instead of splitting the continuous track into daily trajectories, an alternative approach is to split it at stops. Stops can be defined as parts of the track where the moving object stays within a small area for a certain duration.
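The core idea can be sketched without MovingPandas: grow a window of consecutive fixes while all of them stay within `max_diameter` of each other, and report the window as a stop once it lasts at least `min_duration`. A simplified pure-Python version on a synthetic track (planar coordinates in metres, timestamps in seconds; this is an illustration of the concept, not the library's actual algorithm):

```python
from math import hypot

def detect_stops(track, max_diameter, min_duration):
    """track: list of (t, x, y) with t in seconds and x/y in metres.
    Returns (start_t, end_t) intervals where the object stays within
    max_diameter of all other window points for at least min_duration."""
    stops = []
    i = 0
    while i < len(track):
        j = i
        # grow the window while the next fix stays within max_diameter
        # of every fix already in the window
        while j + 1 < len(track) and all(
            hypot(track[j + 1][1] - q[1], track[j + 1][2] - q[2]) <= max_diameter
            for q in track[i : j + 1]
        ):
            j += 1
        if track[j][0] - track[i][0] >= min_duration:
            stops.append((track[i][0], track[j][0]))
            i = j + 1
        else:
            i += 1
    return stops

# synthetic track: an hour near the origin, then a jump 500 m away
track = [(0, 0, 0), (1800, 10, 5), (3600, 20, 0), (5400, 500, 0), (7200, 510, 0)]
stops_found = detect_stops(track, max_diameter=100, min_duration=3000)
```

The first three fixes form a stop of 3600 s within a small circle; the last two are close together but do not last long enough, so only one stop is reported.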
Let's have a look at movement of one day and how stop detection parameter settings affect the results:
MAX_DIAMETER = 100
MIN_DURATION = timedelta(hours=3)
one_day = daily.get_trajectory("30788_2018-11-17 00:00:00")
one_day_stops = mpd.TrajectoryStopDetector(one_day).get_stop_segments(
min_duration=MIN_DURATION, max_diameter=MAX_DIAMETER
)
(
one_day.hvplot(
title="Stops in Trajectory {}".format(one_day.id),
line_width=3.0,
color="slategray",
)
* one_day_stops.hvplot(line_width=5, tiles=None, color="deeppink")
* one_day_stops.get_start_locations().hvplot(geo=True, size=200, color="deeppink")
)
Let's apply stop detection to the whole dataset:
%%time
stops = mpd.TrajectoryStopDetector(tc).get_stop_points(
min_duration=MIN_DURATION, max_diameter=MAX_DIAMETER
)
len(stops)
CPU times: total: 8.89 s Wall time: 9.29 s
369
The spatial distribution reveals preferred stop locations:
stops.hvplot(geo=True, tiles="OSM", color="deeppink", size=MAX_DIAMETER, alpha=0.2)
Preferred stop locations in another visualization:
stops_gdf = gpd.GeoDataFrame(stops, geometry="geometry", crs="EPSG:4326")
stops_gdf.explore(
tiles="CartoDB positron",
tooltip="traj_id",
popup=True,
marker_type="marker",
)
Lastly, stop duration:
stops["duration_h"] = (
stops["end_time"] - stops["start_time"]
).dt.total_seconds() / 3600
pd.DataFrame(stops)["duration_h"].hvplot.hist(
title="Stop duration histogram", xlabel="Duration [hours]", ylabel="n", bins=12
)