import numpy as np
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import shapely as shp
import hvplot.pandas
import matplotlib.pyplot as plt
import h3
import folium

from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
from holoviews import opts, dim
from os.path import exists
from urllib.request import urlretrieve

import warnings

warnings.filterwarnings("ignore")

plot_defaults = {"linewidth": 5, "capstyle": "round", "figsize": (9, 3), "legend": True}
opts.defaults(
    opts.Overlay(active_tools=["wheel_zoom"], frame_width=300, frame_height=500)
)
hvplot_defaults = {"tiles": None, "cmap": "Viridis", "colorbar": True}

mpd.show_versions()

MovingPandas 0.20.0

SYSTEM INFO
-----------
python     : 3.10.15 | packaged by conda-forge | (main, Oct 16 2024, 01:15:49) [MSC v.1941 64 bit (AMD64)]
executable : c:\Users\Agarkovam\AppData\Local\miniforge3\envs\mpd-ex\python.exe
machine    : Windows-10-10.0.19045-SP0

GEOS, GDAL, PROJ INFO
---------------------
GEOS       : None
GEOS lib   : None
GDAL       : None
GDAL data dir: None
PROJ       : 9.5.0
PROJ data dir: C:\Users\Agarkovam\AppData\Local\miniforge3\envs\mpd-ex\Library\share\proj

PYTHON DEPENDENCIES
-------------------
geopandas  : 1.0.1
pandas     : 2.2.3
fiona      : None
numpy      : 1.23.1
shapely    : 2.0.6
pyproj     : 3.7.0
matplotlib : 3.9.2
mapclassify: 2.8.1
geopy      : 2.4.1
holoviews  : 1.20.0
hvplot     : 0.11.1
geoviews   : 1.13.0
stonesoup  : 1.4

%%time
df = pd.read_csv("../data/2021-01-30_all.zip", index_col=0)
print(f"Finished reading {len(df)}")

Finished reading 180573
CPU times: total: 453 ms
Wall time: 451 ms

df.head()

df.plot(c="pm2_5", x="long", y="lat", kind="scatter")

<Axes: xlabel='long', ylabel='lat'>

tc = mpd.TrajectoryCollection(df, "deviceId", t="dateTime", x="long", y="lat")
print(tc)

TrajectoryCollection with 11 trajectories

traj_gdf = tc.to_traj_gdf(agg={"pm2_5": "mean"})

traj_gdf.plot("pm2_5_mean", cmap="YlOrRd", linewidth=0.7, legend=True, aspect=1)

<Axes: >

split = mpd.ObservationGapSplitter(tc).split(gap=timedelta(minutes=10))
split

TrajectoryCollection with 76 trajectories

split = split.add_speed(units=("km", "h"))

traj_gdf = split.to_traj_gdf(agg={"pm2_5": "mean", "speed": "max"})

traj_gdf = traj_gdf[traj_gdf.speed_max < 108]

traj_gdf["start_t"] = traj_gdf["start_t"].astype(str)
traj_gdf["end_t"] = traj_gdf["end_t"].astype(str)

traj_gdf = traj_gdf.round(2)

traj_gdf.explore(
    column="pm2_5_mean",
    cmap="YlOrRd",
    tiles="CartoDB positron",
    style_kwds={"weight": 4},
)

point_gdf = split.to_point_gdf()

point_gdf = point_gdf[point_gdf.speed < 30]

point_gdf["x"] = point_gdf.geometry.x
point_gdf["y"] = point_gdf.geometry.y

res = 7
point_gdf["h3_cell"] = point_gdf.apply(
    lambda r: str(h3.latlng_to_cell(r.y, r.x, res)), axis=1
)
point_gdf.head()

h3_df_mean = point_gdf.groupby(["h3_cell"])["pm2_5"].mean().round(0).reset_index()
h3_df_mean = h3_df_mean.rename(columns={"pm2_5": "pm2_5_mean"})
h3_df_mean.head()

h3_df_max = point_gdf.groupby(["h3_cell"])["pm2_5"].max().reset_index()
h3_df_max = h3_df_max.rename(columns={"pm2_5": "pm2_5_max"})
h3_df_max.head()

def cell_to_shapely(cell):
    coords = h3.cell_to_boundary(cell)
    flipped = tuple(coord[::-1] for coord in coords)
    return Polygon(flipped)


h3_geoms_mean = h3_df_mean["h3_cell"].apply(lambda x: cell_to_shapely(x))
h3_gdf_mean = gpd.GeoDataFrame(data=h3_df_mean, geometry=h3_geoms_mean, crs=4326)

h3_geoms_max = h3_df_max["h3_cell"].apply(lambda x: cell_to_shapely(x))
h3_gdf_max = gpd.GeoDataFrame(data=h3_df_max, geometry=h3_geoms_max, crs=4326)

h3_gdf_mean.explore(column="pm2_5_mean", cmap="YlOrRd")

map = h3_gdf_mean.explore(column="pm2_5_mean", cmap="YlOrRd", name="PM2.5 mean")

traj_gdf.explore(m=map, name="Bus trajectories")

folium.TileLayer("Cartodb Positron").add_to(map)

folium.LayerControl().add_to(map)

map

h3_gdf_max = h3_gdf_max.rename(columns={"geometry": "geometry1"})

pollution = pd.concat([h3_gdf_mean, h3_gdf_max], axis=1)

(
    pollution.hvplot.polygons(
        geo=True, tiles="OSM", c="pm2_5_mean", alpha=0.8, title="Mean pollution data"
    )
    + pollution.hvplot.polygons(
        geo=True, tiles="OSM", c="pm2_5_max", alpha=0.8, title="Maximum pollution data"
    )
)

	uid	dateTime	deviceId	lat	long	pm1_0	pm2_5	pm10
0	0db83849-cd24-477a-a9d8-e48da1c914a3	2021-01-30 00:00:01+05:30	00000000c37f0aa8	28.579370	77.228798	94.0	142.0	156.0
1	a77c4ff0-7723-418b-a02e-5653e9fa4530	2021-01-30 00:00:01+05:30	10000000dc5bb76b	28.579414	77.231705	127.0	213.0	231.0
2	f3109afa-7234-44a1-9911-b01646e33ed8	2021-01-30 00:00:04+05:30	10000000dc5bb76b	28.579414	77.231705	126.0	214.0	231.0
3	39b07547-9467-45c6-85e9-3a917ce969f3	2021-01-30 00:00:04+05:30	00000000c37f0aa8	28.579367	77.228806	94.0	145.0	157.0
4	41b70559-af34-486d-975b-9de3bd30c0f0	2021-01-30 00:00:06+05:30	10000000dc5bb76b	28.579414	77.231705	128.0	218.0	238.0

	uid	deviceId	pm1_0	pm2_5	pm10	geometry	speed	x	y	h3_cell
dateTime
2021-01-30 20:21:29	28245f16-1106-4ab0-81bf-099de4e48d79	00000000078e6811_0	128.0	212.0	251.0	POINT (77.29612 28.51673)	27.443267	77.296121	28.516728	873da1072ffffff
2021-01-30 20:21:33	d0994c8b-208e-4bff-8afc-b091feac1162	00000000078e6811_0	128.0	213.0	254.0	POINT (77.29602 28.51699)	27.443262	77.296021	28.516989	873da1072ffffff
2021-01-30 20:21:38	55c47d6c-4c15-4807-8897-76cf1398ef3a	00000000078e6811_0	133.0	215.0	261.0	POINT (77.29585 28.51742)	28.561704	77.295853	28.517420	873da1072ffffff
2021-01-30 20:24:58	17d49dee-4b05-461a-83d3-4d95a12a02cc	00000000078e6811_0	140.0	227.0	260.0	POINT (77.28574 28.53448)	28.693861	77.285744	28.534485	873da1074ffffff
2021-01-30 20:25:51	738dab30-4b15-4b26-b5da-cad595527d3c	00000000078e6811_0	138.0	213.0	243.0	POINT (77.28287 28.53916)	17.840033	77.282872	28.539160	873da1074ffffff

	h3_cell	pm2_5_mean
0	873da1009ffffff	178.0
1	873da100dffffff	203.0
2	873da1054ffffff	128.0
3	873da1070ffffff	153.0
4	873da1072ffffff	159.0

	h3_cell	pm2_5_max
0	873da1009ffffff	819.0
1	873da100dffffff	659.0
2	873da1054ffffff	1007.0
3	873da1070ffffff	532.0
4	873da1072ffffff	408.0

Pollution data analysis example¶

Loading pollution data¶

Removing problematic trajectories¶

Plotting trajectories¶

Assigning H3 cell IDs to trajectory points¶

Visualizing pollution measurements¶

Continue exploring MovingPandas¶