# Load the UMAP embedding table (gzip-compressed TSV).
df <- fread("../output/50.2.umap.tsv.gz")

# Standardize lat / lon / Speed with scale() and clamp the result to fixed
# limits. The limits must be passed to squish() as ONE length-2 vector: the
# earlier form squish(x, -2, 2) only applied the lower limit, so values above
# the upper limit were left unclamped (any summary output produced before this
# change still shows those unclamped maxima).
# NOTE(review): assumes squish() is scales::squish(x, range, only.finite) -
# confirm which package provides it.
# as.numeric() drops the one-column matrix returned by scale() so each new
# column is a plain numeric vector.
df$latScale <- squish(as.numeric(scale(df$lat)), c(-2, 2))
df$lonScale <- squish(as.numeric(scale(df$lon)), c(-2, 2))
# NOTE(review): limits 0..50 are applied to the standardized Speed values, not
# to raw Speed - confirm this is the intended range.
df$SpeedScale <- squish(as.numeric(scale(df$Speed)), c(0, 50))

# Map packages, for mapping the lat/lon points to US states.
library(maps)
library(mapdata)

# Names of the distance metrics handled by this script.
metricTypes <- c("euclidean", "haversine")


# https://umap-learn.readthedocs.io/en/latest/embedding_space.html
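# Transformation of the spherical embedding to 2D, quoted from the page above
# (Python/NumPy code, kept for reference; not R):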
# x = np.sin(sphere_mapper.embedding_[:, 0]) * np.cos(sphere_mapper.embedding_[:, 1])
# y = np.sin(sphere_mapper.embedding_[:, 0]) * np.sin(sphere_mapper.embedding_[:, 1])
# z = np.cos(sphere_mapper.embedding_[:, 0])
#
# x = np.arctan2(x, y)
# y = -np.arccos(z)

summary

# Print a per-column summary of df; the text that follows this call appears to
# be its pasted console output, not R code.
# NOTE(review): skim() is presumably skimr::skim - confirm the package is attached.
skim(df)
Data summary
Name df
Number of rows 4857144
Number of columns 44
Key NULL
_______________________
Column type frequency:
character 7
numeric 35
POSIXct 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Activity 0 1 4 10 0 7 0
Name 0 1 2 3 0 2 0
Notes 0 1 0 8 4184952 15 0
UUID 0 1 16 36 0 63 0
Version 0 1 0 28 820090 30 0
Visit 0 1 0 467 4854672 2473 0
imgS3 0 1 0 50 4857004 141 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Accuracy 0 1.00 6.04 1.59 -1.00 4.74 6.00 8.000000e+00 1.000000e+01 ▁▁▇▃▅
Elevation 0 1.00 309.11 308.89 -67.07 146.18 250.75 3.492200e+02 1.124989e+04 ▇▁▁▁▁
Heading 0 1.00 164.34 109.76 -1.00 70.66 165.92 2.590300e+02 3.600000e+02 ▇▆▇▆▆
Pressure 0 1.00 144.33 202.58 0.00 97.40 98.87 9.985000e+01 1.027210e+03 ▇▁▁▁▁
Speed 0 1.00 3.16 7.60 -1.00 0.00 0.00 1.390000e+00 4.989000e+01 ▇▁▁▁▁
UnixTime 0 1.00 1615763853.19 60327287.52 1525623850.00 1557692918.50 1622138392.50 1.674732e+09 1.704206e+09 ▇▅▃▅▇
HeartRate 4842796 0.00 88.11 28.44 -1.00 76.00 85.00 1.000000e+02 1.650000e+02 ▁▁▇▂▁
Distance 4810634 0.01 68964.98 174017.51 2.00 5703.61 14004.90 4.545882e+04 1.060107e+06 ▇▁▁▁▁
NumberOfSteps 4808496 0.01 123151.68 269954.22 4.00 7952.00 15226.00 6.748200e+04 1.153645e+06 ▇▁▁▁▁
AverageActivePace 4822571 0.01 1.05 0.70 0.36 0.73 0.93 1.070000e+00 1.509000e+01 ▇▁▁▁▁
CurrentCadence 4822631 0.01 1.67 0.32 0.99 1.48 1.66 1.780000e+00 2.710000e+00 ▃▇▇▂▁
CurrentPace 4822631 0.01 0.87 0.20 0.26 0.79 0.87 1.020000e+00 1.300000e+00 ▁▂▇▆▂
FloorsAscended 4824429 0.01 14.84 14.30 1.00 3.00 10.00 1.600000e+01 5.800000e+01 ▇▆▁▁▁
FloorsDescended 4824118 0.01 11.68 10.49 1.00 5.00 11.00 1.500000e+01 4.100000e+01 ▇▆▁▁▁
vAccuracy 4844573 0.00 4.54 3.83 0.80 1.60 3.00 6.400000e+00 3.740000e+01 ▇▂▁▁▁
AccelerometerX 4850479 0.00 -0.82 4.54 -13.76 -4.08 -0.42 1.200000e+00 9.210000e+00 ▁▆▇▇▅
AccelerometerY 4850479 0.00 -0.45 3.44 -22.32 -0.97 -0.44 5.700000e-01 1.690000e+01 ▁▁▇▂▁
AccelerometerZ 4850479 0.00 3.84 7.15 -14.43 -1.25 6.99 9.780000e+00 1.337000e+01 ▁▃▁▅▇
ActivityConfidence 4850479 0.00 100.00 0.00 100.00 100.00 100.00 1.000000e+02 1.000000e+02 ▁▁▇▁▁
GyroscopeX 4850479 0.00 0.00 0.30 -3.86 -0.01 0.00 1.000000e-02 4.170000e+00 ▁▁▇▁▁
GyroscopeY 4850479 0.00 0.01 0.23 -2.83 -0.01 0.00 1.000000e-02 3.860000e+00 ▁▁▇▁▁
GyroscopeZ 4850479 0.00 0.00 0.14 -2.71 -0.01 0.00 1.000000e-02 1.990000e+00 ▁▁▇▁▁
UserAccelerometerX 4850479 0.00 0.00 0.76 -12.67 -0.15 0.00 1.300000e-01 8.760000e+00 ▁▁▇▂▁
UserAccelerometerY 4850479 0.00 -0.04 0.77 -13.89 -0.08 0.00 7.000000e-02 1.166000e+01 ▁▁▇▁▁
UserAccelerometerZ 4850479 0.00 0.02 0.80 -16.13 -0.13 0.05 1.900000e-01 8.570000e+00 ▁▁▁▇▁
Lightmeter 4855139 0.00 195.51 308.98 1.00 17.00 78.00 1.470000e+02 1.067000e+03 ▇▁▁▁▁
lat 0 1.00 0.00 1.00 -13.11 -0.78 0.49 8.600000e-01 3.430000e+00 ▁▁▁▆▇
lon 0 1.00 0.00 1.00 -5.57 0.03 0.13 2.800000e-01 1.072000e+01 ▁▇▁▁▁
umap_euclidean0 0 1.00 4.47 14.10 -33.03 -3.59 4.47 1.256000e+01 4.191000e+01 ▁▃▇▃▁
umap_euclidean1 0 1.00 1.30 14.15 -36.53 -6.82 1.25 9.360000e+00 3.881000e+01 ▁▃▇▃▁
umap_haversine0 0 1.00 4.36 120.81 -855.47 -54.06 4.53 6.214000e+01 8.866400e+02 ▁▁▇▁▁
umap_haversine1 0 1.00 1.17 79.73 -568.70 -37.15 1.20 3.954000e+01 5.575500e+02 ▁▁▇▁▁
latScale 0 1.00 0.04 0.86 -2.00 -0.78 0.49 8.600000e-01 3.430000e+00 ▁▅▇▁▁
lonScale 0 1.00 0.02 0.94 -2.00 0.03 0.13 2.800000e-01 1.072000e+01 ▇▁▁▁▁
SpeedScale 0 1.00 0.31 0.88 0.00 0.00 0.00 0.000000e+00 6.140000e+00 ▇▁▁▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
Time 0 1.00 2018-05-06 16:24:10 2024-01-02 14:31:47 2021-05-27 17:59:52 4853226
CurrentTripStart 4808480 0.01 2023-12-15 18:59:33 2023-12-31 18:01:01 2023-12-27 18:38:42 11

Cat umaps