Skip to content

Commit

Permalink
joss minor corrections
Browse files Browse the repository at this point in the history
  • Loading branch information
praneethd7 committed Sep 28, 2023
1 parent b53e904 commit c14af96
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 60 deletions.
116 changes: 58 additions & 58 deletions paper/paper.bib
Original file line number Diff line number Diff line change
@@ -1,83 +1,83 @@
@misc{devunuri2023bus,
title={Bus Stop Spacings Statistics: Theory and Evidence},
author={Saipraneeth Devunuri and Shirin Qiam and Lewis Lehe and Ayush Pandey and Dana Monzer},
year={2023},
eprint={2208.04394},
archivePrefix={arXiv},
primaryClass={stat.ME}
title = {Bus Stop Spacings Statistics: Theory and Evidence},
author = {Saipraneeth Devunuri and Shirin Qiam and Lewis Lehe and Ayush Pandey and Dana Monzer},
year = {2023},
eprint = {2208.04394},
archiveprefix = {arXiv},
primaryclass = {stat.ME}
}

@misc{devunuri2023chatgpt,
title={ChatGPT for GTFS: From Words to Information},
author={Saipraneeth Devunuri and Shirin Qiam and Lewis Lehe},
year={2023},
eprint={2308.02618},
archivePrefix={arXiv},
primaryClass={cs.IR}
title = {ChatGPT for GTFS: From Words to Information},
author = {Saipraneeth Devunuri and Shirin Qiam and Lewis Lehe},
year = {2023},
eprint = {2308.02618},
archiveprefix = {arXiv},
primaryclass = {cs.IR}
}

@data{DVN/SFBIVU_2022,
author = {Devunuri, Saipraneeth and Shirin Qiam and Lewis Lehe},
publisher = {Harvard Dataverse},
title = {{Bus Stop Spacings for Transit Providers in the US}},
UNF = {UNF:6:43Qgi8ldmVm8XIxuC0EoKA==},
year = {2022},
version = {V3},
doi = {10.7910/DVN/SFBIVU},
url = {https://doi.org/10.7910/DVN/SFBIVU}
author = {Devunuri, Saipraneeth and Shirin Qiam and Lewis Lehe},
publisher = {Harvard Dataverse},
title = {{Bus Stop Spacings for Transit Providers in the US}},
unf = {UNF:6:43Qgi8ldmVm8XIxuC0EoKA==},
year = {2022},
version = {V3},
doi = {10.7910/DVN/SFBIVU},
url = {https://doi.org/10.7910/DVN/SFBIVU}
}

@data{DVN/QFTAPM_2023,
author = {Devunuri, Saipraneeth},
publisher = {Harvard Dataverse},
title = {{Bus Stop Spacings for Transit Providers in Canada}},
UNF = {UNF:6:N8LM6i5IQwbp7gxQ4vZhIA==},
year = {2023},
version = {V2},
doi = {10.7910/DVN/QFTAPM},
url = {https://doi.org/10.7910/DVN/QFTAPM}
author = {Devunuri, Saipraneeth},
publisher = {Harvard Dataverse},
title = {{Bus Stop Spacings for Transit Providers in Canada}},
unf = {UNF:6:N8LM6i5IQwbp7gxQ4vZhIA==},
year = {2023},
version = {V2},
doi = {10.7910/DVN/QFTAPM},
url = {https://doi.org/10.7910/DVN/QFTAPM}
}

@article{pereira2023exploring,
title={Exploring the time geography of public transport networks with the gtfs2gps package},
author={Pereira, Rafael HM and Andrade, Pedro R and Vieira, Jo{\~a}o Pedro Bazzo},
journal={Journal of Geographical Systems},
volume={25},
number={3},
pages={453--466},
year={2023},
publisher={Springer}
title = {Exploring the time geography of public transport networks with the gtfs2gps package},
author = {Pereira, Rafael HM and Andrade, Pedro R and Vieira, Jo{\~a}o Pedro Bazzo},
journal = {Journal of Geographical Systems},
volume = {25},
number = {3},
pages = {453--466},
year = {2023},
publisher = {Springer}
}

@article{lehe4135394bus,
title={Bus stop spacing with heterogeneous trip lengths and elastic demand},
author={Lehe, Lewis and Pandey, Ayush},
journal={Available at SSRN 4135394}
title = {Bus stop spacing with heterogeneous trip lengths and elastic demand},
author = {Lehe, Lewis and Pandey, Ayush},
journal = {Available at SSRN 4135394}
}

@SOFTWARE{Toso2023,
title = {gtfs_functions: Package with useful functions to create geo-spatial visualizations from a GTFS.},
author = {Santiago Toso},
month = sep,
year = 2023,
publisher = {GitHub},
url = {https://github.com/Bondify/gtfs_functions}
@software{Toso2023,
title = {gtfs_functions: Package with useful functions to create geo-spatial visualizations from a GTFS.},
author = {Santiago Toso},
month = sep,
year = 2023,
publisher = {GitHub},
url = {https://github.com/Bondify/gtfs_functions}
}

@SOFTWARE{Whalen2023,
title = {partridge: A fast, forgiving GTFS reader built on pandas DataFrames},
author = {Danny Whalen},
month = sep,
year = 2023,
publisher = {GitHub},
url = {https://github.com/remix/partridge}
@software{Whalen2023,
title = {partridge: A fast, forgiving GTFS reader built on pandas DataFrames},
author = {Danny Whalen},
month = sep,
year = 2023,
publisher = {GitHub},
url = {https://github.com/remix/partridge}
}

@misc{MobData2023,
author = {{MobilityData}},
title = {{Mobility Database}},
year = {2023},
note = {\url{https://database.mobilitydata.org/}}
url = {https://database.mobilitydata.org/}
}

@article{Voulgaris2023Predictors,
Expand Down Expand Up @@ -106,8 +106,8 @@ @article{Wu2022
}

@article{maneewongvatana1999analysis,
title={Analysis of approximate nearest neighbor searching with clustered point sets},
author={Maneewongvatana, Songrit and Mount, David M},
journal={arXiv preprint cs/9901013},
year={1999}
title = {Analysis of approximate nearest neighbor searching with clustered point sets},
author = {Maneewongvatana, Songrit and Mount, David M},
journal = {arXiv preprint cs/9901013},
year = {1999}
}
4 changes: 2 additions & 2 deletions paper/paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ The choice of bus stop spacing involves a tradeoff between accessibility and spe

# Computing Segments

A `segment` is defined by three elements: (i) a start stop, (ii) an end stop, and (iii) the path that the bus travels along the route in between the two consecutive stops. The segments are computed using route shape geometries and stop locations included in GTFS data. Packages such as `gtfs2gps` [@pereira2023exploring] and `gtfs_functions` [@Toso2023] can compute segments, but this package does so in a computationally efficient way and includes a new way to account for cases when the reported stop locations are misaligned with the route shapes. Figure \autoref{fig:example} provides some example cases where a stop is equidistant from multiple points on a route. Thus, projecting the stop onto the route or snapping it to the nearest geo-coordinate (lat, lon) may produce errors, such as stops that are out of order or stops being snapped far from their locations. Also, the time complexity of projection or snapping is $O(mn)$ using brute force for `m` geo-coordinates that represent the route shape and `n` stops that have to be projected.
A `segment` is defined by three elements: (i) a start stop, (ii) an end stop, and (iii) the path that the bus travels along the route in between the two consecutive stops. The segments are computed using route shape geometries and stop locations included in GTFS data. Packages such as `gtfs2gps` [@pereira2023exploring] and `gtfs_functions` [@Toso2023] can compute segments, but this package does so in a computationally efficient way and includes a new way to account for cases when the reported stop locations are misaligned with the route shapes. \autoref{fig:example} provides some example cases where a stop is equidistant from multiple points on a route. Thus, projecting the stop onto the route or snapping it to the nearest geo-coordinate (lat, lon) may produce errors, such as stops that are out of order or stops being snapped far from their locations. Also, the time complexity of projection or snapping is $O(mn)$ using brute force for `m` geo-coordinates that represent the route shape and `n` stops that have to be projected.

![Example route shapes with stop locations that are equidistant from multiple points along the route.\label{fig:example}](snapping_difficulty.jpg)

`gtfs-segments` overcomes these challenges by increasing the route resolution (i.e., adding points in between geo-coordinates), using spatial k-d trees, and using more than one nearest neighbor. The increase in resolution allows stops to be snapped to nearby points. k-d trees [@maneewongvatana1999analysis] reduce the time complexity to $O(n \log m)$ and make it possible to compare among several snapping points without added computation. Figure \autoref{figinterpolate} shows a difficult example route. In the first panel, snapping to the nearest point produces out-of-order stops (3/4/2) and stop 5 is snapped far away from its location. In the second panel, increased resolution fixes stop 5's location problem but the ordering problem persists. In the third panel, we use the `k=3` nearest neighbors and thus find a proper ordering. Once every stop has been snapped to a geo-coordinate on the route shape, the shape is segmented between stops and each segment is represented by a `LINESTRING` for entry in the GeoDataFrame. In fact, `gtfs_segments` starts with `k=3` for the neighbors and doubles `k` until we find the correct sequence of stops or remove the corresponding trip. On average, fewer than 1% of trips fail; these can be manually corrected and validated.
`gtfs-segments` overcomes these challenges by increasing the route resolution (i.e., adding points in between geo-coordinates), using spatial k-d trees, and using more than one nearest neighbor. The increase in resolution allows stops to be snapped to nearby points. k-d trees [@maneewongvatana1999analysis] reduce the time complexity to $O(n \log m)$ and make it possible to compare among several snapping points without added computation. \autoref{fig:interpolate} shows a difficult example route. In the first panel, snapping to the nearest point produces out-of-order stops (3/4/2) and stop 5 is snapped far away from its location. In the second panel, increased resolution fixes stop 5's location problem but the ordering problem persists. In the third panel, we use the `k=3` nearest neighbors and thus find a proper ordering. Once every stop has been snapped to a geo-coordinate on the route shape, the shape is segmented between stops and each segment is represented by a `LINESTRING` for entry in the GeoDataFrame. In fact, `gtfs_segments` starts with `k=3` for the neighbors and doubles `k` until we find the correct sequence of stops or remove the corresponding trip. On average, fewer than 1% of trips fail; these can be manually corrected and validated.

![Improvement in snapping due to an increase in resolution and using k-nearest neighbors. Adapted from "Bus Stop Spacings Statistics: Theory and Evidence" [@devunuri2023bus].\label{fig:interpolate}](interpolation.jpg)

Expand Down

0 comments on commit c14af96

Please sign in to comment.