Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reducing attribute tagging within tippecanoe-overzoom #296

Merged
merged 5 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -368,10 +368,16 @@ overzoom-test: tippecanoe-overzoom
cmp tests/pbf/bin-11-327-791.pbf.out.json.check tests/pbf/bin-11-327-791.pbf.out.json
rm tests/pbf/bin-11-327-791.pbf.out.json.check tests/pbf/bin-11-327-791.pbf.out
# Binning by id
./tippecanoe-overzoom -o tests/pbf/bin-11-327-791-ids.pbf.out --assign-to-bins tests/pbf/sf-zips.json --bin-by-id bin-ids tests/pbf/yearbuilt.pbf 11/327/791 11/327/791
./tippecanoe-overzoom -o tests/pbf/bin-11-327-791-ids.pbf.out --assign-to-bins tests/pbf/sf-zips.json --bin-by-id-list bin-ids tests/pbf/yearbuilt.pbf 11/327/791 11/327/791
./tippecanoe-decode tests/pbf/bin-11-327-791-ids.pbf.out 11 327 791 > tests/pbf/bin-11-327-791-ids.pbf.out.json.check
cmp tests/pbf/bin-11-327-791-ids.pbf.out.json.check tests/pbf/bin-11-327-791-ids.pbf.out.json
rm tests/pbf/bin-11-327-791-ids.pbf.out.json.check tests/pbf/bin-11-327-791-ids.pbf.out
# Binning by id, attribute stripping
# Note that it still works even if we exclude the ID that we are binning by
./tippecanoe-overzoom -yZCTA5CE10 -ytippecanoe:count -o tests/pbf/bin-11-327-791-ids-zip.pbf.out --assign-to-bins tests/pbf/sf-zips.json --bin-by-id-list bin-ids tests/pbf/yearbuilt.pbf 11/327/791 11/327/791
./tippecanoe-decode tests/pbf/bin-11-327-791-ids-zip.pbf.out 11 327 791 > tests/pbf/bin-11-327-791-ids-zip.pbf.out.json.check
cmp tests/pbf/bin-11-327-791-ids-zip.pbf.out.json.check tests/pbf/bin-11-327-791-ids-zip.pbf.out.json
rm tests/pbf/bin-11-327-791-ids-zip.pbf.out.json.check tests/pbf/bin-11-327-791-ids-zip.pbf.out
# Binning with longitude wraparound problems
./tippecanoe-overzoom -o tests/pbf/0-0-0-pop-2-0-1.pbf.out --accumulate-numeric-attributes=tippecanoe --assign-to-bins tests/pbf/h3-2-0-1.geojson tests/pbf/0-0-0.pbf 2/0/1 2/0/1
./tippecanoe-decode tests/pbf/0-0-0-pop-2-0-1.pbf.out 2 0 1 > tests/pbf/0-0-0-pop-2-0-1.pbf.out.json.check
Expand Down Expand Up @@ -618,11 +624,36 @@ accumulate-test:
# the cluster sizes still add up to the 243 original features
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | sed 's/.*clustered:cluster_size": //' | awk '{sum += $$1} END {print sum}'` == 243
#
# Binning with attribute stripping
./tippecanoe-overzoom -y clustered:count:POP1950 -y clustered:sum:POP1950 -y POP1950 -y clustered:cluster_size --assign-to-bins tests/pbf/h3-0-0-0.geojson --accumulate-numeric-attributes=clustered --accumulate-attribute '{"clustered:cluster_size":"sum"}' -o tests/pbf/bins-0-0-0.pbf tests/pbf/accum.dir/0/0/0.pbf 0/0/0 0/0/0
# Now there are 41 bins with POP1950 clusters
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep 'clustered:count:POP1950' | wc -l` == 41
# There are none with bare POP1950 (which is expected; we should only have summary statistics)
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep -v 'clustered:count:POP1950' | grep 'POP1950' | wc -l` == 0
# And 3 with no POP1950 at all
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep -v 'POP1950' | wc -l` == 3
#
# the clustered and megatile-filtered and binned POP1950s add up to 161590
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep 'clustered:sum:POP1950' | sed 's/.*"clustered:sum:POP1950": //' | awk '{sum += $$1} END {print sum}'` == 161590
# which is the right global total
# Make sure we do *not* accumulate a numeric attribute that already has the magic prefix:
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep sum:clustered:unrelated | wc -l` == 0
# And those attributes do *not* make it onto the bins
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep clustered:unrelated | wc -l` == 0
# the cluster sizes still add up to the 243 original features
test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | sed 's/.*clustered:cluster_size": //' | awk '{sum += $$1} END {print sum}'` == 243
#
#
# A tile where the counts and means were previously wrong:
./tippecanoe-overzoom --accumulate-numeric-attributes=felt -m -o tests/pbf/yearbuilt-accum.pbf tests/pbf/yearbuilt.pbf 0/0/0 0/0/0
./tippecanoe-decode tests/pbf/yearbuilt-accum.pbf 0 0 0 > tests/pbf/yearbuilt-accum.pbf.json.check
cmp tests/pbf/yearbuilt-accum.pbf.json.check tests/pbf/yearbuilt-accum.pbf.json
rm tests/pbf/yearbuilt-accum.pbf tests/pbf/yearbuilt-accum.pbf.json.check
# Same tile, with attribute stripping
./tippecanoe-overzoom --accumulate-numeric-attributes=felt -y bldgsqft -y felt:sum:bldgsqft -m -o tests/pbf/yearbuilt-accum-bldgsqft.pbf tests/pbf/yearbuilt.pbf 0/0/0 0/0/0
./tippecanoe-decode tests/pbf/yearbuilt-accum-bldgsqft.pbf 0 0 0 > tests/pbf/yearbuilt-accum-bldgsqft.pbf.json.check
cmp tests/pbf/yearbuilt-accum-bldgsqft.pbf.json.check tests/pbf/yearbuilt-accum-bldgsqft.pbf.json
rm tests/pbf/yearbuilt-accum-bldgsqft.pbf tests/pbf/yearbuilt-accum-bldgsqft.pbf.json.check

join-filter-test: tippecanoe tippecanoe-decode tile-join
# Comes out different from the direct tippecanoe run because null attributes are lost
Expand Down
135 changes: 76 additions & 59 deletions clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,30 @@ struct tile_feature {
size_t seq = 0;
};

static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &accumulate_numeric) {
// Decide whether the attribute named `key` should be retained.
//
// A key is retained only when all of the following hold:
//   - `keep` is empty, or `keep` contains `key`
//   - `exclude` does not contain `key`
//   - `key` does not start with any entry of `exclude_prefix`
//
// Returns true if the key should be retained, false otherwise.
static bool should_keep(std::string const &key,
			std::set<std::string> const &keep,
			std::set<std::string> const &exclude,
			std::vector<std::string> const &exclude_prefix) {
	// A non-empty keep set acts as an allowlist: the key must be listed in it.
	if (!keep.empty() && keep.count(key) == 0) {
		return false;
	}

	// Exact-name exclusions override the allowlist.
	if (exclude.count(key) != 0) {
		return false;
	}

	// So do prefix exclusions.
	for (auto const &banned : exclude_prefix) {
		if (starts_with(key, banned)) {
			return false;
		}
	}

	return true;
}

static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &accumulate_numeric,
std::set<std::string> const &keep, std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix) {
std::string accumulate_numeric_colon = accumulate_numeric + ":";

std::unordered_map<std::string, size_t> attributes;
Expand Down Expand Up @@ -1156,7 +1179,10 @@ static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &
mvt_value mean;
mean.type = mvt_double;
mean.numeric_value.double_value = mvt_value_to_double(sum) / count_val;
layer.tag(feature, accumulate_numeric + ":mean:" + trunc, mean);

if (should_keep(key, keep, exclude, exclude_prefix)) {
layer.tag(feature, accumulate_numeric + ":mean:" + trunc, mean);
}
}
}
}
Expand All @@ -1170,7 +1196,9 @@ static void preserve_numeric(const std::string &key, const mvt_value &val,
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state, // accumulation state for preserve_attribute()
key_pool &key_pool) {
key_pool &key_pool,
std::set<std::string> const &keep, std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix) {
// If this is a numeric attribute, but there is also a prefix:sum (etc.) for the
// same attribute, we want to use that one instead of this one.

Expand Down Expand Up @@ -1203,6 +1231,10 @@ static void preserve_numeric(const std::string &key, const mvt_value &val,
// and then put it back on for the output field
std::string prefixed = accumulate_numeric + ":" + op.first + ":" + outkey;

if (!should_keep(prefixed, keep, exclude, exclude_prefix)) {
continue;
}

// Does it exist in the output feature already?

auto prefixed_attr = numeric_out_field.find(prefixed);
Expand Down Expand Up @@ -1263,27 +1295,6 @@ static void preserve_numeric(const std::string &key, const mvt_value &val,
}
}

// Report whether the attribute `key` passes the keep/exclude filters:
// it must be admitted by `keep` (an empty `keep` admits everything),
// must not appear in `exclude`, and must not begin with any string
// listed in `exclude_prefix`.
static bool should_keep(std::string const &key,
			std::set<std::string> const &keep,
			std::set<std::string> const &exclude,
			std::vector<std::string> const &exclude_prefix) {
	bool const admitted = keep.size() == 0 || keep.find(key) != keep.end();
	if (!admitted) {
		return false;
	}

	bool const excluded_by_name = exclude.find(key) != exclude.end();
	if (excluded_by_name) {
		return false;
	}

	// reject on the first matching excluded prefix
	for (size_t i = 0; i < exclude_prefix.size(); i++) {
		if (starts_with(key, exclude_prefix[i])) {
			return false;
		}
	}

	return true;
}

static void handle_closepath_from_mvt(drawvec &geom) {
// mvt geometries close polygons with a mvt_closepath operation
// tippecanoe-internal geometries close polygons with a lineto to the initial point
Expand All @@ -1306,7 +1317,7 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
std::vector<std::string> const &exclude_prefix,
std::unordered_map<std::string, attribute_op> const &attribute_accum,
std::string const &accumulate_numeric,
key_pool &key_pool, int buffer) {
key_pool &key_pool, int buffer, bool include_nonaggregate) {
// Add geometry to output feature

drawvec geom = features[0].geom;
Expand Down Expand Up @@ -1357,22 +1368,22 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
std::vector<mvt_value> full_values;
std::map<std::string, size_t> numeric_out_field;

for (size_t i = 0; i + 1 < features[0].tags.size(); i += 2) {
const std::string &key = features[0].layer->keys[features[0].tags[i]];
auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
// this attribute has an accumulator, so convert it
full_keys.push_back(key_pool.pool(features[0].layer->keys[features[0].tags[i]]));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else if (accumulate_numeric.size() > 0 && features[0].layer->values[features[0].tags[i + 1]].is_numeric()) {
// convert numeric for accumulation
numeric_out_field.emplace(key, full_keys.size());
full_keys.push_back(key_pool.pool(key));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else {
// otherwise just tag it directly onto the output feature
if (should_keep(features[0].layer->keys[features[0].tags[i]], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, features[0].layer->keys[features[0].tags[i]], features[0].layer->values[features[0].tags[i + 1]]);
auto const &f = features[0];
for (size_t i = 0; i + 1 < f.tags.size(); i += 2) {
const std::string &key = f.layer->keys[f.tags[i]];
if (should_keep(key, keep, exclude, exclude_prefix)) {
if (attribute_accum.find(key) != attribute_accum.end()) {
// this attribute has an accumulator, so convert it
full_keys.push_back(key_pool.pool(f.layer->keys[f.tags[i]]));
full_values.push_back(f.layer->values[f.tags[i + 1]]);
} else if (accumulate_numeric.size() > 0 && f.layer->values[f.tags[i + 1]].is_numeric()) {
// convert numeric for accumulation
numeric_out_field.emplace(key, full_keys.size());
full_keys.push_back(key_pool.pool(key));
full_values.push_back(f.layer->values[f.tags[i + 1]]);
} else if (include_nonaggregate) {
// otherwise just tag it directly onto the output feature
outlayer.tag(outfeature, f.layer->keys[f.tags[i]], f.layer->values[f.tags[i + 1]]);
}
}
}
Expand All @@ -1386,22 +1397,26 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou

for (size_t j = 0; j + 1 < features[i].tags.size(); j += 2) {
const std::string &key = features[i].layer->keys[features[i].tags[j]];
keys.insert(key);
if (should_keep(key, keep, exclude, exclude_prefix)) {
keys.insert(key);
}
}

for (size_t j = 0; j + 1 < features[i].tags.size(); j += 2) {
const std::string &key = features[i].layer->keys[features[i].tags[j]];

auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state, key_pool);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
if (val.is_numeric()) {
preserve_numeric(key, val, full_keys, full_values,
accumulate_numeric,
keys, numeric_out_field, attribute_accum_state, key_pool);
if (should_keep(key, keep, exclude, exclude_prefix)) {
auto found = attribute_accum.find(key);
if (found != attribute_accum.end()) {
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(found->second, key, val, full_keys, full_values, attribute_accum_state, key_pool);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
if (val.is_numeric()) {
preserve_numeric(key, val, full_keys, full_values,
accumulate_numeric,
keys, numeric_out_field, attribute_accum_state, key_pool,
keep, exclude, exclude_prefix);
}
}
}
}
Expand All @@ -1417,9 +1432,9 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
}

if (accumulate_numeric.size() > 0) {
add_mean(outfeature, outlayer, accumulate_numeric);
add_mean(outfeature, outlayer, accumulate_numeric, keep, exclude, exclude_prefix);
}
} else {
} else if (include_nonaggregate) {
for (size_t i = 0; i + 1 < features[0].tags.size(); i += 2) {
if (should_keep(features[0].layer->keys[features[0].tags[i]], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, features[0].layer->keys[features[0].tags[i]], features[0].layer->values[features[0].tags[i + 1]]);
Expand Down Expand Up @@ -1725,7 +1740,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
if (outfeatures[i].size() > 1) {
if (feature_out(outfeatures[i], outlayer,
keep, exclude, exclude_prefix, attribute_accum,
accumulate_numeric, key_pool, buffer)) {
accumulate_numeric, key_pool, buffer, true)) {
mvt_feature &nfeature = outlayer.features.back();
mvt_value val;
val.type = mvt_uint;
Expand All @@ -1737,7 +1752,9 @@ mvt_tile assign_to_bins(mvt_tile &features,
} else {
attrname = accumulate_numeric + ":count";
}
outlayer.tag(nfeature, attrname, val);
if (should_keep(attrname, keep, exclude, exclude_prefix)) {
outlayer.tag(nfeature, attrname, val);
}
}
}
}
Expand Down Expand Up @@ -1886,7 +1903,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int

if (flush_multiplier_cluster) {
if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool, -1);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool, -1, bins.size() == 0);
if (outlayer->features.size() >= feature_limit) {
break;
}
Expand Down Expand Up @@ -1946,7 +1963,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
}

if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool, -1);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool, -1, bins.size() == 0);
pending_tile_features.clear();
if (outlayer->features.size() >= feature_limit) {
break;
Expand Down
Loading