From 5b8f009a1ee90bdcfcbf099d2fbe9b7ab3bee8be Mon Sep 17 00:00:00 2001
From: Colin Dellow <cldellow@gmail.com>
Date: Sat, 20 Jan 2024 11:05:00 -0500
Subject: support GeoJSON lines format

Described at https://stevage.github.io/ndgeojson/; each feature is given
its own line, rather than being wrapped in a FeatureCollection. Sources
whose filename ends in "jsonl" or "JSONL" are read this way.
---
 include/geojson_processor.h |  3 +++
 src/geojson_processor.cpp   | 51 ++++++++++++++++++++++++++++++++++++-
 src/tilemaker.cpp           |  2 +-
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/include/geojson_processor.h b/include/geojson_processor.h
index ffef3db..51d45d0 100644
--- a/include/geojson_processor.h
+++ b/include/geojson_processor.h
@@ -31,6 +31,9 @@ private:
 	OsmLuaProcessing &osmLuaProcessing;
 	std::mutex attributeMutex;
 
+	void readFeatureCollection(class LayerDef &layer, uint layerNum); // reads a file holding a single GeoJSON FeatureCollection
+	void readFeatureLines(class LayerDef &layer, uint layerNum); // reads a file holding newline-delimited GeoJSON items
+
 	template <bool Flag, typename T>
 	void processFeature(rapidjson::GenericObject<Flag, T> feature, class LayerDef &layer, uint layerNum);
 
diff --git a/src/geojson_processor.cpp b/src/geojson_processor.cpp
index 3896571..ae5ba85 100644
--- a/src/geojson_processor.cpp
+++ b/src/geojson_processor.cpp
@@ -1,5 +1,6 @@
 #include "geojson_processor.h"
 
+#include "helpers.h"
 #include <boost/asio/thread_pool.hpp>
 #include <boost/asio/post.hpp>
 
@@ -8,14 +9,29 @@
 #include "rapidjson/stringbuffer.h"
 #include "rapidjson/filereadstream.h"
 
+#include <sys/stat.h>
+#include <deque>
+
 extern bool verbose;
 
 namespace geom = boost::geometry;
 
+long getFileSize(std::string filename) { // size of `filename` in bytes, or -1 when stat64 fails
+	struct stat64 info;
+	if (stat64(filename.c_str(), &info) != 0) return -1;
+	return info.st_size;
+}
+
 // Read GeoJSON, and create OutputObjects for all objects within the specified bounding box
 void GeoJSONProcessor::read(class LayerDef &layer, uint layerNum) {
+	if (ends_with(layer.source, "JSONL") || ends_with(layer.source, "jsonl")) // newline-delimited input is selected purely by filename suffix
+		return readFeatureLines(layer, layerNum);

-	// Parse the JSON file into a RapidJSON document
+	readFeatureCollection(layer, layerNum); // every other source is read as one FeatureCollection document
+}
+
+void GeoJSONProcessor::readFeatureCollection(class LayerDef &layer, uint layerNum) {
+	// Read a JSON file containing a single GeoJSON FeatureCollection object.
 	rapidjson::Document doc;
 	FILE* fp = fopen(layer.source.c_str(), "r");
 	char readBuffer[65536];
@@ -38,6 +54,39 @@ void GeoJSONProcessor::read(class LayerDef &layer, uint layerNum) {
 	pool.join();
 }
 
+void GeoJSONProcessor::readFeatureLines(class LayerDef &layer, uint layerNum) {
+	// Read a JSON file containing multiple GeoJSON items, newline-delimited.
+	// Throws std::runtime_error if the file cannot be sized, opened or parsed.
+	long fileSize = getFileSize(layer.source);
+	if (fileSize == -1)
+		throw std::runtime_error("unable to get filesize of " + layer.source);
+	const size_t endOffset = static_cast<size_t>(fileSize);
+	FILE* fp = fopen(layer.source.c_str(), "r");
+	if (fp == nullptr)
+		throw std::runtime_error("unable to open " + layer.source);
+	char readBuffer[65536];
+	rapidjson::FileReadStream is(fp, readBuffer, sizeof(readBuffer));
+
+	// One Document per feature, kept alive in docs until the worker tasks below have run.
+	std::deque<rapidjson::Document> docs;
+	while (is.Tell() < endOffset) {
+		docs.emplace_back();
+		rapidjson::Document& doc = docs.back();
+		doc.ParseStream<rapidjson::kParseStopWhenDoneFlag>(is);
+		// Close the handle before throwing so it is not leaked; only object roots may be passed on via GetObject().
+		if (doc.HasParseError() || !doc.IsObject()) { fclose(fp); throw std::runtime_error("Invalid JSON file."); }
+		// Skip whitespace between items; isspace requires an unsigned char value, so cast the (possibly negative) char.
+		while (is.Tell() < endOffset && isspace(static_cast<unsigned char>(is.Peek()))) is.Take();
+	}
+	fclose(fp);
+
+	// Process each feature on the pool; join() returns only after every posted task has finished.
+	boost::asio::thread_pool pool(threadNum);
+	for (auto &doc : docs)
+		boost::asio::post(pool, [this, &doc, &layer, layerNum]() { processFeature(doc.GetObject(), layer, layerNum); });
+	pool.join();
+}
+
 template <bool Flag, typename T>
 void GeoJSONProcessor::processFeature(rapidjson::GenericObject<Flag, T> feature, class LayerDef &layer, uint layerNum) {
 
diff --git a/src/tilemaker.cpp b/src/tilemaker.cpp
index b18b726..39ac2f3 100644
--- a/src/tilemaker.cpp
+++ b/src/tilemaker.cpp
@@ -244,7 +244,7 @@ int main(const int argc, const char* argv[]) {
 				if (!hasClippingBox) {
 					cerr << "Can't read shapefiles unless a bounding box is provided." << endl;
 					exit(EXIT_FAILURE);
-				} else if (ends_with(layer.source, "json") || ends_with(layer.source, "JSON")) {
+				} else if (ends_with(layer.source, "json") || ends_with(layer.source, "jsonl") || ends_with(layer.source, "JSON") || ends_with(layer.source, "JSONL")) {
 					cout << "Reading GeoJSON " << layer.name << endl;
 					geoJSONProcessor.read(layers.layers[layerNum], layerNum);
 				} else {
-- 
2.47.3

