From e1961830b151d8113e656254e1837a7155ab0c30 Mon Sep 17 00:00:00 2001 From: "Robert J. Macomber" Date: Tue, 5 Jan 2021 12:45:50 -0800 Subject: [PATCH 1/7] Speed up hole nesting With large polygons and lots of holes, this can be _extremely_ slow, so I've converted it to Rust and done some optimization on the contains raycast. For the problematic shapefile that exposed this, processing was a bit under 50x slower than with the Rust code. --- .gitignore | 1 + lib/exshape.ex | 16 +- lib/exshape/shp.ex | 43 +++-- mix.exs | 22 ++- mix.lock | 16 +- native/exshape_shape/.gitignore | 1 + native/exshape_shape/Cargo.lock | 280 ++++++++++++++++++++++++++++ native/exshape_shape/Cargo.toml | 17 ++ native/exshape_shape/src/lib.rs | 71 +++++++ native/exshape_shape/src/lineseg.rs | 7 + native/exshape_shape/src/point.rs | 5 + native/exshape_shape/src/poly.rs | 37 ++++ native/exshape_shape/src/ring.rs | 223 ++++++++++++++++++++++ test/exshape_test.exs | 24 +++ test/test_helper.exs | 1 + 15 files changed, 735 insertions(+), 29 deletions(-) create mode 100644 native/exshape_shape/.gitignore create mode 100644 native/exshape_shape/Cargo.lock create mode 100644 native/exshape_shape/Cargo.toml create mode 100644 native/exshape_shape/src/lib.rs create mode 100644 native/exshape_shape/src/lineseg.rs create mode 100644 native/exshape_shape/src/point.rs create mode 100644 native/exshape_shape/src/poly.rs create mode 100644 native/exshape_shape/src/ring.rs diff --git a/.gitignore b/.gitignore index 008b35d..e11e9ad 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,6 @@ /cover /deps /doc +/priv erl_crash.dump *.ez diff --git a/lib/exshape.ex b/lib/exshape.ex index 6423a3f..aef4776 100644 --- a/lib/exshape.ex +++ b/lib/exshape.ex @@ -8,10 +8,10 @@ defmodule Exshape do defp open_file(c, size), do: File.stream!(c, [], size) - defp zip(nil, nil), do: [] - defp zip(nil, d), do: Dbf.read(d) - defp zip(s, nil), do: Shp.read(s) - defp zip(s, d), do: Stream.zip(Shp.read(s), Dbf.read(d)) + defp 
zip(nil, nil, _), do: [] + defp zip(nil, d, _), do: Dbf.read(d) + defp zip(s, nil, opts), do: Shp.read(s, opts) + defp zip(s, d, opts), do: Stream.zip(Shp.read(s, opts), Dbf.read(d)) defp unzip!(path, cwd, false), do: :zip.extract(to_charlist(path), cwd: cwd) defp unzip!(path, cwd, true) do @@ -78,12 +78,13 @@ defmodule Exshape do end open_file(Path.join(cwd, file), size) end - }) + }, + opts) end end @spec from_filesystem(Filesystem.t) :: [layer] - def from_filesystem(fs) do + def from_filesystem(fs, opts \\ []) do fs.list.() |> Enum.filter(&keep_file?/1) |> Enum.map(fn {:zip_file, filename, _, _, _, _} -> filename end) @@ -110,7 +111,8 @@ defmodule Exshape do # zip up the unzipped shp and dbf components stream = zip( shp && fs.stream.(shp), - dbf && fs.stream.(dbf) + dbf && fs.stream.(dbf), + opts ) {Path.basename(root), prj_contents, stream} diff --git a/lib/exshape/shp.ex b/lib/exshape/shp.ex index 329c636..65b5188 100644 --- a/lib/exshape/shp.ex +++ b/lib/exshape/shp.ex @@ -1,5 +1,9 @@ defmodule Exshape.Shp do + require Rustler + use Rustler, otp_app: :exshape, crate: :exshape_shape + defmodule State do + @enforce_keys [:nest_holes] defstruct mode: :header, shape_type: nil, emit: [], @@ -7,7 +11,8 @@ defmodule Exshape.Shp do item: nil, part_index: 0, measures: [], - z_values: [] + z_values: [], + nest_holes: nil end @magic_nodata_num :math.pow(10, 38) * -1 @@ -111,8 +116,8 @@ defmodule Exshape.Shp do %{s | measures: [], z_values: []} end - defp emit(s, %Polygon{} = p) do - %{s | mode: :record_header, emit: [%{p | points: nest_polygon(p)} | s.emit]} + defp emit(%State{nest_holes: nest_holes} = s, %Polygon{} = p) do + %{s | mode: :record_header, emit: [%{p | points: nest_polygon(p, nest_holes)} | s.emit]} end defp emit(s, %Polyline{} = p) do @@ -134,9 +139,9 @@ defmodule Exshape.Shp do %{s | mode: :record_header, emit: [polylinem | s.emit]} |> reset_unzipped end - defp emit(s, %PolygonM{} = pm) do + defp emit(%State{nest_holes: nest_holes} = s, %PolygonM{} = pm) 
do p = zip_measures(pm, s) - polylinem = %{p | points: nest_polygon(p)} + polylinem = %{p | points: nest_polygon(p, nest_holes)} %{s | mode: :record_header, emit: [polylinem | s.emit]} |> reset_unzipped end @@ -158,12 +163,12 @@ defmodule Exshape.Shp do %{s | mode: :record_header, emit: [polylinez | s.emit]} |> reset_unzipped end - defp emit(s, %PolygonZ{} = pz) do + defp emit(%State{nest_holes: nest_holes} = s, %PolygonZ{} = pz) do p = pz |> zip_measures(s) |> zip_zvals(s) - polygonz = %{p | points: nest_polygon(p)} + polygonz = %{p | points: nest_polygon(p, nest_holes)} %{s | mode: :record_header, emit: [polygonz | s.emit]} |> reset_unzipped end @@ -218,11 +223,20 @@ defmodule Exshape.Shp do parts end - - def nest_polygon(p) do + def nest_polygon(p, nest_holes \\ &beam_nest_holes/2) do {polys, holes} = unflatten_parts(p) |> Enum.split_with(&is_clockwise?/1) - Enum.reduce(holes, Enum.map(polys, fn p -> [p] end), fn hole, polys -> + nest_holes.(Enum.map(polys, fn p -> [p] end), holes) + end + + defp native_nest_holes(polys, holes) do + {:ok, r} = native_nest_holes_impl(polys, holes) + r + end + defp native_nest_holes_impl(_polys, _holes), do: throw :nif_not_loaded + + defp beam_nest_holes(polys, holes) do + Enum.reduce(holes, polys, fn hole, polys -> nest_hole(hole, polys) end) end @@ -754,8 +768,13 @@ defmodule Exshape.Shp do |> Stream.run ``` """ - def read(byte_stream) do - Stream.transform(byte_stream, {<<>>, %State{}}, fn bin, {buf, state} -> + def read(byte_stream, opts \\ []) do + native = Keyword.get(opts, :native, false) + + state = %State{ + nest_holes: if(native, do: &native_nest_holes/2, else: &beam_nest_holes/2) + } + Stream.transform(byte_stream, {<<>>, state}, fn bin, {buf, state} -> case do_read(state, buf <> bin) do {_, %State{mode: :done}} = s -> {:halt, s} {buf, %State{emit: emit} = s} -> {Enum.reverse(emit), {buf, %{s | emit: []}}} diff --git a/mix.exs b/mix.exs index ab5df76..917a2c5 100644 --- a/mix.exs +++ b/mix.exs @@ -10,10 +10,25 @@ 
defmodule Exshape.Mixfile do package: package(), build_embedded: Mix.env() == :prod, start_permanent: Mix.env() == :prod, - deps: deps() + deps: deps(), + compilers: [:rustler] ++ Mix.compilers, + rustler_crates: rustler_crates() ] end + defp rustler_crates do + if System.get_env("ALCHEMIST_MODE") do + [] + else + [ + exsoda_shape: [ + path: "native/exshape_shape", + mode: :release + ] + ] + end + end + defp description do """ Read ESRI Shapefiles as a stream of features and their attributes @@ -33,7 +48,7 @@ defmodule Exshape.Mixfile do # Type "mix help compile.app" for more information def application do # Specify extra applications you'll use from Erlang/Elixir - [extra_applications: [:logger]] + [extra_applications: [:logger, :rustler]] end # Dependencies can be Hex packages: @@ -49,7 +64,8 @@ defmodule Exshape.Mixfile do [ {:elixir_uuid, "~> 1.2"}, {:ex_doc, ">= 0.0.0", only: :dev}, - {:poison, "~> 3.1", only: :test} + {:poison, "~> 3.1", only: :test}, + {:rustler, "~> 0.21.1", sparse: "rustler_mix"}, ] end end diff --git a/mix.lock b/mix.lock index 646fa60..dcbafce 100644 --- a/mix.lock +++ b/mix.lock @@ -1,10 +1,12 @@ %{ - "earmark": {:hex, :earmark, "1.2.5", "4d21980d5d2862a2e13ec3c49ad9ad783ffc7ca5769cf6ff891a4553fbaae761", [:mix], [], "hexpm"}, - "elixir_uuid": {:hex, :elixir_uuid, "1.2.0", "ff26e938f95830b1db152cb6e594d711c10c02c6391236900ddd070a6b01271d", [:mix], [], "hexpm"}, - "ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"}, - "makeup": {:hex, :makeup, "0.5.1", "966c5c2296da272d42f1de178c1d135e432662eca795d6dc12e5e8787514edf7", [:mix], [{:nimble_parsec, "~> 0.2.2", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"}, - "makeup_elixir": {:hex, :makeup_elixir, "0.8.0", 
"1204a2f5b4f181775a0e456154830524cf2207cf4f9112215c05e0b76e4eca8b", [:mix], [{:makeup, "~> 0.5.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 0.2.2", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"}, - "nimble_parsec": {:hex, :nimble_parsec, "0.2.2", "d526b23bdceb04c7ad15b33c57c4526bf5f50aaa70c7c141b4b4624555c68259", [:mix], [], "hexpm"}, - "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm"}, + "earmark": {:hex, :earmark, "1.2.5", "4d21980d5d2862a2e13ec3c49ad9ad783ffc7ca5769cf6ff891a4553fbaae761", [:mix], [], "hexpm", "c57508ddad47dfb8038ca6de1e616e66e9b87313220ac5d9817bc4a4dc2257b9"}, + "elixir_uuid": {:hex, :elixir_uuid, "1.2.0", "ff26e938f95830b1db152cb6e594d711c10c02c6391236900ddd070a6b01271d", [:mix], [], "hexpm", "e4d6e26434471761ed45a3545239da87af7b70904dd4442a55f87d06b137c56b"}, + "ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "dc87f778d8260da0189a622f62790f6202af72f2f3dee6e78d91a18dd2fcd137"}, + "makeup": {:hex, :makeup, "0.5.1", "966c5c2296da272d42f1de178c1d135e432662eca795d6dc12e5e8787514edf7", [:mix], [{:nimble_parsec, "~> 0.2.2", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "259748a45dfcf5f49765a7c29c9594791c82de23e22d7a3e6e59533fe8e8935b"}, + "makeup_elixir": {:hex, :makeup_elixir, "0.8.0", "1204a2f5b4f181775a0e456154830524cf2207cf4f9112215c05e0b76e4eca8b", [:mix], [{:makeup, "~> 0.5.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 0.2.2", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "393d17c5a648e3b30522b2a4743bd1dc3533e1227c8c2823ebe8c3a8e5be5913"}, + "nimble_parsec": {:hex, :nimble_parsec, "0.2.2", 
"d526b23bdceb04c7ad15b33c57c4526bf5f50aaa70c7c141b4b4624555c68259", [:mix], [], "hexpm", "4ababf5c44164f161872704e1cfbecab3935fdebec66c72905abaad0e6e5cef6"}, + "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm", "fec8660eb7733ee4117b85f55799fd3833eb769a6df71ccf8903e8dc5447cfce"}, + "rustler": {:hex, :rustler, "0.21.1", "5299980be32da997c54382e945bacaa015ed97a60745e1e639beaf6a7b278c65", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "6ee1651e10645b2b2f3bb70502bf180341aa058709177e9bc28c105934094bc6"}, + "toml": {:hex, :toml, "0.5.2", "e471388a8726d1ce51a6b32f864b8228a1eb8edc907a0edf2bb50eab9321b526", [:mix], [], "hexpm", "f1e3dabef71fb510d015fad18c0e05e7c57281001141504c6b69d94e99750a07"}, "uuid": {:hex, :uuid, "1.1.8", "e22fc04499de0de3ed1116b770c7737779f226ceefa0badb3592e64d5cfb4eb9", [:mix], [], "hexpm"}, } diff --git a/native/exshape_shape/.gitignore b/native/exshape_shape/.gitignore new file mode 100644 index 0000000..b83d222 --- /dev/null +++ b/native/exshape_shape/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/native/exshape_shape/Cargo.lock b/native/exshape_shape/Cargo.lock new file mode 100644 index 0000000..57180d4 --- /dev/null +++ b/native/exshape_shape/Cargo.lock @@ -0,0 +1,280 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "addr2line" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55f82cfe485775d02112886f4169bde0c5894d75e79ead7eafe7e40a25e45f7" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "backtrace" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef5140344c85b01f9bbb4d4b7288a8aa4b3287ccef913a14bcc78a1063623598" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "derivative" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaed5874effa6cde088c644ddcdcb4ffd1511391c5be4fdd7a5ccd02c7e4a183" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.8", + "syn 1.0.57", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "exshape_shape" +version = "0.1.0" +dependencies = [ + "derivative", + "float_extras", + "itertools", + "rustler", + "rustler_codegen", +] + +[[package]] +name = "failure" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" +dependencies = [ + "backtrace", +] + 
+[[package]] +name = "float_extras" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b22b70f8649ea2315955f1a36d964b0e4da482dfaa5f0d04df0d1fb7c338ab7a" +dependencies = [ + "libc", +] + +[[package]] +name = "gimli" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce" + +[[package]] +name = "heck" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "itertools" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319" +dependencies = [ + "either", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" + +[[package]] +name = "miniz_oxide" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f2d26ec3309788e423cfbf68ad1800f061638098d76a83681af979dc4eda19d" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "object" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d3b63360ec3cb337817c2dbd47ab4a0f170d285d8e5a2064600f3def1402397" + +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid 0.1.0", +] + 
+[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid 0.2.1", +] + +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + +[[package]] +name = "quote" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" +dependencies = [ + "proc-macro2 1.0.24", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e3bad0ee36814ca07d7968269dd4b7ec89ec2da10c4bb613928d3077083c232" + +[[package]] +name = "rustler" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "533dc3379a0f166749ce262a941e9b52ce19c3208729fc6b6cce76aea76d939b" +dependencies = [ + "lazy_static", + "rustler_codegen", + "rustler_sys", + "which", +] + +[[package]] +name = "rustler_codegen" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21563a1c4b02773f5c6dce723630c9998694258ff4d67bd6025ba057a29b51c" +dependencies = [ + "heck", + "proc-macro2 0.4.30", + "quote 0.6.13", + "syn 0.15.44", +] + +[[package]] +name = "rustler_sys" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb96034ff33723615fd19223d58c987c1f6476342e83557a6e467ef95f83bda" +dependencies = [ + "unreachable", +] + +[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 
0.6.13", + "unicode-xid 0.1.0", +] + +[[package]] +name = "syn" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4211ce9909eb971f111059df92c45640aad50a619cf55cd76476be803c4c68e6" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.8", + "unicode-xid 0.2.1", +] + +[[package]] +name = "unicode-segmentation" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +dependencies = [ + "void", +] + +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + +[[package]] +name = "which" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b57acb10231b9493c8472b20cb57317d0679a49e0bdbee44b3b803a6473af164" +dependencies = [ + "failure", + "libc", +] diff --git a/native/exshape_shape/Cargo.toml b/native/exshape_shape/Cargo.toml new file mode 100644 index 0000000..5e33c23 --- /dev/null +++ b/native/exshape_shape/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "exshape_shape" +version = "0.1.0" +authors = [] +edition = "2018" + +[lib] +name = "exshape_shape" +path = "src/lib.rs" +crate-type = ["dylib"] + +[dependencies] +rustler = "0.21.1" 
+rustler_codegen = "0.21.1" +itertools = "0.10" +derivative = "2.1" +float_extras = "0.1" diff --git a/native/exshape_shape/src/lib.rs b/native/exshape_shape/src/lib.rs new file mode 100644 index 0000000..9733811 --- /dev/null +++ b/native/exshape_shape/src/lib.rs @@ -0,0 +1,71 @@ +use rustler::{Encoder, Env, NifResult, SchedulerFlags, Term, rustler_export_nifs}; + +mod atoms { + pub use rustler::types::atom::*; + rustler::rustler_atoms! { + atom x; + atom y; + } +} + +rustler_export_nifs! { + "Elixir.Exshape.Shp", + [ + ("native_nest_holes_impl", 2, nest_holes, SchedulerFlags::DirtyCpu) + ], + None +} + +mod point; +mod lineseg; +mod ring; +mod poly; + +use ring::Ring; +use poly::Poly; +use point::Point; + +fn nest_holes<'a>(env: Env<'a>, args: &[Term<'a>]) -> NifResult> { + let mut polys: Vec> = args[0].decode()?; + let holes: Vec> = args[1].decode()?; + + if holes.len() == 1 { + // if there's only a single hole, we won't bother slicing the + // polygons, since we'd just throw away all that work anyway. + let hole = holes.into_iter().next().unwrap(); + process(&mut polys, hole, Ring::contains_unsliced) + } else { + for hole in holes { + process(&mut polys, hole, Ring::contains); + } + } + + Ok((atoms::ok(), polys).encode(env)) +} + +fn process<'a>(polys: &mut Vec>, hole: Ring<'a>, contain: fn(&Ring<'a>, &Point) -> bool) { + match polys.len() { + 0 => { + polys.push(Poly::from_ring(hole)); + } + 1 => { + polys[0].push(hole); + } + _ => { + // in the original, this is recursive, but we'll do it + // iteratively. What we want to do is find the first poly + // which contains the first point of the ring and push the + // hole onto it. If there is no such poly, just make a + // new one at the end for the hole to live in. 
+ let pt = hole.first_point(); + match polys.iter_mut().find(|poly| contain(poly.first_ring(), pt)) { + Some(poly) => { + poly.push(hole); + } + None => { + polys.push(Poly::from_ring(hole)); + } + } + } + } +} diff --git a/native/exshape_shape/src/lineseg.rs b/native/exshape_shape/src/lineseg.rs new file mode 100644 index 0000000..2de56cd --- /dev/null +++ b/native/exshape_shape/src/lineseg.rs @@ -0,0 +1,7 @@ +use crate::point::Point; + +#[derive(Clone, Copy, Debug)] +pub struct LineSeg { + pub a: Point, + pub b: Point +} diff --git a/native/exshape_shape/src/point.rs b/native/exshape_shape/src/point.rs new file mode 100644 index 0000000..986f62a --- /dev/null +++ b/native/exshape_shape/src/point.rs @@ -0,0 +1,5 @@ +#[derive(Clone, Copy, Debug)] +pub struct Point { + pub x: f64, + pub y: f64 +} diff --git a/native/exshape_shape/src/poly.rs b/native/exshape_shape/src/poly.rs new file mode 100644 index 0000000..c9abe23 --- /dev/null +++ b/native/exshape_shape/src/poly.rs @@ -0,0 +1,37 @@ +use rustler::{Decoder, Encoder, Error, NifResult, Term}; + +use crate::ring::Ring; + +pub struct Poly<'a> { + rings: Vec> +} + +impl <'a> Poly<'a> { + pub fn from_ring(ring: Ring<'a>) -> Self { + Self { rings: vec![ring] } + } + + pub fn first_ring(&self) -> &Ring<'a> { + &self.rings[0] + } + + pub fn push(&mut self, ring: Ring<'a>) { + self.rings.push(ring) + } +} + +impl <'a> Decoder<'a> for Poly<'a> { + fn decode(term: Term<'a>) -> NifResult { + let rings = term.decode::>()?; + if rings.is_empty() { + return Err(Error::BadArg); + } + Ok(Poly { rings }) + } +} + +impl <'a> Encoder for Poly<'a> { + fn encode<'b>(&self, env: rustler::Env<'b>) -> Term<'b> { + self.rings.encode(env) + } +} diff --git a/native/exshape_shape/src/ring.rs b/native/exshape_shape/src/ring.rs new file mode 100644 index 0000000..3978270 --- /dev/null +++ b/native/exshape_shape/src/ring.rs @@ -0,0 +1,223 @@ +use itertools::Itertools; +use std::cell::{Ref, RefCell}; +use derivative::Derivative; + +use 
rustler::{Decoder, Encoder, Env, ListIterator, NifResult, Term, Error}; + +use crate::point::Point; +use crate::lineseg::LineSeg; +use crate::atoms; + +#[derive(Debug)] +struct Slices { + segments: Vec>, + y_min: f64, + y_max: f64 +} + +#[derive(Derivative)] +#[derivative(Debug)] +pub struct Ring<'a> { + #[derivative(Debug = "ignore")] + term: Term<'a>, + points: Vec, + slices: RefCell> +} + +impl <'a> Decoder<'a> for Ring<'a> { + fn decode(term: Term<'a>) -> NifResult { + // could define a Decoder for Point and just use Vec's Decoder + // impl, but this way we can look up the atoms just once per + // ring instead of once per point... + + let env = term.get_env(); + let x = atoms::x().to_term(env); + let y = atoms::y().to_term(env); + let points = + term.decode::>()?.map(|pt| { + Ok(Point { x : pt.map_get(x)?.decode()?, + y : pt.map_get(y)?.decode()? }) + }).collect::>>()?; + + if points.is_empty() { + return Err(Error::BadArg); + } + + Ok( + Ring { + term, + points, + slices: RefCell::new(None) + } + ) + } +} + +impl <'a> Encoder for Ring<'a> { + fn encode<'b>(&self, env: Env<'b>) -> Term<'b> { + self.term.in_env(env) + } +} + +impl <'a> Ring<'a> { + pub fn first_point(&self) -> &Point { + &self.points[0] // guaranteed to exist because the decoder requires non-emptiness + } + + fn slices(&self) -> Ref { + let mut slices = self.slices.borrow(); + if slices.is_none() { + drop(slices); + *self.slices.borrow_mut() = Some(slice(&self.points)); + slices = self.slices.borrow(); + } + Ref::map(slices, |t| t.as_ref().unwrap()) + } + + fn slice_for(&self, pt: &Point) -> Option>> { + let slices = self.slices(); + if pt.y < slices.y_min || pt.y > slices.y_max { + None + } else { + Some(Ref::map(slices, |slices| { + &slices.segments[band_for(slices.y_min, slices.y_max, pt.y, slices.segments.len())] + })) + } + } + + pub fn contains(&self, pt: &Point) -> bool { + match self.slice_for(pt) { + None => { + false + } + Some(vec) => { + let Point { x, y } = *pt; + 
vec.iter().fold(false, move |c, lineseg| { + if ((lineseg.a.y > y) != (lineseg.b.y > y)) && (x < ((((lineseg.b.x - lineseg.a.x) * (y - lineseg.a.y)) / (lineseg.b.y - lineseg.a.y)) + lineseg.a.x)) { + !c + } else { + c + } + }) + } + } + } + + pub fn contains_unsliced(&self, pt: &Point) -> bool { + let Point { x, y } = *pt; + self.points.iter().fold((false, self.points.last().unwrap()), move |(c, j), i| { + let c = + if ((i.y > y) != (j.y > y)) && (x < ((((j.x - i.x) * (y - i.y)) / (j.y - i.y)) + i.x)) { + !c + } else { + c + }; + (c, i) + }).0 + } +} + +fn band_for(y_min: f64, y_max: f64, y: f64, bands: usize) -> usize { + let range = y_max - y_min; + let frac = (y - y_min) / range; + (bands as f64 * frac).floor() as usize +} + +fn slice(points: &Vec) -> Slices { + let (y_min, y_max) = + points.iter().fold((f64::INFINITY, f64::NEG_INFINITY), |(min, max), pt| { + (min.min(pt.y), max.max(pt.y)) + }); + + // poke out the range a tiny bit to avoid edge cases + let y_min = float_extras::f64::nextafter(y_min, f64::NEG_INFINITY); + let y_max = float_extras::f64::nextafter(y_max, f64::INFINITY); + + let mut segments = vec![Vec::new(); 10]; + + for (&a, &b) in points.last().into_iter().chain(points.into_iter()).tuple_windows() { + let a_seg = band_for(y_min, y_max, a.y.min(y_max).max(y_min), segments.len()); + let b_seg = band_for(y_min, y_max, b.y.min(y_max).max(y_min), segments.len()); + + let min_seg = a_seg.min(b_seg); + let max_seg = a_seg.max(b_seg); + + for seg in min_seg..=max_seg { + segments[seg].push(LineSeg { a, b }); + } + } + + Slices { + y_min, + y_max, + segments + } +} + +#[cfg(test)] +mod test { + use std::cell::RefCell; + use rustler::{Term, Env}; + use super::Ring; + use crate::point::Point; + + fn fake_term() -> Term<'static> { + // SAFETY: this in fact isn't safe :) + // But trying to do anything with the term will crash anyway, + // because the nif dynamic library won't be loaded while + // running the tests. 
+ unsafe { + Term::new(Env::new(&(), std::ptr::null_mut()), 0) + } + } + + fn unit_square() -> Ring<'static> { + Ring { + term: fake_term(), + points: vec![Point { x: -0.5, y: -0.5 }, + Point { x: -0.5, y: 0.5 }, + Point { x: 0.5, y: 0.5 }, + Point { x: 0.5, y: -0.5 }], + slices: RefCell::new(None) + } + } + + fn u_shape() -> Ring<'static> { + Ring { + term: fake_term(), + points: vec![Point { x: -0.5, y: -0.5 }, + Point { x: -0.5, y: 0.5 }, + Point { x: -0.4, y: 0.5 }, + Point { x: -0.4, y: -0.4 }, + Point { x: 0.4, y: -0.4}, + Point { x: 0.4, y: 0.5 }, + Point { x: 0.5, y: 0.5 }, + Point { x: 0.5, y: -0.5 }], + slices: RefCell::new(None) + } + } + + #[test] + fn basic_sanity_check() { + let sq = unit_square(); + assert!(sq.contains(&Point { x: 0.0, y: 0.0 })); + assert!(!sq.contains(&Point { x: 10.0, y: 0.0 })); + assert!(!sq.contains(&Point { x: -10.0, y: 0.0 })); + assert!(!sq.contains(&Point { x: 0.0, y: 10.0 })); + assert!(!sq.contains(&Point { x: 0.0, y: -10.0 })); + } + + #[test] + fn more_complex_sanity_check() { + let shape = u_shape(); + assert!(shape.contains(&Point { x: -0.45, y: 0.0 })); + assert!(shape.contains(&Point { x: 0.45, y: 0.0 })); + assert!(shape.contains(&Point { x: 0.0, y: -0.45 })); + assert!(!shape.contains(&Point { x: 0.0, y: 0.45 })); + assert!(!shape.contains(&Point { x: 0.0, y: 0.0 })); + assert!(!shape.contains(&Point { x: 10.0, y: 0.0 })); + assert!(!shape.contains(&Point { x: -10.0, y: 0.0 })); + assert!(!shape.contains(&Point { x: 0.0, y: 10.0 })); + assert!(!shape.contains(&Point { x: 0.0, y: -10.0 })); + } +} diff --git a/test/exshape_test.exs b/test/exshape_test.exs index 4d1af9f..3491214 100644 --- a/test/exshape_test.exs +++ b/test/exshape_test.exs @@ -115,4 +115,28 @@ defmodule ExshapeTest do [_, {shape, _}] = Enum.into(stream, []) assert (shape.points |> List.flatten |> length) == 174045 end + + test "native and beam produce the same results" do + ["archive", "chicago_zoning", "co-parcels", "hoods", "howard-beach", 
"row_181", "seattle_basketball_points", "speed_enforcement", "zillow"] + |> Enum.each(fn path -> + [{_, _, beam_stream}] = zip(path) |> Exshape.from_zip(native: false) + beam = Enum.into(beam_stream, []) + [{_, _, native_stream}] = zip(path) |> Exshape.from_zip(native: true) + native = Enum.into(native_stream, []) + assert beam == native + end) + + ["multipatch", "multipointm", "multipoint", "multipointz", "pointm", "point", "pointz", "polygonm", "polygon", "polygons", "polygonz", "polylinem", "polyline", "polylinez", "Neighborhoods/neighborhoods_orleans"] + |> Enum.each(fn path -> + beam = shp(path) + |> File.stream!([], 2048) + |> Exshape.Shp.read(native: false) + |> Enum.into([]) + native = shp(path) + |> File.stream!([], 2048) + |> Exshape.Shp.read(native: true) + |> Enum.into([]) + assert beam == native + end) + end end diff --git a/test/test_helper.exs b/test/test_helper.exs index 9b1256e..cf59491 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -4,5 +4,6 @@ defmodule TestHelper do end def zip(name), do: "#{__DIR__}/fixtures/#{name}.zip" + def shp(name), do: "#{__DIR__}/fixtures/#{name}.shp" end ExUnit.start() From 1d1d76d5543547e824fa642bdea3949a9a4cc9c4 Mon Sep 17 00:00:00 2001 From: "Robert J. Macomber" Date: Tue, 5 Jan 2021 12:46:25 -0800 Subject: [PATCH 2/7] Make the Rust hole-nester the default --- lib/exshape/shp.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/exshape/shp.ex b/lib/exshape/shp.ex index 65b5188..d652cc5 100644 --- a/lib/exshape/shp.ex +++ b/lib/exshape/shp.ex @@ -769,7 +769,7 @@ defmodule Exshape.Shp do ``` """ def read(byte_stream, opts \\ []) do - native = Keyword.get(opts, :native, false) + native = Keyword.get(opts, :native, true) state = %State{ nest_holes: if(native, do: &native_nest_holes/2, else: &beam_nest_holes/2) From 9670c3ee3050dce862a1f2550d223dc01281ccb0 Mon Sep 17 00:00:00 2001 From: "Robert J. 
Macomber" Date: Tue, 5 Jan 2021 13:28:58 -0800 Subject: [PATCH 3/7] Make the Rust behavior the same as the Elixir in a pathological case If there's a multipolygon with more than one polygon and a hole that doesn't fit in any of them, the Elixir version will attach the hole to the last polygon. --- native/exshape_shape/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/native/exshape_shape/src/lib.rs b/native/exshape_shape/src/lib.rs index 9733811..7925035 100644 --- a/native/exshape_shape/src/lib.rs +++ b/native/exshape_shape/src/lib.rs @@ -55,15 +55,15 @@ fn process<'a>(polys: &mut Vec>, hole: Ring<'a>, contain: fn(&Ring<'a>, // in the original, this is recursive, but we'll do it // iteratively. What we want to do is find the first poly // which contains the first point of the ring and push the - // hole onto it. If there is no such poly, just make a - // new one at the end for the hole to live in. + // hole onto it. If it doesn't fit in any poly, just smash + // it onto the last. let pt = hole.first_point(); match polys.iter_mut().find(|poly| contain(poly.first_ring(), pt)) { Some(poly) => { poly.push(hole); } None => { - polys.push(Poly::from_ring(hole)); + polys.last_mut().unwrap().push(hole); } } } From 933e47f328f61a17c228bb8091d36a17b209a3a8 Mon Sep 17 00:00:00 2001 From: "Robert J. 
Macomber" Date: Tue, 5 Jan 2021 14:18:02 -0800 Subject: [PATCH 4/7] Test both contains methods --- native/exshape_shape/src/ring.rs | 38 ++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/native/exshape_shape/src/ring.rs b/native/exshape_shape/src/ring.rs index 3978270..e38fb30 100644 --- a/native/exshape_shape/src/ring.rs +++ b/native/exshape_shape/src/ring.rs @@ -197,27 +197,37 @@ mod test { } } + fn contains(ring: &Ring, pt: Point) { + assert!(ring.contains(&pt)); + assert!(ring.contains_unsliced(&pt)); + } + + fn doesnt_contain(ring: &Ring, pt: Point) { + assert!(!ring.contains(&pt)); + assert!(!ring.contains_unsliced(&pt)); + } + #[test] fn basic_sanity_check() { let sq = unit_square(); - assert!(sq.contains(&Point { x: 0.0, y: 0.0 })); - assert!(!sq.contains(&Point { x: 10.0, y: 0.0 })); - assert!(!sq.contains(&Point { x: -10.0, y: 0.0 })); - assert!(!sq.contains(&Point { x: 0.0, y: 10.0 })); - assert!(!sq.contains(&Point { x: 0.0, y: -10.0 })); + contains(&sq, Point { x: 0.0, y: 0.0 }); + doesnt_contain(&sq, Point { x: 10.0, y: 0.0 }); + doesnt_contain(&sq, Point { x: -10.0, y: 0.0 }); + doesnt_contain(&sq, Point { x: 0.0, y: 10.0 }); + doesnt_contain(&sq, Point { x: 0.0, y: -10.0 }); } #[test] fn more_complex_sanity_check() { let shape = u_shape(); - assert!(shape.contains(&Point { x: -0.45, y: 0.0 })); - assert!(shape.contains(&Point { x: 0.45, y: 0.0 })); - assert!(shape.contains(&Point { x: 0.0, y: -0.45 })); - assert!(!shape.contains(&Point { x: 0.0, y: 0.45 })); - assert!(!shape.contains(&Point { x: 0.0, y: 0.0 })); - assert!(!shape.contains(&Point { x: 10.0, y: 0.0 })); - assert!(!shape.contains(&Point { x: -10.0, y: 0.0 })); - assert!(!shape.contains(&Point { x: 0.0, y: 10.0 })); - assert!(!shape.contains(&Point { x: 0.0, y: -10.0 })); + contains(&shape, Point { x: -0.45, y: 0.0 }); + contains(&shape, Point { x: 0.45, y: 0.0 }); + contains(&shape, Point { x: 0.0, y: -0.45 }); + doesnt_contain(&shape, Point 
{ x: 0.0, y: 0.45 }); + doesnt_contain(&shape, Point { x: 0.0, y: 0.0 }); + doesnt_contain(&shape, Point { x: 10.0, y: 0.0 }); + doesnt_contain(&shape, Point { x: -10.0, y: 0.0 }); + doesnt_contain(&shape, Point { x: 0.0, y: 10.0 }); + doesnt_contain(&shape, Point { x: 0.0, y: -10.0 }); } } From ff8f7f10c799705bb79c917f0f13dc530b61f3f6 Mon Sep 17 00:00:00 2001 From: "Robert J. Macomber" Date: Wed, 6 Jan 2021 09:12:18 -0800 Subject: [PATCH 5/7] Correct crate name in mix.exs This doesn't actually seem to matter, but it's good hygiene --- mix.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mix.exs b/mix.exs index 917a2c5..d06b4a7 100644 --- a/mix.exs +++ b/mix.exs @@ -21,7 +21,7 @@ defmodule Exshape.Mixfile do [] else [ - exsoda_shape: [ + exshape_shape: [ path: "native/exshape_shape", mode: :release ] From f95c1e2aed1d0bbcbeeb7e1856c2227be54d6f69 Mon Sep 17 00:00:00 2001 From: "Robert J. Macomber" Date: Thu, 7 Jan 2021 09:37:08 -0800 Subject: [PATCH 6/7] Move the is_clockwise check into Rust too This improves the runtime of the problematic shapefile by another third. 
--- lib/exshape/shp.ex | 36 ++++++++++++++------------------ native/exshape_shape/src/lib.rs | 29 +++++++++++++++++-------- native/exshape_shape/src/poly.rs | 10 +++++---- native/exshape_shape/src/ring.rs | 24 +++++++++++++++++++-- test/shp_test.exs | 15 +++++++++---- 5 files changed, 75 insertions(+), 39 deletions(-) diff --git a/lib/exshape/shp.ex b/lib/exshape/shp.ex index d652cc5..55c0fbf 100644 --- a/lib/exshape/shp.ex +++ b/lib/exshape/shp.ex @@ -3,7 +3,7 @@ defmodule Exshape.Shp do use Rustler, otp_app: :exshape, crate: :exshape_shape defmodule State do - @enforce_keys [:nest_holes] + @enforce_keys [:nest_polygon] defstruct mode: :header, shape_type: nil, emit: [], @@ -12,7 +12,7 @@ defmodule Exshape.Shp do part_index: 0, measures: [], z_values: [], - nest_holes: nil + nest_polygon: nil end @magic_nodata_num :math.pow(10, 38) * -1 @@ -116,8 +116,8 @@ defmodule Exshape.Shp do %{s | measures: [], z_values: []} end - defp emit(%State{nest_holes: nest_holes} = s, %Polygon{} = p) do - %{s | mode: :record_header, emit: [%{p | points: nest_polygon(p, nest_holes)} | s.emit]} + defp emit(%State{nest_polygon: nest_polygon} = s, %Polygon{} = p) do + %{s | mode: :record_header, emit: [%{p | points: nest_polygon.(p)} | s.emit]} end defp emit(s, %Polyline{} = p) do @@ -139,9 +139,9 @@ defmodule Exshape.Shp do %{s | mode: :record_header, emit: [polylinem | s.emit]} |> reset_unzipped end - defp emit(%State{nest_holes: nest_holes} = s, %PolygonM{} = pm) do + defp emit(%State{nest_polygon: nest_polygon} = s, %PolygonM{} = pm) do p = zip_measures(pm, s) - polylinem = %{p | points: nest_polygon(p, nest_holes)} + polylinem = %{p | points: nest_polygon.(p)} %{s | mode: :record_header, emit: [polylinem | s.emit]} |> reset_unzipped end @@ -163,12 +163,12 @@ defmodule Exshape.Shp do %{s | mode: :record_header, emit: [polylinez | s.emit]} |> reset_unzipped end - defp emit(%State{nest_holes: nest_holes} = s, %PolygonZ{} = pz) do + defp emit(%State{nest_polygon: nest_polygon} = s, 
%PolygonZ{} = pz) do p = pz |> zip_measures(s) |> zip_zvals(s) - polygonz = %{p | points: nest_polygon(p, nest_holes)} + polygonz = %{p | points: nest_polygon.(p)} %{s | mode: :record_header, emit: [polygonz | s.emit]} |> reset_unzipped end @@ -223,20 +223,16 @@ defmodule Exshape.Shp do parts end - def nest_polygon(p, nest_holes \\ &beam_nest_holes/2) do - {polys, holes} = unflatten_parts(p) |> Enum.split_with(&is_clockwise?/1) - - nest_holes.(Enum.map(polys, fn p -> [p] end), holes) - end - - defp native_nest_holes(polys, holes) do - {:ok, r} = native_nest_holes_impl(polys, holes) + def native_nest_polygon(p) do + {:ok, r} = native_nest_polygon_impl(unflatten_parts(p)) r end - defp native_nest_holes_impl(_polys, _holes), do: throw :nif_not_loaded + defp native_nest_polygon_impl(_p), do: throw :nif_not_loaded + + def beam_nest_polygon(p) do + {polys, holes} = unflatten_parts(p) |> Enum.split_with(&is_clockwise?/1) - defp beam_nest_holes(polys, holes) do - Enum.reduce(holes, polys, fn hole, polys -> + Enum.reduce(holes, Enum.map(polys, fn p -> [p] end), fn hole, polys -> nest_hole(hole, polys) end) end @@ -772,7 +768,7 @@ defmodule Exshape.Shp do native = Keyword.get(opts, :native, true) state = %State{ - nest_holes: if(native, do: &native_nest_holes/2, else: &beam_nest_holes/2) + nest_polygon: if(native, do: &native_nest_polygon/1, else: &beam_nest_polygon/1) } Stream.transform(byte_stream, {<<>>, state}, fn bin, {buf, state} -> case do_read(state, buf <> bin) do diff --git a/native/exshape_shape/src/lib.rs b/native/exshape_shape/src/lib.rs index 7925035..3ce657a 100644 --- a/native/exshape_shape/src/lib.rs +++ b/native/exshape_shape/src/lib.rs @@ -1,4 +1,5 @@ use rustler::{Encoder, Env, NifResult, SchedulerFlags, Term, rustler_export_nifs}; +use itertools::{Itertools, Either}; mod atoms { pub use rustler::types::atom::*; @@ -11,7 +12,7 @@ mod atoms { rustler_export_nifs! 
{ "Elixir.Exshape.Shp", [ - ("native_nest_holes_impl", 2, nest_holes, SchedulerFlags::DirtyCpu) + ("native_nest_polygon_impl", 1, nest_polygon, SchedulerFlags::DirtyCpu) ], None } @@ -25,28 +26,38 @@ use ring::Ring; use poly::Poly; use point::Point; -fn nest_holes<'a>(env: Env<'a>, args: &[Term<'a>]) -> NifResult> { - let mut polys: Vec> = args[0].decode()?; - let holes: Vec> = args[1].decode()?; +fn nest_polygon<'a>(env: Env<'a>, args: &[Term<'a>]) -> NifResult> { + let rings = args[0].decode::>>()?; + let (mut polys, holes) = rings.into_iter().partition_map(|ring| { + if ring.is_clockwise() { + Either::Left(ring.into()) + } else { + Either::Right(ring) + } + }); + nest_holes(&mut polys, holes); + + Ok((atoms::ok(), polys).encode(env)) +} + +fn nest_holes<'a>(polys: &mut Vec>, holes: Vec>) { if holes.len() == 1 { // if there's only a single hole, we won't bother slicing the // polygons, since we'd just throw away all that work anyway. let hole = holes.into_iter().next().unwrap(); - process(&mut polys, hole, Ring::contains_unsliced) + process(polys, hole, Ring::contains_unsliced) } else { for hole in holes { - process(&mut polys, hole, Ring::contains); + process(polys, hole, Ring::contains); } } - - Ok((atoms::ok(), polys).encode(env)) } fn process<'a>(polys: &mut Vec>, hole: Ring<'a>, contain: fn(&Ring<'a>, &Point) -> bool) { match polys.len() { 0 => { - polys.push(Poly::from_ring(hole)); + polys.push(hole.into()); } 1 => { polys[0].push(hole); diff --git a/native/exshape_shape/src/poly.rs b/native/exshape_shape/src/poly.rs index c9abe23..a6309ef 100644 --- a/native/exshape_shape/src/poly.rs +++ b/native/exshape_shape/src/poly.rs @@ -7,10 +7,6 @@ pub struct Poly<'a> { } impl <'a> Poly<'a> { - pub fn from_ring(ring: Ring<'a>) -> Self { - Self { rings: vec![ring] } - } - pub fn first_ring(&self) -> &Ring<'a> { &self.rings[0] } @@ -20,6 +16,12 @@ impl <'a> Poly<'a> { } } +impl <'a> From> for Poly<'a> { + fn from(ring: Ring<'a>) -> Self { + Self { rings: vec![ring] } 
+ } +} + impl <'a> Decoder<'a> for Poly<'a> { fn decode(term: Term<'a>) -> NifResult { let rings = term.decode::>()?; diff --git a/native/exshape_shape/src/ring.rs b/native/exshape_shape/src/ring.rs index e38fb30..1313d1a 100644 --- a/native/exshape_shape/src/ring.rs +++ b/native/exshape_shape/src/ring.rs @@ -24,6 +24,18 @@ pub struct Ring<'a> { slices: RefCell> } +// The terms that we get from shapefiles will only ever be floats, but +// that's annoying to write by hand in test code, so we'll accept +// integers too. +fn decode_floatish<'a>(term: Term<'a>) -> NifResult { + term.decode().or_else(|e| { + match e { + Error::BadArg => term.decode::().map(|i| i as f64), + other => Err(other) + } + }) +} + impl <'a> Decoder<'a> for Ring<'a> { fn decode(term: Term<'a>) -> NifResult { // could define a Decoder for Point and just use Vec's Decoder @@ -35,8 +47,8 @@ impl <'a> Decoder<'a> for Ring<'a> { let y = atoms::y().to_term(env); let points = term.decode::>()?.map(|pt| { - Ok(Point { x : pt.map_get(x)?.decode()?, - y : pt.map_get(y)?.decode()? }) + Ok(Point { x : decode_floatish(pt.map_get(x)?)?, + y : decode_floatish(pt.map_get(y)?)? 
}) }).collect::>>()?; if points.is_empty() { @@ -64,6 +76,14 @@ impl <'a> Ring<'a> { &self.points[0] // guaranteed to exist because the decoder requires non-emptiness } + pub fn is_clockwise(&self) -> bool { + let (_, area) = + self.points[1..].iter().fold((&self.points[0], 0.0), |(prev_pt, s), pt| { + (pt, s + (pt.x - prev_pt.x) * (pt.y + prev_pt.y)) + }); + area >= 0.0 + } + fn slices(&self) -> Ref { let mut slices = self.slices.borrow(); if slices.is_none() { diff --git a/test/shp_test.exs b/test/shp_test.exs index 7a5de4a..1735711 100644 --- a/test/shp_test.exs +++ b/test/shp_test.exs @@ -10,6 +10,13 @@ defmodule ShpTest do } doctest Exshape + def nest_polygon(p) do + beam = Shp.beam_nest_polygon(p) + native = Shp.native_nest_polygon(p) + assert beam == native + beam + end + describe "regular geoms" do test "can read points" do [_header | points] = fixture("point.shp") @@ -304,7 +311,7 @@ defmodule ShpTest do describe "nesting" do test "can nest holes" do - assert Shp.nest_polygon(%Polygon{ + assert nest_polygon(%Polygon{ parts: [0, 5], points: Enum.reverse([ %Point{x: 0, y: 4}, @@ -340,7 +347,7 @@ defmodule ShpTest do end test "appends a part to the polygon when the part is clockwise" do - assert Shp.nest_polygon(%Polygon{ + assert nest_polygon(%Polygon{ parts: [0, 5], points: Enum.reverse([ %Point{x: 0, y: 4}, @@ -422,7 +429,7 @@ defmodule ShpTest do end test "can nest many holes" do - assert Shp.nest_polygon(%Polygon{ + assert nest_polygon(%Polygon{ parts: [0, 5, 10], points: Enum.reverse([ %Point{x: 0, y: 5}, @@ -471,7 +478,7 @@ defmodule ShpTest do end test "can nest holes and rings" do - assert Shp.nest_polygon(%Polygon{ + assert nest_polygon(%Polygon{ parts: [0, 5, 10], points: Enum.reverse([ %Point{x: 0, y: 5}, From b019afe18d0a1d7353d63b60a36801646c4040c3 Mon Sep 17 00:00:00 2001 From: "Robert J. 
Macomber" Date: Wed, 23 Jun 2021 13:02:18 -0700 Subject: [PATCH 7/7] Port to rustler 0.22.0 --- lib/exshape/shp.ex | 2 +- mix.exs | 19 +--- mix.lock | 2 +- native/exshape_shape/Cargo.lock | 154 ++++---------------------------- native/exshape_shape/Cargo.toml | 4 +- native/exshape_shape/src/lib.rs | 28 +++--- 6 files changed, 35 insertions(+), 174 deletions(-) diff --git a/lib/exshape/shp.ex b/lib/exshape/shp.ex index 55c0fbf..19e3bfa 100644 --- a/lib/exshape/shp.ex +++ b/lib/exshape/shp.ex @@ -1,6 +1,6 @@ defmodule Exshape.Shp do require Rustler - use Rustler, otp_app: :exshape, crate: :exshape_shape + use Rustler, otp_app: :exshape, crate: :exshape_shape, mode: :release defmodule State do @enforce_keys [:nest_polygon] diff --git a/mix.exs b/mix.exs index d06b4a7..89c4c38 100644 --- a/mix.exs +++ b/mix.exs @@ -10,25 +10,10 @@ defmodule Exshape.Mixfile do package: package(), build_embedded: Mix.env() == :prod, start_permanent: Mix.env() == :prod, - deps: deps(), - compilers: [:rustler] ++ Mix.compilers, - rustler_crates: rustler_crates() + deps: deps() ] end - defp rustler_crates do - if System.get_env("ALCHEMIST_MODE") do - [] - else - [ - exshape_shape: [ - path: "native/exshape_shape", - mode: :release - ] - ] - end - end - defp description do """ Read ESRI Shapefiles as a stream of features and their attributes @@ -65,7 +50,7 @@ defmodule Exshape.Mixfile do {:elixir_uuid, "~> 1.2"}, {:ex_doc, ">= 0.0.0", only: :dev}, {:poison, "~> 3.1", only: :test}, - {:rustler, "~> 0.21.1", sparse: "rustler_mix"}, + {:rustler, "~> 0.22.0"}, ] end end diff --git a/mix.lock b/mix.lock index dcbafce..bbf56eb 100644 --- a/mix.lock +++ b/mix.lock @@ -6,7 +6,7 @@ "makeup_elixir": {:hex, :makeup_elixir, "0.8.0", "1204a2f5b4f181775a0e456154830524cf2207cf4f9112215c05e0b76e4eca8b", [:mix], [{:makeup, "~> 0.5.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 0.2.2", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", 
"393d17c5a648e3b30522b2a4743bd1dc3533e1227c8c2823ebe8c3a8e5be5913"}, "nimble_parsec": {:hex, :nimble_parsec, "0.2.2", "d526b23bdceb04c7ad15b33c57c4526bf5f50aaa70c7c141b4b4624555c68259", [:mix], [], "hexpm", "4ababf5c44164f161872704e1cfbecab3935fdebec66c72905abaad0e6e5cef6"}, "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm", "fec8660eb7733ee4117b85f55799fd3833eb769a6df71ccf8903e8dc5447cfce"}, - "rustler": {:hex, :rustler, "0.21.1", "5299980be32da997c54382e945bacaa015ed97a60745e1e639beaf6a7b278c65", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "6ee1651e10645b2b2f3bb70502bf180341aa058709177e9bc28c105934094bc6"}, + "rustler": {:hex, :rustler, "0.22.0", "e2930f9d6933e910f87526bb0a7f904e32b62a7e838a3ca4a884ee7fdfb957ed", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "01f5989dd511ebec09be481e07d3c59773d5373c5061e09d3ebc3ef61811b49d"}, "toml": {:hex, :toml, "0.5.2", "e471388a8726d1ce51a6b32f864b8228a1eb8edc907a0edf2bb50eab9321b526", [:mix], [], "hexpm", "f1e3dabef71fb510d015fad18c0e05e7c57281001141504c6b69d94e99750a07"}, "uuid": {:hex, :uuid, "1.1.8", "e22fc04499de0de3ed1116b770c7737779f226ceefa0badb3592e64d5cfb4eb9", [:mix], [], "hexpm"}, } diff --git a/native/exshape_shape/Cargo.lock b/native/exshape_shape/Cargo.lock index 57180d4..977025d 100644 --- a/native/exshape_shape/Cargo.lock +++ b/native/exshape_shape/Cargo.lock @@ -1,45 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-[[package]] -name = "addr2line" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55f82cfe485775d02112886f4169bde0c5894d75e79ead7eafe7e40a25e45f7" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" - -[[package]] -name = "autocfg" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" - -[[package]] -name = "backtrace" -version = "0.3.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef5140344c85b01f9bbb4d4b7288a8aa4b3287ccef913a14bcc78a1063623598" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +version = 3 [[package]] name = "derivative" @@ -47,9 +8,9 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaed5874effa6cde088c644ddcdcb4ffd1511391c5be4fdd7a5ccd02c7e4a183" dependencies = [ - "proc-macro2 1.0.24", - "quote 1.0.8", - "syn 1.0.57", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -69,15 +30,6 @@ dependencies = [ "rustler_codegen", ] -[[package]] -name = "failure" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" -dependencies = [ - "backtrace", -] - [[package]] name = "float_extras" version = "0.1.6" @@ -87,12 +39,6 @@ dependencies = [ "libc", ] -[[package]] -name = "gimli" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce" - [[package]] name = "heck" version = "0.3.2" @@ -123,47 +69,13 @@ version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" -[[package]] -name = "miniz_oxide" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f2d26ec3309788e423cfbf68ad1800f061638098d76a83681af979dc4eda19d" -dependencies = [ - "adler", - "autocfg", -] - -[[package]] -name = "object" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3b63360ec3cb337817c2dbd47ab4a0f170d285d8e5a2064600f3def1402397" - -[[package]] -name = "proc-macro2" -version = "0.4.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" -dependencies = [ - "unicode-xid 0.1.0", -] - [[package]] name = "proc-macro2" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" dependencies = [ - "unicode-xid 0.2.1", -] - -[[package]] -name = "quote" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" -dependencies = [ - "proc-macro2 0.4.30", + "unicode-xid", ] [[package]] @@ -172,37 +84,30 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" dependencies = [ - "proc-macro2 1.0.24", + "proc-macro2", ] -[[package]] -name = "rustc-demangle" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e3bad0ee36814ca07d7968269dd4b7ec89ec2da10c4bb613928d3077083c232" - [[package]] name = "rustler" -version = "0.21.1" +version = 
"0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533dc3379a0f166749ce262a941e9b52ce19c3208729fc6b6cce76aea76d939b" +checksum = "b787d3b2a80007f41cd4c0c310cdeb3936192768159585f65ecc7e96faf97fc3" dependencies = [ "lazy_static", "rustler_codegen", "rustler_sys", - "which", ] [[package]] name = "rustler_codegen" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21563a1c4b02773f5c6dce723630c9998694258ff4d67bd6025ba057a29b51c" +checksum = "b5a1f867002b6f0130f47abf215cac4405646db6f5d7b009b21c890980490aa4" dependencies = [ "heck", - "proc-macro2 0.4.30", - "quote 0.6.13", - "syn 0.15.44", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -214,26 +119,15 @@ dependencies = [ "unreachable", ] -[[package]] -name = "syn" -version = "0.15.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" -dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "unicode-xid 0.1.0", -] - [[package]] name = "syn" version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4211ce9909eb971f111059df92c45640aad50a619cf55cd76476be803c4c68e6" dependencies = [ - "proc-macro2 1.0.24", - "quote 1.0.8", - "unicode-xid 0.2.1", + "proc-macro2", + "quote", + "unicode-xid", ] [[package]] @@ -242,12 +136,6 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" -[[package]] -name = "unicode-xid" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" - [[package]] name = "unicode-xid" version = "0.2.1" @@ -268,13 +156,3 @@ name = "void" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" - -[[package]] -name = "which" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b57acb10231b9493c8472b20cb57317d0679a49e0bdbee44b3b803a6473af164" -dependencies = [ - "failure", - "libc", -] diff --git a/native/exshape_shape/Cargo.toml b/native/exshape_shape/Cargo.toml index 5e33c23..c8ab39e 100644 --- a/native/exshape_shape/Cargo.toml +++ b/native/exshape_shape/Cargo.toml @@ -10,8 +10,8 @@ path = "src/lib.rs" crate-type = ["dylib"] [dependencies] -rustler = "0.21.1" -rustler_codegen = "0.21.1" +rustler = "0.22.0" +rustler_codegen = "0.22.0" itertools = "0.10" derivative = "2.1" float_extras = "0.1" diff --git a/native/exshape_shape/src/lib.rs b/native/exshape_shape/src/lib.rs index 3ce657a..9c01ebe 100644 --- a/native/exshape_shape/src/lib.rs +++ b/native/exshape_shape/src/lib.rs @@ -1,21 +1,13 @@ -use rustler::{Encoder, Env, NifResult, SchedulerFlags, Term, rustler_export_nifs}; +use rustler::{self, Encoder, Env, Term}; use itertools::{Itertools, Either}; mod atoms { + use ::rustler; pub use rustler::types::atom::*; - rustler::rustler_atoms! { - atom x; - atom y; - } + rustler::atoms! { x, y } } -rustler_export_nifs! 
{ - "Elixir.Exshape.Shp", - [ - ("native_nest_polygon_impl", 1, nest_polygon, SchedulerFlags::DirtyCpu) - ], - None -} +rustler::init!("Elixir.Exshape.Shp", [native_nest_polygon_impl]); mod point; mod lineseg; @@ -26,9 +18,15 @@ use ring::Ring; use poly::Poly; use point::Point; -fn nest_polygon<'a>(env: Env<'a>, args: &[Term<'a>]) -> NifResult> { - let rings = args[0].decode::>>()?; +struct Yes(T); +impl Encoder for Yes { + fn encode<'a>(&self, env: Env<'a>) -> Term<'a> { + (atoms::ok(), &self.0).encode(env) + } +} +#[rustler::nif(schedule = "DirtyCpu")] +fn native_nest_polygon_impl<'a>(rings: Vec>) -> Yes>> { let (mut polys, holes) = rings.into_iter().partition_map(|ring| { if ring.is_clockwise() { Either::Left(ring.into()) @@ -38,7 +36,7 @@ fn nest_polygon<'a>(env: Env<'a>, args: &[Term<'a>]) -> NifResult> { }); nest_holes(&mut polys, holes); - Ok((atoms::ok(), polys).encode(env)) + Yes(polys) } fn nest_holes<'a>(polys: &mut Vec>, holes: Vec>) {