Skip to content

Commit 6656b2c

Browse files
Merge pull request #95 from contour-terminal/improvement/tablegen
tablegen: Improve performance for names
2 parents d5ae8ec + bda4daf commit 6656b2c

File tree

7 files changed

+48
-11
lines changed

7 files changed

+48
-11
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/build/
2+
/out/
23
/_deps/
34
/_ucd/
45
/.clangd/

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ option(LIBUNICODE_TOOLS "libunicode: Builds CLI tools [default: ${MASTER_PROJECT
4848
option(LIBUNICODE_BUILD_STATIC "libunicode: provide static library instead of dynamic [default: ${LIBUNICODE_BUILD_STATIC_DEFAULT}]" ${LIBUNICODE_BUILD_STATIC_DEFAULT})
4949
option(LIBUNICODE_USE_INTRINSICS "libunicode: Use SIMD extension during text read [default: ON]" ON)
5050
option(LIBUNICODE_USE_STD_SIMD "libunicode: Use std::simd as SIMD extension during text read (takes precedence over own intrinsics) [default: ON]" ${LIBUNICODE_USE_INTRINSICS})
51+
option(LIBUNICODE_TABLEGEN_FASTBUILD "libunicode: Use fast table generation (takes more memory in final tables) [default: OFF]" OFF)
5152

5253
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Enable testing of the benchmark library." FORCE)
5354
include(ThirdParties)
@@ -104,6 +105,7 @@ message(STATUS "Build mode: ${LIBUNICODE_BUILD_MODE}")
104105
message(STATUS "Build unit tests: ${LIBUNICODE_TESTING}")
105106
message(STATUS "Build benchmark: ${LIBUNICODE_BENCHMARK}")
106107
message(STATUS "Build tools: ${LIBUNICODE_TOOLS}")
108+
message(STATUS "Enable tablegen fast build: ${LIBUNICODE_TABLEGEN_FASTBUILD}")
107109
message(STATUS "Using ccache: ${USING_CCACHE_STRING}")
108110
message(STATUS "Using UCD directory: ${LIBUNICODE_UCD_DIR}")
109111
message(STATUS "Enable clang-tidy: ${ENABLE_TIDY} (${CMAKE_CXX_CLANG_TIDY})")

cmake/presets/common.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"version": 6,
33
"configurePresets": [
4-
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
4+
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug", "LIBUNICODE_TABLEGEN_FASTBUILD": "ON" } },
55
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
66
{ "name": "arch-native", "hidden": true, "cacheVariables": { "CMAKE_CXX_FLAGS": "-march=native" } },
77
{ "name": "clang", "hidden": true, "cacheVariables": { "CMAKE_CXX_COMPILER": "clang++" } },

src/libunicode/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ add_custom_command(
8484

8585
add_library(unicode_loader ${LIBUNICODE_LIB_MODE} codepoint_properties_loader.h codepoint_properties_loader.cpp)
8686
add_library(unicode::loader ALIAS unicode_loader)
87+
if(LIBUNICODE_TABLEGEN_FASTBUILD)
88+
target_compile_definitions(unicode_loader PRIVATE LIBUNICODE_TABLEGEN_FASTBUILD)
89+
endif()
8790
set_target_properties(unicode_loader PROPERTIES
8891
VERSION "${PROJECT_VERSION}"
8992
SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
@@ -104,7 +107,7 @@ add_library(unicode ${LIBUNICODE_LIB_MODE}
104107
utf8.cpp
105108
width.cpp
106109

107-
# auto-generated by unicode_tablgen
110+
# auto-generated by unicode_tablegen
108111
codepoint_properties_data.h
109112
codepoint_properties_data.cpp
110113
codepoint_properties_names.cpp
@@ -157,6 +160,7 @@ target_link_libraries(unicode PUBLIC unicode::ucd)
157160
add_executable(unicode_tablegen tablegen.cpp)
158161
target_link_libraries(unicode_tablegen PRIVATE unicode::loader)
159162

163+
160164
# {{{ installation
161165
set(LIBUNICODE_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/libunicode" CACHE PATH "Installation directory for cmake files, a relative path that will be joined with ${CMAKE_INSTALL_PREFIX} or an absolute path.")
162166
set(LIBUNICODE_INSTALL_CMAKE_FILES ${MASTER_PROJECT} CACHE BOOL "Decides whether or not to install CMake config and -version files.")

src/libunicode/codepoint_properties_loader.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -610,12 +610,32 @@ namespace
610610
{
611611
{
612612
auto const _ = scoped_timer { _log, "Creating multistage tables (properties)" };
613-
support::generate(_codepoints.data(), _codepoints.size(), _output);
613+
support::generate(_codepoints.data(),
614+
_codepoints.size(),
615+
_output,
616+
[](auto const& begin, auto const& end, auto value) noexcept {
617+
return std::find(begin, end, value);
618+
});
614619
}
615620

616621
{
617622
auto const _ = scoped_timer { _log, "Creating multistage tables (names)" };
618-
support::generate(_names.data(), _names.size(), _outputNames);
623+
support::generate(_names.data(),
624+
_names.size(),
625+
_outputNames,
626+
[&](auto const& begin, auto const& end, auto value) noexcept {
627+
#if defined(LIBUNICODE_TABLEGEN_FASTBUILD)
628+
if (value.empty())
629+
// This case occurs for unassigned codepoints (of which there are quite a lot).
630+
// We want to keep the names table as small as possible, so we don't
631+
// re-add empty strings to it.
632+
return std::find(begin, end, value);
633+
// Non-empty names are mostly unique (~1.6% are duplicates, that's okay)
634+
return end;
635+
#else
636+
return std::find(begin, end, value);
637+
#endif
638+
});
619639
}
620640
}
621641
} // namespace

src/libunicode/multistage_table_generator.h

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ template <typename T,
5454
typename SourceType,
5555
typename Stage1ElementType,
5656
typename Stage2ElementType,
57+
typename Stage3Finder,
5758
SourceType BlockSize,
5859
SourceType MaxValue = std::numeric_limits<SourceType>::max()>
5960
class multistage_table_generator
@@ -62,6 +63,7 @@ class multistage_table_generator
6263
T const* _input;
6364
size_t _inputSize;
6465
multistage_table<T, SourceType, Stage1ElementType, Stage2ElementType, BlockSize, MaxValue>& _output;
66+
Stage3Finder _stage3Finder;
6567

6668
void generate()
6769
{
@@ -143,7 +145,8 @@ class multistage_table_generator
143145
Stage2ElementType get_or_create_stage3_index(SourceType stage1Index)
144146
{
145147
auto& properties = _output.stage3;
146-
auto const propertyIterator = find(properties.begin(), properties.end(), _input[stage1Index]);
148+
auto const propertyIterator =
149+
_stage3Finder(properties.begin(), properties.end(), _input[stage1Index]);
147150
if (propertyIterator != properties.end())
148151
return static_cast<Stage2ElementType>(distance(properties.begin(), propertyIterator));
149152

@@ -158,17 +161,24 @@ template <typename T,
158161
typename SourceType,
159162
typename Stage1ElementType,
160163
typename Stage2ElementType,
164+
typename Stage3Finder,
161165
SourceType BlockSize,
162166
SourceType MaxValue = std::numeric_limits<SourceType>::max()>
163167
void generate(
164168
T const* input,
165169
size_t inputSize,
166-
multistage_table<T, SourceType, Stage1ElementType, Stage2ElementType, BlockSize, MaxValue>& output)
170+
multistage_table<T, SourceType, Stage1ElementType, Stage2ElementType, BlockSize, MaxValue>& output,
171+
Stage3Finder&& stage3Finder)
167172
{
168-
auto builder =
169-
multistage_table_generator<T, SourceType, Stage1ElementType, Stage2ElementType, BlockSize, MaxValue> {
170-
input, inputSize, output
171-
};
173+
auto builder = multistage_table_generator<T,
174+
SourceType,
175+
Stage1ElementType,
176+
Stage2ElementType,
177+
Stage3Finder,
178+
BlockSize,
179+
MaxValue> {
180+
input, inputSize, output, std::forward<Stage3Finder>(stage3Finder)
181+
};
172182
builder.generate();
173183
}
174184

src/libunicode/tablegen.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ int main(int argc, char const* argv[])
207207
auto headerFile = std::ofstream(cxxHeaderFileName);
208208
auto implementationFile = std::ofstream(cxxImplementationFileName);
209209
auto namesFile = std::ofstream(cxxNamesFileName);
210-
auto const [props, names] = unicode::load_from_directory(ucdDataDirectory, &std::cout);
210+
auto const [props, names] = unicode::load_from_directory(ucdDataDirectory, &std::clog);
211211

212212
write_cxx_tables(props, names, headerFile, implementationFile, namesFile, namespaceName);
213213

0 commit comments

Comments
 (0)