From 132d21c875b29c0630047e7e7682c1d19da83b06 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 17 Jun 2020 10:53:03 +0200 Subject: [PATCH 1/2] Add CString class to avoid allocations --- src/methods.cpp | 21 +++++++------ src/scorepy/cInstrumenter.cpp | 21 +++++-------- src/scorepy/cstring.h | 57 +++++++++++++++++++++++++++++++++++ src/scorepy/events.cpp | 49 ++++++++++++++++++------------ src/scorepy/events.hpp | 21 ++++++------- src/scorepy/pythonHelpers.cpp | 17 ++++++----- src/scorepy/pythonHelpers.hpp | 27 +++++++++++++++-- 7 files changed, 152 insertions(+), 61 deletions(-) create mode 100644 src/scorepy/cstring.h diff --git a/src/methods.cpp b/src/methods.cpp index 43755dc..a73a59b 100644 --- a/src/methods.cpp +++ b/src/methods.cpp @@ -1,6 +1,7 @@ #include "methods.hpp" #include "scorepy/events.hpp" #include "scorepy/pathUtils.hpp" +#include "scorepy/pythonHelpers.hpp" #include #include #include @@ -30,19 +31,20 @@ extern "C" */ static PyObject* region_begin(PyObject* self, PyObject* args) { - const char* module; - const char* function_name; - const char* file_name; + scorepy::PythonCString module; + scorepy::PythonCString function_name; + scorepy::PythonCString file_name; PyObject* identifier = nullptr; std::uint64_t line_number = 0; - if (!PyArg_ParseTuple(args, "sssKO", &module, &function_name, &file_name, &line_number, + if (!PyArg_ParseTuple(args, "s#s#s#KO", &module.s, &module.l, &function_name.s, + &function_name.l, &file_name.s, &file_name.l, &line_number, &identifier)) { return NULL; } - if (identifier == nullptr or identifier == Py_None) + if (identifier == Py_None) { scorepy::region_begin(function_name, module, file_name, line_number); } @@ -60,16 +62,17 @@ extern "C" */ static PyObject* region_end(PyObject* self, PyObject* args) { - const char* module; - const char* function_name; + scorepy::PythonCString module; + scorepy::PythonCString function_name; PyObject* identifier = nullptr; - if (!PyArg_ParseTuple(args, "ssO", &module, &function_name, &identifier)) + if (!PyArg_ParseTuple(args, "s#s#O", &module.s, &module.l, &function_name.s, + &function_name.l, &identifier)) { return NULL; } - if (identifier == nullptr or identifier == Py_None) + if (identifier == Py_None) { scorepy::region_end(function_name, module); } diff --git a/src/scorepy/cInstrumenter.cpp b/src/scorepy/cInstrumenter.cpp index d250566..1ca9cae 100644 --- a/src/scorepy/cInstrumenter.cpp +++ b/src/scorepy/cInstrumenter.cpp @@ -1,4 +1,5 @@ #include "cInstrumenter.hpp" +#include "cstring.h" #include "events.hpp" #include "pythonHelpers.hpp" #include @@ -115,15 +116,12 @@ bool CInstrumenter::on_event(PyFrameObject& frame, int what, PyObject*) case PyTrace_CALL: { const PyCodeObject& code = *frame.f_code; - const char* name = PyUnicode_AsUTF8(code.co_name); - const char* module_name = get_module_name(frame); - assert(name); - assert(module_name); - // TODO: Use string_view/CString comparison? - if (std::string(name) != "_unsetprofile" && std::string(module_name, 0, 6) != "scorep") + const CString name = get_string_from_python(*code.co_name); + const CString module_name = get_module_name(frame); + if (name != "_unsetprofile" && !module_name.starts_with("scorep")) { const int line_number = code.co_firstlineno; - const auto file_name = get_file_name(frame); + const CString file_name = get_file_name(frame); region_begin(name, module_name, file_name, line_number, reinterpret_cast(&code)); } @@ -132,12 +130,9 @@ bool CInstrumenter::on_event(PyFrameObject& frame, int what, PyObject*) case PyTrace_RETURN: { const PyCodeObject& code = *frame.f_code; - const char* name = PyUnicode_AsUTF8(code.co_name); - const char* module_name = get_module_name(frame); - assert(name); - assert(module_name); - // TODO: Use string_view/CString comparison? - if (std::string(name) != "_unsetprofile" && std::string(module_name, 0, 6) != "scorep") + const CString name = get_string_from_python(*code.co_name); + const CString module_name = get_module_name(frame); + if (name != "_unsetprofile" && !module_name.starts_with("scorep")) { region_end(name, module_name, reinterpret_cast(&code)); } diff --git a/src/scorepy/cstring.h b/src/scorepy/cstring.h new file mode 100644 index 0000000..fd919e4 --- /dev/null +++ b/src/scorepy/cstring.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +namespace scorepy +{ + +/// Thin wrapper around a C-String (NULL-terminated sequence of chars) +class CString +{ + const char* s_; + size_t len_; + +public: + template + constexpr CString(const char (&s)[N]) : s_(s), len_(N - 1u) + { + static_assert(N > 0, "Cannot handle empty char array"); + } + + explicit CString(const char* s, size_t len) : s_(s), len_(len) + { + assert(s_); + } + + explicit CString(const char* s) : s_(s), len_(std::strlen(s_)) + { + assert(s_); + } + + constexpr const char* c_str() const + { + return s_; + } + /// Find the first occurrence of the character and return a pointer to it or NULL if not found + const char* find(char c) const + { + return static_cast(std::memchr(s_, c, len_)); + } + template + bool starts_with(const char (&prefix)[N]) const + { + return (len_ >= N - 1u) && (std::memcmp(s_, prefix, N - 1u) == 0); + } + + friend bool operator==(const CString& lhs, const CString& rhs) + { + return (lhs.len_ == rhs.len_) && (std::memcmp(lhs.s_, rhs.s_, rhs.len_) == 0); + } + friend bool operator!=(const CString& lhs, const CString& rhs) + { + return !(lhs == rhs); + } +}; + +} // namespace scorepy diff --git a/src/scorepy/events.cpp b/src/scorepy/events.cpp index 5b2e378..5dcfdf0 100644 --- a/src/scorepy/events.cpp +++ b/src/scorepy/events.cpp @@ -37,49 +37,60 @@ static const std::array EXIT_REGION_WHITELIST = { #endif }; +static region_handle create_user_region(const std::string& region_name, const CString module, + const CString file_name, const std::uint64_t line_number) +{ + region_handle handle; + SCOREP_User_RegionInit(&handle.value, NULL, NULL, region_name.c_str(), + SCOREP_USER_REGION_TYPE_FUNCTION, file_name.c_str(), line_number); + + // Extract main module name if module is like "mainmodule.submodule.subsubmodule" + const char* dot_pos = module.find('.'); + if (dot_pos) + { + const std::string main_module(module.c_str(), dot_pos); + SCOREP_User_RegionSetGroup(handle.value, main_module.c_str()); + } + else + { + SCOREP_User_RegionSetGroup(handle.value, module.c_str()); + } + return handle; +} + // Used for regions, that have an identifier, aka a code object id. (instrumenter regions and // some decorated regions) -void region_begin(const std::string& function_name, const std::string& module, - const std::string& file_name, const std::uint64_t line_number, - const std::uintptr_t& identifier) +void region_begin(const CString function_name, const CString module, const CString file_name, + const std::uint64_t line_number, const std::uintptr_t& identifier) { auto& region_handle = regions[identifier]; if (region_handle == uninitialised_region_handle) { - auto& region_name = make_region_name(module, function_name); - SCOREP_User_RegionInit(®ion_handle.value, NULL, NULL, region_name.c_str(), - SCOREP_USER_REGION_TYPE_FUNCTION, file_name.c_str(), line_number); - - SCOREP_User_RegionSetGroup(region_handle.value, - std::string(module, 0, module.find('.')).c_str()); + const auto& region_name = make_region_name(module, function_name); + region_handle = create_user_region(region_name, module, file_name, line_number); } SCOREP_User_RegionEnter(region_handle.value); } // Used for regions, that only have a function name, a module, a file and a line number (user // regions) -void region_begin(const std::string& function_name, const std::string& module, - const std::string& file_name, const std::uint64_t line_number) +void region_begin(const CString function_name, const CString module, const CString file_name, + const std::uint64_t line_number) { const auto& region_name = make_region_name(module, function_name); auto& region_handle = user_regions[region_name]; if (region_handle == uninitialised_region_handle) { - SCOREP_User_RegionInit(®ion_handle.value, NULL, NULL, region_name.c_str(), - SCOREP_USER_REGION_TYPE_FUNCTION, file_name.c_str(), line_number); - - SCOREP_User_RegionSetGroup(region_handle.value, - std::string(module, 0, module.find('.')).c_str()); + region_handle = create_user_region(region_name, module, file_name, line_number); } SCOREP_User_RegionEnter(region_handle.value); } // Used for regions, that have an identifier, aka a code object id. (instrumenter regions and // some decorated regions) -void region_end(const std::string& function_name, const std::string& module, - const std::uintptr_t& identifier) +void region_end(const CString function_name, const CString module, const std::uintptr_t& identifier) { const auto it_region = regions.find(identifier); if (it_region != regions.end()) @@ -94,7 +105,7 @@ void region_end(const std::string& function_name, const std::string& module, } // Used for regions, that only have a function name, a module (user regions) -void region_end(const std::string& function_name, const std::string& module) +void region_end(const CString function_name, const CString module) { auto& region_name = make_region_name(module, function_name); const auto it_region = user_regions.find(region_name); diff --git a/src/scorepy/events.hpp b/src/scorepy/events.hpp index 43400ca..cda2f5b 100644 --- a/src/scorepy/events.hpp +++ b/src/scorepy/events.hpp @@ -1,5 +1,6 @@ #pragma once +#include "cstring.h" #include #include @@ -7,24 +8,22 @@ namespace scorepy { /// Combine the arguments into a region name /// Return value is a statically allocated string to avoid memory (re)allocations -inline const std::string& make_region_name(const std::string& module_name, const std::string& name) +inline const std::string& make_region_name(const CString module_name, const CString name) { static std::string region; - region = module_name; + region = module_name.c_str(); region += ":"; - region += name; + region += name.c_str(); return region; } -void region_begin(const std::string& function_name, const std::string& module, - const std::string& file_name, const std::uint64_t line_number, - const std::uintptr_t& identifier); -void region_begin(const std::string& function_name, const std::string& module, - const std::string& file_name, const std::uint64_t line_number); +void region_begin(CString function_name, CString module, CString file_name, + const std::uint64_t line_number, const std::uintptr_t& identifier); +void region_begin(CString function_name, CString module, CString file_name, + const std::uint64_t line_number); -void region_end(const std::string& function_name, const std::string& module, - const std::uintptr_t& identifier); -void region_end(const std::string& function_name, const std::string& module); +void region_end(CString function_name, CString module, const std::uintptr_t& identifier); +void region_end(CString function_name, CString module); void region_end_error_handling(const std::string& region_name); diff --git a/src/scorepy/pythonHelpers.cpp b/src/scorepy/pythonHelpers.cpp index 07c87a9..9ca30b6 100644 --- a/src/scorepy/pythonHelpers.cpp +++ b/src/scorepy/pythonHelpers.cpp @@ -4,22 +4,25 @@ namespace scorepy { -const char* get_module_name(const PyFrameObject& frame) +CString get_module_name(const PyFrameObject& frame) { PyObject* module_name = PyDict_GetItemString(frame.f_globals, "__name__"); if (module_name) - return PyUnicode_AsUTF8(module_name); + return get_string_from_python(*module_name); // this is a NUMPY special situation, see NEP-18, and Score-P issue #63 - // TODO: Use string_view/C-String to avoid creating 2 std::strings - const char* filename = PyUnicode_AsUTF8(frame.f_code->co_filename); - if (filename && (std::string(filename) == "<__array_function__ internals>")) + const CString filename = get_string_from_python(*frame.f_code->co_filename); + if (filename == "<__array_function__ internals>") + { return "numpy.__array_function__"; + } else + { return "unkown"; + } } -std::string get_file_name(const PyFrameObject& frame) +CString get_file_name(const PyFrameObject& frame) { PyObject* filename = frame.f_code->co_filename; if (filename == Py_None) @@ -27,6 +30,6 @@ std::string get_file_name(const PyFrameObject& frame) return "None"; } const std::string full_file_name = abspath(PyUnicode_AsUTF8(filename)); - return !full_file_name.empty() ? full_file_name : "ErrorPath"; + return !full_file_name.empty() ? CStrubg(full_file_name) : CString("ErrorPath"); } } // namespace scorepy diff --git a/src/scorepy/pythonHelpers.hpp b/src/scorepy/pythonHelpers.hpp index 936e87b..f182a1e 100644 --- a/src/scorepy/pythonHelpers.hpp +++ b/src/scorepy/pythonHelpers.hpp @@ -1,6 +1,8 @@ #pragma once +#include "cstring.h" #include +#include #include #include #include @@ -71,11 +73,32 @@ auto cast_to_PyFunc(TFunc* func) -> detail::ReplaceArgsToPyObject_t* return reinterpret_cast*>(func); } +inline CString get_string_from_python(PyObject& o) +{ + Py_ssize_t len; + const char* s = PyUnicode_AsUTF8AndSize(&o, &len); + return CString(s, len); +} + +/// Pair of a C-String and it's length useful for PyArg_ParseTuple with 's#' +/// Implicitely converts to CString. +struct PythonCString +{ + const char* s; + Py_ssize_t l; + operator CString() const + { + assert(s); + return CString(s, l); + } +}; + /// Return the module name the frame belongs to. /// The pointer is valid for the lifetime of the frame -const char* get_module_name(const PyFrameObject& frame); +CString get_module_name(const PyFrameObject& frame); /// Return the file name the frame belongs to -std::string get_file_name(const PyFrameObject& frame); +/// The returned CString is valid until the next call to this function +CString get_file_name(const PyFrameObject& frame); // Implementation details namespace detail From 8556089fa090e8c73ce341a9d5443cb56a940457 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 21 Jul 2020 09:33:33 +0200 Subject: [PATCH 2/2] Make get_string_from_python Python2 compatible --- src/scorepy/pythonHelpers.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/scorepy/pythonHelpers.hpp b/src/scorepy/pythonHelpers.hpp index f182a1e..d3065d4 100644 --- a/src/scorepy/pythonHelpers.hpp +++ b/src/scorepy/pythonHelpers.hpp @@ -75,9 +75,14 @@ auto cast_to_PyFunc(TFunc* func) -> detail::ReplaceArgsToPyObject_t* inline CString get_string_from_python(PyObject& o) { +#if PY_MAJOR_VERSION >= 3 Py_ssize_t len; const char* s = PyUnicode_AsUTF8AndSize(&o, &len); return CString(s, len); +#else + const char* s = PyString_AsString(&o); + return CString(s); +#endif } /// Pair of a C-String and it's length useful for PyArg_ParseTuple with 's#'