Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions src/methods.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "methods.hpp"
#include "scorepy/events.hpp"
#include "scorepy/pathUtils.hpp"
#include "scorepy/pythonHelpers.hpp"
#include <Python.h>
#include <cstdint>
#include <scorep/SCOREP_User_Functions.h>
Expand Down Expand Up @@ -30,19 +31,20 @@ extern "C"
*/
static PyObject* region_begin(PyObject* self, PyObject* args)
{
const char* module;
const char* function_name;
const char* file_name;
scorepy::PythonCString module;
scorepy::PythonCString function_name;
scorepy::PythonCString file_name;
PyObject* identifier = nullptr;
std::uint64_t line_number = 0;

if (!PyArg_ParseTuple(args, "sssKO", &module, &function_name, &file_name, &line_number,
if (!PyArg_ParseTuple(args, "s#s#s#KO", &module.s, &module.l, &function_name.s,
&function_name.l, &file_name.s, &file_name.l, &line_number,
&identifier))
{
return NULL;
}

if (identifier == nullptr or identifier == Py_None)
if (identifier == Py_None)
{
scorepy::region_begin(function_name, module, file_name, line_number);
}
Expand All @@ -60,16 +62,17 @@ extern "C"
*/
static PyObject* region_end(PyObject* self, PyObject* args)
{
const char* module;
const char* function_name;
scorepy::PythonCString module;
scorepy::PythonCString function_name;
PyObject* identifier = nullptr;

if (!PyArg_ParseTuple(args, "ssO", &module, &function_name, &identifier))
if (!PyArg_ParseTuple(args, "s#s#O", &module.s, &module.l, &function_name.s,
&function_name.l, &identifier))
{
return NULL;
}

if (identifier == nullptr or identifier == Py_None)
if (identifier == Py_None)
{
scorepy::region_end(function_name, module);
}
Expand Down
21 changes: 8 additions & 13 deletions src/scorepy/cInstrumenter.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "cInstrumenter.hpp"
#include "cstring.h"
#include "events.hpp"
#include "pythonHelpers.hpp"
#include <algorithm>
Expand Down Expand Up @@ -115,15 +116,12 @@ bool CInstrumenter::on_event(PyFrameObject& frame, int what, PyObject*)
case PyTrace_CALL:
{
const PyCodeObject& code = *frame.f_code;
const char* name = PyUnicode_AsUTF8(code.co_name);
const char* module_name = get_module_name(frame);
assert(name);
assert(module_name);
// TODO: Use string_view/CString comparison?
if (std::string(name) != "_unsetprofile" && std::string(module_name, 0, 6) != "scorep")
const CString name = get_string_from_python(*code.co_name);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about an Initalisert, which takes the frame?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate?

const CString module_name = get_module_name(frame);
if (name != "_unsetprofile" && !module_name.starts_with("scorep"))
{
const int line_number = code.co_firstlineno;
const auto file_name = get_file_name(frame);
const CString file_name = get_file_name(frame);
region_begin(name, module_name, file_name, line_number,
reinterpret_cast<std::uintptr_t>(&code));
}
Expand All @@ -132,12 +130,9 @@ bool CInstrumenter::on_event(PyFrameObject& frame, int what, PyObject*)
case PyTrace_RETURN:
{
const PyCodeObject& code = *frame.f_code;
const char* name = PyUnicode_AsUTF8(code.co_name);
const char* module_name = get_module_name(frame);
assert(name);
assert(module_name);
// TODO: Use string_view/CString comparison?
if (std::string(name) != "_unsetprofile" && std::string(module_name, 0, 6) != "scorep")
const CString name = get_string_from_python(*code.co_name);
const CString module_name = get_module_name(frame);
if (name != "_unsetprofile" && !module_name.starts_with("scorep"))
{
region_end(name, module_name, reinterpret_cast<std::uintptr_t>(&code));
}
Expand Down
57 changes: 57 additions & 0 deletions src/scorepy/cstring.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#pragma once

#include <cassert>
#include <cstring>

namespace scorepy
{

/// Thin wrapper around a C-String (NULL-terminated sequence of chars)
class CString
{
const char* s_;
size_t len_;

public:
template <size_t N>
constexpr CString(const char (&s)[N]) : s_(s), len_(N - 1u)
{
static_assert(N > 0, "Cannot handle empty char array");
}

explicit CString(const char* s, size_t len) : s_(s), len_(len)
{
assert(s_);
}

explicit CString(const char* s) : s_(s), len_(std::strlen(s_))
{
assert(s_);
}

constexpr const char* c_str() const
{
return s_;
}
/// Find the first occurrence of the character and return a pointer to it or NULL if not found
const char* find(char c) const
{
return static_cast<const char*>(std::memchr(s_, c, len_));
}
template <size_t N>
bool starts_with(const char (&prefix)[N]) const
{
return (len_ >= N - 1u) && (std::memcmp(s_, prefix, N - 1u) == 0);
}

friend bool operator==(const CString& lhs, const CString& rhs)
{
return (lhs.len_ == rhs.len_) && (std::memcmp(lhs.s_, rhs.s_, rhs.len_) == 0);
}
friend bool operator!=(const CString& lhs, const CString& rhs)
{
return !(lhs == rhs);
}
};

} // namespace scorepy
49 changes: 30 additions & 19 deletions src/scorepy/events.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,49 +37,60 @@ static const std::array<std::string, 2> EXIT_REGION_WHITELIST = {
#endif
};

static region_handle create_user_region(const std::string& region_name, const CString module,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why std::string for region_name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this is what you have when calling the function. I could add an overloaded constructor to CString so it can be implicitely (or better explicitely) converted from a std::string if you like.

const CString file_name, const std::uint64_t line_number)
{
region_handle handle;
SCOREP_User_RegionInit(&handle.value, NULL, NULL, region_name.c_str(),
SCOREP_USER_REGION_TYPE_FUNCTION, file_name.c_str(), line_number);

// Extract main module name if module is like "mainmodule.submodule.subsubmodule"
const char* dot_pos = module.find('.');
if (dot_pos)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer to move this to PythonCString.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain what you want to move exactly? Keep in mind that allocations should be avoided hence a string copy must only be done when required, in this case when a dot was found

{
const std::string main_module(module.c_str(), dot_pos);
SCOREP_User_RegionSetGroup(handle.value, main_module.c_str());
}
else
{
SCOREP_User_RegionSetGroup(handle.value, module.c_str());
}
return handle;
}

// Used for regions, that have an identifier, aka a code object id. (instrumenter regions and
// some decorated regions)
void region_begin(const std::string& function_name, const std::string& module,
const std::string& file_name, const std::uint64_t line_number,
const std::uintptr_t& identifier)
void region_begin(const CString function_name, const CString module, const CString file_name,
const std::uint64_t line_number, const std::uintptr_t& identifier)
{
auto& region_handle = regions[identifier];

if (region_handle == uninitialised_region_handle)
{
auto& region_name = make_region_name(module, function_name);
SCOREP_User_RegionInit(&region_handle.value, NULL, NULL, region_name.c_str(),
SCOREP_USER_REGION_TYPE_FUNCTION, file_name.c_str(), line_number);

SCOREP_User_RegionSetGroup(region_handle.value,
std::string(module, 0, module.find('.')).c_str());
const auto& region_name = make_region_name(module, function_name);
region_handle = create_user_region(region_name, module, file_name, line_number);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please either rename to scorep_create_user_region or use the initial code here, what I would prefer, as we would have all the Score-P calls on one level.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point is that we duplicate the code on how a user region is created and that code is slightly more than the 2 SCOREP_* calls if you want to avoid useless memory allocations: std::string(module, 0, module.find('.')) copies the string even when module was a std::string already and no dot is found which is what this PR strives to avoid. Encapsulating duplicate code is what functions are for.

And why scorep_create_user_region? What is not clear from the current name? And why the scorep_ prefix which makes it look like a Score-P call while it is a scorepy call creating a user region. Maybe create_scorep_user_region if you want to be more explicit?

}
SCOREP_User_RegionEnter(region_handle.value);
}

// Used for regions, that only have a function name, a module, a file and a line number (user
// regions)
void region_begin(const std::string& function_name, const std::string& module,
const std::string& file_name, const std::uint64_t line_number)
void region_begin(const CString function_name, const CString module, const CString file_name,
const std::uint64_t line_number)
{
const auto& region_name = make_region_name(module, function_name);
auto& region_handle = user_regions[region_name];

if (region_handle == uninitialised_region_handle)
{
SCOREP_User_RegionInit(&region_handle.value, NULL, NULL, region_name.c_str(),
SCOREP_USER_REGION_TYPE_FUNCTION, file_name.c_str(), line_number);

SCOREP_User_RegionSetGroup(region_handle.value,
std::string(module, 0, module.find('.')).c_str());
region_handle = create_user_region(region_name, module, file_name, line_number);
}
SCOREP_User_RegionEnter(region_handle.value);
}

// Used for regions, that have an identifier, aka a code object id. (instrumenter regions and
// some decorated regions)
void region_end(const std::string& function_name, const std::string& module,
const std::uintptr_t& identifier)
void region_end(const CString function_name, const CString module, const std::uintptr_t& identifier)
{
const auto it_region = regions.find(identifier);
if (it_region != regions.end())
Expand All @@ -94,7 +105,7 @@ void region_end(const std::string& function_name, const std::string& module,
}

// Used for regions, that only have a function name, a module (user regions)
void region_end(const std::string& function_name, const std::string& module)
void region_end(const CString function_name, const CString module)
{
auto& region_name = make_region_name(module, function_name);
const auto it_region = user_regions.find(region_name);
Expand Down
21 changes: 10 additions & 11 deletions src/scorepy/events.hpp
Original file line number Diff line number Diff line change
@@ -1,30 +1,29 @@
#pragma once

#include "cstring.h"
#include <cstdint>
#include <string>

namespace scorepy
{
/// Combine the arguments into a region name
/// Return value is a statically allocated string to avoid memory (re)allocations
inline const std::string& make_region_name(const std::string& module_name, const std::string& name)
inline const std::string& make_region_name(const CString module_name, const CString name)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As written I would vote for implementing this returning PythonCString

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The result is used to search a map with keys of std::string. So if this should return a CString then CString would need to be implicitely convertible from const std::string& to work with that. Shall I do that?

{
static std::string region;
region = module_name;
region = module_name.c_str();
region += ":";
region += name;
region += name.c_str();
return region;
}

void region_begin(const std::string& function_name, const std::string& module,
const std::string& file_name, const std::uint64_t line_number,
const std::uintptr_t& identifier);
void region_begin(const std::string& function_name, const std::string& module,
const std::string& file_name, const std::uint64_t line_number);
void region_begin(CString function_name, CString module, CString file_name,
const std::uint64_t line_number, const std::uintptr_t& identifier);
void region_begin(CString function_name, CString module, CString file_name,
const std::uint64_t line_number);

void region_end(const std::string& function_name, const std::string& module,
const std::uintptr_t& identifier);
void region_end(const std::string& function_name, const std::string& module);
void region_end(CString function_name, CString module, const std::uintptr_t& identifier);
void region_end(CString function_name, CString module);

void region_end_error_handling(const std::string& region_name);

Expand Down
17 changes: 10 additions & 7 deletions src/scorepy/pythonHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,32 @@

namespace scorepy
{
const char* get_module_name(const PyFrameObject& frame)
CString get_module_name(const PyFrameObject& frame)
{
PyObject* module_name = PyDict_GetItemString(frame.f_globals, "__name__");
if (module_name)
return PyUnicode_AsUTF8(module_name);
return get_string_from_python(*module_name);

// this is a NUMPY special situation, see NEP-18, and Score-P issue #63
// TODO: Use string_view/C-String to avoid creating 2 std::strings
const char* filename = PyUnicode_AsUTF8(frame.f_code->co_filename);
if (filename && (std::string(filename) == "<__array_function__ internals>"))
const CString filename = get_string_from_python(*frame.f_code->co_filename);
if (filename == "<__array_function__ internals>")
{
return "numpy.__array_function__";
}
else
{
return "unkown";
}
}

std::string get_file_name(const PyFrameObject& frame)
CString get_file_name(const PyFrameObject& frame)
{
PyObject* filename = frame.f_code->co_filename;
if (filename == Py_None)
{
return "None";
}
const std::string full_file_name = abspath(PyUnicode_AsUTF8(filename));
return !full_file_name.empty() ? full_file_name : "ErrorPath";
return !full_file_name.empty() ? CStrubg(full_file_name) : CString("ErrorPath");
}
} // namespace scorepy
32 changes: 30 additions & 2 deletions src/scorepy/pythonHelpers.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#pragma once

#include "cstring.h"
#include <Python.h>
#include <cassert>
#include <frameobject.h>
#include <string>
#include <type_traits>
Expand Down Expand Up @@ -71,11 +73,37 @@ auto cast_to_PyFunc(TFunc* func) -> detail::ReplaceArgsToPyObject_t<TFunc>*
return reinterpret_cast<detail::ReplaceArgsToPyObject_t<TFunc>*>(func);
}

inline CString get_string_from_python(PyObject& o)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add this as function, or initialiser of PythonCString

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do that. However the intention of CString was to keep it generic. This separates concerns. Adding this function would tie it to Python

What is wrong with this function?

{
#if PY_MAJOR_VERSION >= 3
Py_ssize_t len;
const char* s = PyUnicode_AsUTF8AndSize(&o, &len);
return CString(s, len);
#else
const char* s = PyString_AsString(&o);
return CString(s);
#endif
}

/// Pair of a C-String and it's length useful for PyArg_ParseTuple with 's#'
/// Implicitely converts to CString.
struct PythonCString
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not merging CString and PythonCString to PythongCString?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PythonCString is a workaround for the situation where you need a const char* and size pair. CString is a NULL-terminated sequence of chars with a length. According to rules of encapsulation you must not have write access to the members of CString as that could destroy invariants, in this case that len is the length of s. This doesn't hold for PythonCString which is simply for getting a CString from Python using the PyArg_ParseTuple function and unsafe to use further. Hence to be used only directly at the API boundary.

{
const char* s;
Py_ssize_t l;
operator CString() const
{
assert(s);
return CString(s, l);
}
};

/// Return the module name the frame belongs to.
/// The pointer is valid for the lifetime of the frame
const char* get_module_name(const PyFrameObject& frame);
CString get_module_name(const PyFrameObject& frame);
/// Return the file name the frame belongs to
std::string get_file_name(const PyFrameObject& frame);
/// The returned CString is valid until the next call to this function
CString get_file_name(const PyFrameObject& frame);

// Implementation details
namespace detail
Expand Down