1 change: 0 additions & 1 deletion .build-tools/containers/README.md

This file was deleted.

25 changes: 0 additions & 25 deletions .build-tools/containers/build.sh

This file was deleted.

1 change: 0 additions & 1 deletion .build-tools/containers/buildenv/Dockerfile

This file was deleted.

11 changes: 0 additions & 11 deletions .build-tools/containers/buildenv/run.sh

This file was deleted.

27 changes: 0 additions & 27 deletions .build-tools/containers/deploy.sh

This file was deleted.

36 changes: 0 additions & 36 deletions .build-tools/containers/docs/Dockerfile

This file was deleted.

6 changes: 0 additions & 6 deletions .build-tools/containers/manifest.sh

This file was deleted.

24 changes: 0 additions & 24 deletions .build-tools/containers/run.sh

This file was deleted.

44 changes: 13 additions & 31 deletions include/hicr/backends/cloudr/instanceManager.hpp
@@ -124,30 +124,24 @@ class InstanceManager final : public HiCR::InstanceManager
// Increasing cloudr instance Id
instanceIdCounter++;
}
// printf("[CloudR] Worker %lu finished.\n", _rpcEngine->getInstanceManager()->getCurrentInstance()->getId());

///// Now deploying

// If I'm worker, all I need to do is listen for incoming RPCs
if (_rpcEngine->getInstanceManager()->getCurrentInstance()->isRootInstance() == false)
{
while (_continueListening)
{
// printf("[CloudR] Worker %lu listening...\n", _rpcEngine->getInstanceManager()->getCurrentInstance()->getId());
_rpcEngine->listen();
// printf("[CloudR] Worker %lu back from listening...\n", _rpcEngine->getInstanceManager()->getCurrentInstance()->getId());
}
while (_continueListening) { _rpcEngine->listen(); }
}
else // If I am root, do the following instead
{
// Gather the topologies of all other instances
for (auto &instance : _freeInstances)
{
// Requesting the root
_rpcEngine->requestRPC(*instance, __CLOUDR_GATHER_TOPOLOGIES_RPC_NAME);
_rpcEngine->requestRPC(instance->getId(), __CLOUDR_GATHER_TOPOLOGIES_RPC_NAME);

// Getting return value (topology)
auto returnValue = _rpcEngine->getReturnValue(*instance);
auto returnValue = _rpcEngine->getReturnValue();

// Receiving raw serialized topology information from the worker
std::string serializedTopology = (char *)returnValue->getPointer();
@@ -164,8 +158,6 @@ class InstanceManager final : public HiCR::InstanceManager

// Then go straight to the entry point
_entryPoint();

// printf("[Root %lu] Exited entry point...\n", _rpcEngine->getInstanceManager()->getCurrentInstance()->getId());
}
}

@@ -183,30 +175,28 @@ class InstanceManager final : public HiCR::InstanceManager
_rpcEngine->submitReturnValue((void *)&returnOkMessage, sizeof(returnOkMessage));
}

__INLINE__ void terminateInstanceImpl(const std::shared_ptr<HiCR::Instance> instance) override
{
__INLINE__ void terminateInstanceImpl(const std::shared_ptr<HiCR::Instance> instance) override
{
// Requesting relinquish RPC execution on the requested instance
_rpcEngine->requestRPC(*instance, __CLOUDR_RELINQUISH_INSTANCE_RPC_NAME);
_rpcEngine->requestRPC(instance->getId(), __CLOUDR_RELINQUISH_INSTANCE_RPC_NAME);

// Getting return value. It's enough to know a value was returned to know it is idling
const auto returnValue = _rpcEngine->getReturnValue(*instance);
const auto returnValue = _rpcEngine->getReturnValue();

// Adding instance back to free instances
_freeInstances.insert(_baseIdsToCloudrInstanceMap[instance->getId()]);
}
}

/**
* Finalization procedure. Send rpc termination to all the non root instances
*/
__INLINE__ void finalize() override
{
// printf("[Instance %lu] Finalizing CloudR...\n", _rpcEngine->getInstanceManager()->getCurrentInstance()->getId());

// The following should only be run by the root rank, sending an RPC to all others to finalize them
if (_rpcEngine->getInstanceManager()->getCurrentInstance()->isRootInstance())
{
for (auto &instance : _cloudrInstances)
if (instance->isRootInstance() == false) _rpcEngine->requestRPC(*instance, __CLOUDR_FINALIZE_WORKER_RPC_NAME);
if (instance->isRootInstance() == false) _rpcEngine->requestRPC(instance->getId(), __CLOUDR_FINALIZE_WORKER_RPC_NAME);
}
}

@@ -257,10 +247,6 @@ class InstanceManager final : public HiCR::InstanceManager

__INLINE__ std::shared_ptr<HiCR::Instance> createInstanceImpl(const HiCR::InstanceTemplate instanceTemplate) override
{
// If no more free instances available, fail now
// Commented out because we don't want to fail, simply return a nullptr
// if (_freeInstances.empty()) HICR_THROW_LOGIC("Requested the creation of a new instances, but CloudR has ran out of free instances");

// Creating instance object to return
std::shared_ptr<HiCR::backend::cloudr::Instance> newInstance = nullptr;

@@ -281,14 +267,11 @@ class InstanceManager final : public HiCR::InstanceManager
break;
}

// Commented out because we don't want to fail, simply return a nullptr
// if (newInstance == nullptr) HICR_THROW_LOGIC("Tried to create new instance but did not find any free instances that meet the required topology");

// If successful, initialize the new instance
if (newInstance != nullptr)
{
// Request the execution of the main driver function
_rpcEngine->requestRPC(*newInstance->getBaseInstance(), __CLOUDR_LAUNCH_ENTRY_POINT_RPC_NAME);
_rpcEngine->requestRPC(newInstance->getBaseInstance()->getId(), __CLOUDR_LAUNCH_ENTRY_POINT_RPC_NAME);
}

// Returning result. Nullptr, if no instance was created
@@ -308,7 +291,7 @@ class InstanceManager final : public HiCR::InstanceManager
__INLINE__ void requestExchangeGlobalMemorySlots(HiCR::GlobalMemorySlot::tag_t tag)
{
// Asking free instances to run the exchange RPC
for (const auto &instance : _freeInstances) _rpcEngine->requestRPC(*instance, __CLOUDR_EXCHANGE_GLOBAL_MEMORY_SLOTS_RPC_NAME, tag);
for (const auto &instance : _freeInstances) _rpcEngine->requestRPC(instance->getId(), __CLOUDR_EXCHANGE_GLOBAL_MEMORY_SLOTS_RPC_NAME, tag);
}

/**
@@ -319,7 +302,7 @@ class InstanceManager final : public HiCR::InstanceManager
__INLINE__ void requestFence(HiCR::GlobalMemorySlot::tag_t tag)
{
// Asking free instances to run the exchange RPC
for (const auto &instance : _freeInstances) _rpcEngine->requestRPC(*instance, __CLOUDR_FENCE_RPC_NAME, tag);
for (const auto &instance : _freeInstances) _rpcEngine->requestRPC(instance->getId(), __CLOUDR_FENCE_RPC_NAME, tag);
}

/**
@@ -349,7 +332,6 @@ class InstanceManager final : public HiCR::InstanceManager
__INLINE__ void finalizeWorker()
{
// Do not continue listening
// printf("[CloudR] Worker %lu running finalizeWorker() RPC.\n", _rpcEngine->getInstanceManager()->getCurrentInstance()->getId());
_continueListening = false;
}

@@ -393,7 +375,7 @@ class InstanceManager final : public HiCR::InstanceManager
std::vector<std::shared_ptr<HiCR::backend::cloudr::Instance>> _cloudrInstances;

/// A collection of ready-to-use instances currently on standby
std::set<HiCR::backend::cloudr::Instance*> _freeInstances;
std::set<HiCR::backend::cloudr::Instance *> _freeInstances;
}; // class CloudR

} // namespace HiCR::backend::cloudr
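
Note on the recurring API change in this diff: RPC targets are now identified by instance id rather than by an Instance reference, and return values are retrieved without naming the source instance. Below is a minimal sketch of the two calling conventions, assembled only from lines that appear in this diff; the exact RpcEngine signatures and the semantics of the argument-less getReturnValue() are assumptions, not an authoritative description of the HiCR API.

// Old convention (lines removed by this PR): the target is passed as an
// Instance reference and the return value is fetched per instance.
_rpcEngine->requestRPC(*instance, __CLOUDR_GATHER_TOPOLOGIES_RPC_NAME);
auto returnValue = _rpcEngine->getReturnValue(*instance);

// New convention (lines added by this PR): the target is passed as an id and
// getReturnValue() takes no argument (assumed to refer to the RPC just requested).
_rpcEngine->requestRPC(instance->getId(), __CLOUDR_GATHER_TOPOLOGIES_RPC_NAME);
auto returnValue = _rpcEngine->getReturnValue();

// RPCs that carry a payload keep the extra argument after the RPC name, e.g.:
_rpcEngine->requestRPC(instance->getId(), __CLOUDR_FENCE_RPC_NAME, tag);

A related behavioural point, visible in the commented-out checks removed from createInstanceImpl(): when no free instance matches the requested topology, the manager returns a nullptr instead of throwing, so callers are expected to check the returned shared_ptr before using it.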