diff --git a/CMakeLists.txt b/CMakeLists.txt index 6194e9f..9aae14c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,7 +76,7 @@ add_definitions( -D_FILE_OFFSET_BITS=64 ) # On Linux, we hide all the symbols for the final libraries, exposing only what's needed for the XRootD # runtime loader. So here we create the object library and will create a separate one for testing with # the symbols exposed. -add_library(XrdS3Obj OBJECT src/CurlUtil.cc src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdS3Obj OBJECT src/CurlUtil.cc src/S3File.cc src/S3Directory.cc src/HTTPDirectory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) set_target_properties(XrdS3Obj PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(XrdS3Obj PRIVATE ${XRootD_INCLUDE_DIRS}) target_link_libraries( XrdS3Obj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl OpenSSL::Crypto tinyxml2::tinyxml2 Threads::Threads std::filesystem std::atomic ) @@ -84,10 +84,10 @@ target_link_libraries( XrdS3Obj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRAR add_library(XrdS3 MODULE "$") target_link_libraries(XrdS3 XrdS3Obj) -add_library(XrdHTTPServerObj OBJECT src/CurlUtil.cc src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdHTTPServerObj OBJECT src/CurlUtil.cc src/HTTPFile.cc src/HTTPDirectory.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) set_target_properties(XrdHTTPServerObj PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(XrdHTTPServerObj PRIVATE ${XRootD_INCLUDE_DIRS}) 
-target_link_libraries(XrdHTTPServerObj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl OpenSSL::Crypto Threads::Threads std::filesystem) +target_link_libraries(XrdHTTPServerObj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl tinyxml2::tinyxml2 OpenSSL::Crypto Threads::Threads std::filesystem) add_library(XrdHTTPServer MODULE "$") target_link_libraries(XrdHTTPServer XrdHTTPServerObj) diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index d43a10c..476116c 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -140,7 +140,8 @@ bool HTTPRequest::SendHTTPRequest(const std::string &payload) { return false; } - headers["Content-Type"] = "binary/octet-stream"; + if (headers.find("Content-Type") == headers.end()) + headers["Content-Type"] = "binary/octet-stream"; return sendPreparedRequest(hostUrl, payload, payload.size(), true); } @@ -468,6 +469,16 @@ bool HTTPRequest::SetupHandle(CURL *curl) { } } + if (httpVerb == "PROPFIND") { + rv = curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PROPFIND"); + if (rv != CURLE_OK) { + this->errorCode = "E_CURL_LIB"; + this->errorMessage = + "curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST) failed."; + return false; + } + } + if (httpVerb == "POST") { rv = curl_easy_setopt(curl, CURLOPT_POST, 1); if (rv != CURLE_OK) { @@ -475,7 +486,9 @@ bool HTTPRequest::SetupHandle(CURL *curl) { this->errorMessage = "curl_easy_setopt( CURLOPT_POST ) failed."; return false; } + } + if (httpVerb == "POST" || httpVerb == "PROPFIND") { rv = curl_easy_setopt(curl, CURLOPT_POSTFIELDS, m_payload.data()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; @@ -817,6 +830,47 @@ bool HTTPDownload::SendRequest(off_t offset, size_t size) { // --------------------------------------------------------------------------- +HTTPList::~HTTPList() {} + +bool HTTPList::SendRequest() { + this->expectedResponseCode = 200; + + httpVerb = "GET"; + std::string noPayloadAllowed; + return SendHTTPRequest(noPayloadAllowed); +} + 
+// --------------------------------------------------------------------------- + +HTTPPropfind::~HTTPPropfind() {} + +bool HTTPPropfind::SendRequest() { + httpVerb = "PROPFIND"; + headers["Depth"] = "1"; + headers["Content-Type"] = "application/xml"; + + if (!object.empty()) { + if (hostUrl.back() != '/' && object.front() != '/') { + hostUrl += '/'; + } + hostUrl += object; + } + + std::string payload = "" + " " + " " + " " + " " + " " + ""; + + expectedResponseCode = 207; + + return SendHTTPRequest(payload); +} + +// --------------------------------------------------------------------------- + HTTPHead::~HTTPHead() {} bool HTTPHead::SendRequest() { diff --git a/src/HTTPCommands.hh b/src/HTTPCommands.hh index 47a3d6a..f74681c 100644 --- a/src/HTTPCommands.hh +++ b/src/HTTPCommands.hh @@ -321,6 +321,35 @@ class HTTPDownload : public HTTPRequest { std::string object; }; +class HTTPList : public HTTPRequest { + public: + HTTPList(const std::string &h, const std::string &o, XrdSysError &log, + const TokenFile *token) + : HTTPRequest(h, log, token), object(o) { + hostUrl = hostUrl + "/" + object; + } + + virtual ~HTTPList(); + + virtual bool SendRequest(); + + protected: + std::string object; +}; + +class HTTPPropfind : public HTTPRequest { + public: + HTTPPropfind(const std::string &h, const std::string &o, XrdSysError &log, + const TokenFile *token) + : HTTPRequest(h, log, token), object(o) {} + + virtual ~HTTPPropfind(); + + virtual bool SendRequest(); + + std::string object; +}; + class HTTPHead : public HTTPRequest { public: HTTPHead(const std::string &h, const std::string &o, XrdSysError &log, diff --git a/src/HTTPDirectory.cc b/src/HTTPDirectory.cc new file mode 100644 index 0000000..172c8b2 --- /dev/null +++ b/src/HTTPDirectory.cc @@ -0,0 +1,361 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +#include "HTTPDirectory.hh" +#include "HTTPCommands.hh" +#include "HTTPFile.hh" +#include "HTTPFileSystem.hh" +#include "logging.hh" +#include "stl_string_utils.hh" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +HTTPDirectory::HTTPDirectory(XrdSysError &log, HTTPFileSystem *oss) + : m_log(log), m_oss(oss), m_bytesReturned(0) {} + +std::map +HTTPDirectory::parseWebDAVToFSSpecString(const std::string &content) { + using namespace tinyxml2; + std::map remoteList; + + XMLDocument doc; + XMLError error = doc.Parse(content.c_str()); + if (error != XML_SUCCESS) { + std::cerr << "Failed to parse WebDAV XML response!" << std::endl; + return remoteList; + } + + XMLElement *multistatus = doc.FirstChildElement("ns0:multistatus"); + if (!multistatus) { + std::cerr << "No multistatus element found in WebDAV response!" 
+ << std::endl; + return remoteList; + } + + for (XMLElement *response = multistatus->FirstChildElement("ns0:response"); + response != nullptr; + response = response->NextSiblingElement("ns0:response")) { + + XMLElement *href = response->FirstChildElement("ns0:href"); + if (!href || !href->GetText()) { + continue; + } + + std::string path = href->GetText(); + + struct stat fileStat = {}; + fileStat.st_nlink = 1; + fileStat.st_uid = 1; + fileStat.st_gid = 1; + fileStat.st_atime = 0; + fileStat.st_ctime = 0; + fileStat.st_dev = 0; + fileStat.st_ino = 0; + fileStat.st_mode = 0600 | S_IFREG; + + time_t modTime = 0; + off_t fileSize = 0; + + for (XMLElement *propstat = response->FirstChildElement("ns0:propstat"); + propstat != nullptr; + propstat = propstat->NextSiblingElement("ns0:propstat")) { + + // Check status first - we only want properties with 200 OK status + XMLElement *status = propstat->FirstChildElement("ns0:status"); + if (!status || !status->GetText() || + std::string(status->GetText()).find("200 OK") == + std::string::npos) { + continue; + } + + XMLElement *prop = propstat->FirstChildElement("ns0:prop"); + if (!prop) + continue; + + XMLElement *resourceType = + prop->FirstChildElement("ns0:resourcetype"); + if (resourceType && + resourceType->FirstChildElement("ns0:collection")) { + fileStat.st_mode = 0600 | S_IFDIR; + } + + XMLElement *lastModified = + prop->FirstChildElement("ns0:getlastmodified"); + if (lastModified && lastModified->GetText()) { + std::string modifiedStr = lastModified->GetText(); + + struct tm tm = {}; + char month[4] = {}; + int day, year, hour, min, sec; + + if (sscanf(modifiedStr.c_str(), "%*[^,], %d %3s %d %d:%d:%d", + &day, month, &year, &hour, &min, &sec) == 6) { + + const char *months[] = {"Jan", "Feb", "Mar", "Apr", + "May", "Jun", "Jul", "Aug", + "Sep", "Oct", "Nov", "Dec"}; + int monthNum = 0; + for (int i = 0; i < 12; i++) { + if (strcmp(month, months[i]) == 0) { + monthNum = i; + break; + } + } + + tm.tm_year = year - 
1900; + tm.tm_mon = monthNum; + tm.tm_mday = day; + tm.tm_hour = hour; + tm.tm_min = min; + tm.tm_sec = sec; + // HTTP-dates are always GMT, so convert with timegm(); mktime() would apply the local zone + modTime = timegm(&tm); + } + } + + // Get content length + XMLElement *contentLength = + prop->FirstChildElement("ns0:getcontentlength"); + if (contentLength && contentLength->GetText()) { + try { + fileSize = std::stoll(contentLength->GetText()); + } catch (std::exception &e) { + fileSize = 0; + } + } + } + + fileStat.st_size = fileSize; + fileStat.st_mtime = modTime; + std::string name = path; + + if (!name.empty() && name.back() == '/') { + name.pop_back(); + } + + size_t lastSlash = name.find_last_of('/'); + if (lastSlash != std::string::npos) { + name = name.substr(lastSlash + 1); + } + + if (name.empty() && path.length() > 0 && path[0] == '/') { + name = "/"; + } + + remoteList[name] = fileStat; + } + + return remoteList; +} + +std::map +HTTPDirectory::parseHTMLToFSSpecString(const std::string &htmlContent) { + using namespace tinyxml2; + std::map remoteList; + + XMLDocument doc; + XMLError error = doc.Parse(htmlContent.c_str()); + if (error != XML_SUCCESS) { + std::cerr << "Failed to parse HTML!" << std::endl; + return remoteList; + } + + // Root of the HTML document + XMLNode *root = doc.FirstChild(); + if (!root) { + std::cerr << "No root found in HTML!" << std::endl; + return remoteList; + } + + // Traverse the rows in the table + for (XMLElement *row = root->FirstChildElement("tr"); row != nullptr; + row = row->NextSiblingElement("tr")) { + FSSpecEntry entry; + int columnIndex = 0; + + // Traverse each cell in the row + for (XMLElement *cell = row->FirstChildElement("td"); cell != nullptr; + cell = cell->NextSiblingElement("td")) { + const char *cellText = cell->GetText() ? 
cell->GetText() : ""; + + switch (columnIndex) { + case 0: // Mode + entry.mode = cellText; + break; + case 1: // Flags + entry.flags = cellText; + break; + case 2: // Size + entry.size = cellText; + break; + case 3: // Modified + entry.modified = cellText; + break; + case 4: // Name + if (XMLElement *aTag = cell->FirstChildElement("a")) { + const char *nameText = + aTag->GetText() ? aTag->GetText() : ""; + entry.name = nameText; + } + break; + default: + break; + } + columnIndex++; + } + + // Skip adding invalid/empty rows + if (entry.name.empty()) { + continue; + } + + // Zero-initialise: st_mtime and the other unset fields must not be stack garbage + struct stat workingFile = {}; + try { + workingFile.st_size = std::stoul(entry.size, nullptr, 10); + } catch (const std::exception &) { + workingFile.st_size = 0; // empty or non-numeric size cell + } + // workingFile.st_mtime = std::stoul(entry.modified, nullptr, 10); + if (entry.mode.substr(0, 1) == "d") + workingFile.st_mode = 0600 | S_IFDIR; + else + workingFile.st_mode = 0600 | S_IFREG; + workingFile.st_nlink = 1; + workingFile.st_uid = 1; + workingFile.st_gid = 1; + remoteList[entry.name] = workingFile; + } + + return remoteList; // Return the formatted list +} + +std::string HTTPDirectory::extractHTMLTable(const std::string &htmlContent) { + std::regex tableRegex(R"(]*>[\s\S]*?)", + std::regex_constants::icase); + + std::smatch match; + if (std::regex_search(htmlContent, match, tableRegex)) { + return match.str(); + } + + return ""; // Return an empty string if no table is found +} + +int HTTPDirectory::Readdir(char *buff, int blen) { + if (!buff || blen <= 0) { + return -EINVAL; + } + if (m_remoteList.empty()) { + buff[0] = '\0'; // an empty name marks the end of the listing + return 0; + } + const auto entry = m_remoteList.begin(); + const std::string name = entry->first; 
+ if (name.size() >= size_t(blen)) { + return -ENAMETOOLONG; // name plus its NUL would overrun buff + } + if (mystat) { + // Whole-struct copy so st_mtime reaches the caller as well + *mystat = entry->second; + } + memcpy(buff, name.c_str(), name.size() + 1); + m_remoteList.erase(entry); + return name.size(); +} + +int HTTPDirectory::Opendir(const char *path, XrdOucEnv &env) { + m_log.Log(LogMask::Debug, "HTTPDirectory::Opendir", "Opendir called"); + auto configured_hostname = m_oss->getHTTPHostName(); + auto configured_hostUrl = m_oss->getHTTPHostUrl(); + const auto &configured_url_base = m_oss->getHTTPUrlBase(); + if (!configured_url_base.empty()) { + configured_hostUrl = configured_url_base; + configured_hostname = m_oss->getStoragePrefix(); + } + + // + // Check the path for validity. + // + std::string object; + int rv = parse_path(configured_hostname, path, object); + + if (rv != 0) { + return rv; + } + + m_object = object; + m_hostname = configured_hostname; + m_hostUrl = configured_hostUrl; + m_remote_flavor = m_oss->getRemoteFlavor(); + // "auto" is treated as "webdav" when choosing the listing request below + if (m_remoteList.empty()) { + m_log.Log(LogMask::Debug, "HTTPDirectory::Opendir", + "About to perform download from HTTPDirectory::Opendir(): " + "hostname / object:", + m_hostname.c_str(), m_object.c_str()); + if (m_remote_flavor == "webdav" || m_remote_flavor == "auto") { + HTTPPropfind request = + HTTPPropfind(m_hostUrl, m_object, m_log, m_oss->getToken()); + if (!request.SendRequest()) { + std::stringstream ss; + ss << "Failed to send GetObject command: " + << request.getResponseCode() << "'" + << request.getResultString() << "'"; + m_log.Log(LogMask::Warning, "HTTPDirectory::Opendir", + 
ss.str().c_str()); + return -EIO; // a failed listing must not look like an empty directory + } + m_remoteList = parseWebDAVToFSSpecString(request.getResultString()); + + } else { + HTTPList request = + HTTPList(m_hostUrl, m_object, m_log, m_oss->getToken()); + if (!request.SendRequest()) { + std::stringstream ss; + ss << "Failed to send GetObject command: " + << request.getResponseCode() << "'" + << request.getResultString() << "'"; + m_log.Log(LogMask::Warning, "HTTPDirectory::Opendir", + ss.str().c_str()); + return -EIO; // a failed listing must not look like an empty directory + } + m_remoteList = parseHTMLToFSSpecString( + extractHTMLTable(request.getResultString())); + } + } + return 0; +} diff --git a/src/HTTPDirectory.hh b/src/HTTPDirectory.hh index 7e1068c..c6bd9d0 100644 --- a/src/HTTPDirectory.hh +++ b/src/HTTPDirectory.hh @@ -18,27 +18,53 @@ #pragma once +#include "HTTPFileSystem.hh" #include "XrdOss/XrdOss.hh" #include "XrdOuc/XrdOucEnv.hh" +#include "logging.hh" +#include +#include -class XrdSysError; +using namespace XrdHTTPServer; class HTTPDirectory : public XrdOssDF { public: - HTTPDirectory(XrdSysError &log) : m_log(log) {} - + HTTPDirectory(XrdSysError &log, HTTPFileSystem *oss); virtual ~HTTPDirectory() {} - virtual int Opendir(const char *path, XrdOucEnv &env) override { - return -ENOSYS; - } + virtual int Opendir(const char *path, XrdOucEnv &env) override; - virtual int Readdir(char *buff, int blen) override { return -ENOSYS; } + virtual int Readdir(char *buff, int blen) override; - virtual int StatRet(struct stat *statStruct) override { return -ENOSYS; } + virtual int StatRet(struct stat *statStruct) override { + mystat = statStruct; + return SFS_OK; + } virtual int Close(long long *retsz = 0) override { return -ENOSYS; } protected: + struct FSSpecEntry { + std::string mode; + std::string flags; + std::string size; + std::string modified; + std::string name; + }; + + std::map + parseHTMLToFSSpecString(const std::string &htmlContent); + std::map + parseWebDAVToFSSpecString(const std::string &htmlContent); + std::string extractHTMLTable(const std::string &htmlContent); + + struct stat *mystat{nullptr}; // set by StatRet(); stays null until then XrdSysError &m_log; + std::string m_object; + HTTPFileSystem *m_oss; + std::string m_hostname; + std::string m_hostUrl; + std::map m_remoteList; + std::string m_remote_flavor; + int m_bytesReturned; }; diff --git a/src/HTTPFile.cc b/src/HTTPFile.cc index dff96ee..a53b7af 100644 --- a/src/HTTPFile.cc +++ b/src/HTTPFile.cc @@ -48,50 +48,6 @@ XrdVERSIONINFO(XrdOssGetFileSystem, HTTP); 
HTTPFile::HTTPFile(XrdSysError &log, HTTPFileSystem *oss) : m_log(log), m_oss(oss), content_length(0), last_modified(0) {} -// Ensures that path is of the form /storagePrefix/object and returns -// the resulting object value. The storagePrefix does not necessarily begin -// with '/' -// -// Examples: -// /foo/bar, /foo/bar/baz -> baz -// storage.com/foo, /storage.com/foo/bar -> bar -// /baz, /foo/bar -> error -int parse_path(const std::string &storagePrefixStr, const char *pathStr, - std::string &object) { - const std::filesystem::path storagePath(pathStr); - const std::filesystem::path storagePrefix(storagePrefixStr); - - auto prefixComponents = storagePrefix.begin(); - auto pathComponents = storagePath.begin(); - - std::filesystem::path full; - std::filesystem::path prefix; - - pathComponents++; - if (!storagePrefixStr.empty() && storagePrefixStr[0] == '/') { - prefixComponents++; - } - - while (prefixComponents != storagePrefix.end() && - *prefixComponents == *pathComponents) { - full /= *prefixComponents++; - prefix /= *pathComponents++; - } - - // Check that nothing diverged before reaching end of service name - if (prefixComponents != storagePrefix.end()) { - return -ENOENT; - } - - std::filesystem::path obj_path; - while (pathComponents != storagePath.end()) { - obj_path /= *pathComponents++; - } - - object = obj_path.string(); - return 0; -} - int HTTPFile::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { auto configured_hostname = m_oss->getHTTPHostName(); auto configured_hostUrl = m_oss->getHTTPHostUrl(); @@ -117,7 +73,16 @@ int HTTPFile::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { if (!Oflag) { struct stat buf; - return Fstat(&buf); + // Propagate HEAD failures; buf is not filled in when Fstat() fails + const int fstat_rv = Fstat(&buf); + if (fstat_rv) { + return fstat_rv; + } + return S_ISDIR(buf.st_mode) ? -EISDIR : 0; + // XXX May need to return an error here to show that the request is + // against a directory instead of file could be: + // https://man7.org/linux/man-pages/man2/open.2.html return may be + // 
-EISDIR } return 0; @@ -146,7 +111,11 @@ ssize_t HTTPFile::Read(void *buffer, off_t offset, size_t size) { int HTTPFile::Fstat(struct stat *buff) { if (m_stat) { memset(buff, '\0', sizeof(struct stat)); - buff->st_mode = 0600 | S_IFREG; + if (m_object.empty() || m_object.back() == '/') + buff->st_mode = 0600 | S_IFDIR; + else + buff->st_mode = 0600 | S_IFREG; + buff->st_nlink = 1; buff->st_uid = 1; buff->st_gid = 1; @@ -232,9 +201,18 @@ int HTTPFile::Fstat(struct stat *buff) { current_newline = next_newline; } + // headers are totally different for a file versus an html stream + // describing a directory. note that here and fill out the buffer + // accordingly + // + // st_mode is assigned below, only after the null check and the memset if (buff) { memset(buff, '\0', sizeof(struct stat)); - buff->st_mode = 0600 | S_IFREG; + if (m_object == "" || m_object.back() == '/') + buff->st_mode = 0600 | S_IFDIR; + else + buff->st_mode = 0600 | S_IFREG; + buff->st_nlink = 1; buff->st_uid = 1; buff->st_gid = 1; diff --git a/src/HTTPFile.hh b/src/HTTPFile.hh index 68642e4..dc885a2 100644 --- a/src/HTTPFile.hh +++ b/src/HTTPFile.hh @@ -27,9 +27,6 @@ #include -int parse_path(const std::string &hostname, const char *path, - std::string &object); - class HTTPFile : public XrdOssDF { public: HTTPFile(XrdSysError &log, HTTPFileSystem *oss); diff --git a/src/HTTPFileSystem.cc b/src/HTTPFileSystem.cc index 536cabe..1c5fd9c 100644 --- a/src/HTTPFileSystem.cc +++ b/src/HTTPFileSystem.cc @@ -108,6 +108,8 @@ bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { http_host_url) || !handle_required_config(attribute, "httpserver.url_base", value, m_url_base) || + !handle_required_config(attribute, "httpserver.remote_flavor", + value, m_remote_flavor) || !handle_required_config(attribute, "httpserver.storage_prefix", value, m_storage_prefix) || !handle_required_config(attribute, "httpserver.token_file", value, @@ -127,6 +129,12 @@ bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { "httpserver.url_base are required"); return 
false; } + if (m_remote_flavor != "http" && m_remote_flavor != "webdav" && + m_remote_flavor != "auto") { + m_log.Emsg("Config", "Invalid httpserver.remote_flavor specified; " + "must be one of: 'http', 'webdav', or 'auto'"); + return false; + } } if (!token_file.empty()) { @@ -139,7 +147,7 @@ bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { // Object Allocation Functions // XrdOssDF *HTTPFileSystem::newDir(const char *user) { - return new HTTPDirectory(m_log); + return new HTTPDirectory(m_log, this); } XrdOssDF *HTTPFileSystem::newFile(const char *user) { @@ -150,11 +158,11 @@ int HTTPFileSystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *env) { std::string error; - m_log.Emsg("Stat", "Stat'ing path", path); + // need to forward a HEAD request to the remote server HTTPFile httpFile(m_log, this); int rv = httpFile.Open(path, 0, (mode_t)0, *env); - if (rv) { + if (rv && rv != EISDIR) { m_log.Emsg("Stat", "Failed to open path:", path); } // Assume that HTTPFile::FStat() doesn't write to buff unless it succeeds. diff --git a/src/HTTPFileSystem.hh b/src/HTTPFileSystem.hh index ac21e90..f2da4b9 100644 --- a/src/HTTPFileSystem.hh +++ b/src/HTTPFileSystem.hh @@ -106,6 +106,7 @@ class HTTPFileSystem : public XrdOss { const std::string &getHTTPHostUrl() const { return http_host_url; } const std::string &getHTTPUrlBase() const { return m_url_base; } const std::string &getStoragePrefix() const { return m_storage_prefix; } + const std::string &getRemoteFlavor() const { return m_remote_flavor; } const TokenFile *getToken() const { return &m_token; } protected: @@ -120,5 +121,7 @@ class HTTPFileSystem : public XrdOss { std::string http_host_url; std::string m_url_base; std::string m_storage_prefix; + std::string m_remote_flavor; // http, webdav or auto. 
auto is currently a + // synonym for webdav TokenFile m_token; }; diff --git a/src/S3Directory.hh b/src/S3Directory.hh index b15207d..e333c66 100644 --- a/src/S3Directory.hh +++ b/src/S3Directory.hh @@ -27,10 +27,10 @@ class XrdSysError; -class S3Directory : public HTTPDirectory { +class S3Directory : public XrdOssDF { public: S3Directory(XrdSysError &log, const S3FileSystem &fs) - : HTTPDirectory(log), m_fs(fs) {} + : m_log(log), m_fs(fs) {} // Initialize it to false. virtual ~S3Directory() {} @@ -46,6 +46,7 @@ class S3Directory : public HTTPDirectory { void Reset(); int ListS3Dir(const std::string &ct); + XrdSysError &m_log; bool m_opened{false}; ssize_t m_idx{0}; std::vector m_objInfo; diff --git a/src/stl_string_utils.cc b/src/stl_string_utils.cc index 09e4b10..957433f 100644 --- a/src/stl_string_utils.cc +++ b/src/stl_string_utils.cc @@ -183,3 +183,47 @@ void trimslashes(std::string &path) { path = path.substr(begin, (end - begin) + 1); } } + +// Ensures that path is of the form /storagePrefix/object and returns +// the resulting object value. 
The storagePrefix does not necessarily begin +// with '/' +// +// Examples: +// /foo/bar, /foo/bar/baz -> baz +// storage.com/foo, /storage.com/foo/bar -> bar +// /baz, /foo/bar -> error +int parse_path(const std::string &storagePrefixStr, const char *pathStr, + std::string &object) { + const std::filesystem::path storagePath(pathStr); + const std::filesystem::path storagePrefix(storagePrefixStr); + + auto prefixComponents = storagePrefix.begin(); + auto pathComponents = storagePath.begin(); + // Skip the root component; an empty path has nothing to skip + if (pathComponents != storagePath.end()) { + pathComponents++; + } + if (!storagePrefixStr.empty() && storagePrefixStr[0] == '/') { + prefixComponents++; + } + // Never dereference the path iterator once the path is exhausted + while (prefixComponents != storagePrefix.end() && + pathComponents != storagePath.end() && + *prefixComponents == *pathComponents) { + ++prefixComponents; + ++pathComponents; + } + + // Check that nothing diverged before reaching end of service name + if (prefixComponents != storagePrefix.end()) { + return -ENOENT; + } + + std::filesystem::path obj_path; + while (pathComponents != storagePath.end()) { + obj_path /= *pathComponents++; + } + + object = obj_path.string(); + return 0; +} diff --git a/src/stl_string_utils.hh b/src/stl_string_utils.hh index d119e4e..2e6edc0 100644 --- a/src/stl_string_utils.hh +++ b/src/stl_string_utils.hh @@ -18,6 +18,7 @@ #pragma once +#include #include #ifndef CHECK_PRINTF_FORMAT @@ -53,3 +54,6 @@ std::string urlquote(const std::string input); // foo/bar/// -> foo/bar // /a/b -> a/b void trimslashes(std::string &path); + +int parse_path(const std::string &storagePrefixStr, const char *path, + std::string &object); diff --git a/test/http_tests.cc b/test/http_tests.cc index 4429f53..a86589b 100644 --- a/test/http_tests.cc +++ b/test/http_tests.cc @@ -22,8 +22,11 @@ #include #include #include +#include #include +#include +#include #include #include #include @@ -58,6 +61,29 @@ void parseEnvFile(const std::string &fname) { } } +TEST(TestHTTPFile, TestList) { + XrdSysLogger log; + + 
HTTPFileSystem fs(&log, g_config_file.c_str(), nullptr); + + struct stat si; + auto rc = fs.Stat("/testdir", &si); + ASSERT_EQ(rc, 0); + ASSERT_EQ(si.st_size, 4096); + + auto fd = fs.newDir(); + struct stat *statStruct = new struct stat; + fd->StatRet(statStruct); + XrdOucEnv env; + rc = fd->Open("/testdir", O_RDONLY, 0700, env); + ASSERT_EQ(rc, -21); + ASSERT_EQ(fd->Opendir("/testdir", env), 0); + + char buf[255]; + auto res = fd->Readdir(buf, 255); + ASSERT_EQ(res, 15); +} + TEST(TestHTTPFile, TestXfer) { XrdSysLogger log; @@ -104,7 +130,23 @@ TEST(TestHTTPParseProtocol, Test1) { ASSERT_EQ(protocol, "http"); } +void segfaultHandler(int sig) { + void *array[20]; + size_t size; + + // Get void*'s for all entries on the stack + size = backtrace(array, 20); + + // Print stack trace to stderr + fprintf(stderr, "Error: signal %d:\n", sig); + backtrace_symbols_fd(array, size, STDERR_FILENO); + + exit(1); +} + int main(int argc, char **argv) { + signal(SIGSEGV, segfaultHandler); + ::testing::InitGoogleTest(&argc, argv); if (argc != 2) { diff --git a/test/xrdhttp-setup.sh b/test/xrdhttp-setup.sh index 2b94c27..4eb5400 100755 --- a/test/xrdhttp-setup.sh +++ b/test/xrdhttp-setup.sh @@ -167,6 +167,8 @@ EOF # Export some data through the origin echo "Hello, World" > "$XROOTD_EXPORTDIR/hello_world.txt" +mkdir "$XROOTD_EXPORTDIR/testdir" +echo "Hello, World" > "$XROOTD_EXPORTDIR/testdir/hello_world.txt" # Launch XRootD daemon. "$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- >>"$BINARY_DIR/tests/$TEST_NAME/server.log" 2>>"$BINARY_DIR/tests/$TEST_NAME/server.log" & @@ -199,9 +201,15 @@ echo "xrootd started at $XROOTD_URL" XROOTD_HTTPSERVER_CONFIG="$XROOTD_CONFIGDIR/xrootd-httpserver.cfg" cat > "$XROOTD_HTTPSERVER_CONFIG" <