From 577571dd4b131402a0de2bd8c72e09590076f702 Mon Sep 17 00:00:00 2001 From: rw2 Date: Thu, 15 May 2025 17:22:41 -0500 Subject: [PATCH 1/7] this is buggy, but stashing it away before a refactor to match the workflow in the S3 version of this same code --- CMakeLists.txt | 4 ++-- src/HTTPCommands.cc | 12 ++++++++++++ src/HTTPCommands.hh | 16 ++++++++++++++++ src/HTTPDirectory.hh | 16 ++++++++++------ src/HTTPFile.cc | 28 +++++++++++++++++++++++++--- src/HTTPFileSystem.cc | 6 ++++-- test/http_tests.cc | 24 ++++++++++++++++++++++++ 7 files changed, 93 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6194e9f..cfbb3fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,7 +76,7 @@ add_definitions( -D_FILE_OFFSET_BITS=64 ) # On Linux, we hide all the symbols for the final libraries, exposing only what's needed for the XRootD # runtime loader. So here we create the object library and will create a separate one for testing with # the symbols exposed. -add_library(XrdS3Obj OBJECT src/CurlUtil.cc src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdS3Obj OBJECT src/CurlUtil.cc src/S3File.cc src/S3Directory.cc src/HTTPDirectory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) set_target_properties(XrdS3Obj PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(XrdS3Obj PRIVATE ${XRootD_INCLUDE_DIRS}) target_link_libraries( XrdS3Obj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl OpenSSL::Crypto tinyxml2::tinyxml2 Threads::Threads std::filesystem std::atomic ) @@ -84,7 +84,7 @@ target_link_libraries( XrdS3Obj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRAR add_library(XrdS3 MODULE "$") target_link_libraries(XrdS3 XrdS3Obj) -add_library(XrdHTTPServerObj OBJECT src/CurlUtil.cc src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdHTTPServerObj OBJECT src/CurlUtil.cc src/HTTPFile.cc src/HTTPDirectory.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) set_target_properties(XrdHTTPServerObj PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(XrdHTTPServerObj PRIVATE ${XRootD_INCLUDE_DIRS}) target_link_libraries(XrdHTTPServerObj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl OpenSSL::Crypto Threads::Threads std::filesystem) diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index d43a10c..b42788d 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -817,6 +817,18 @@ bool HTTPDownload::SendRequest(off_t offset, size_t size) { // --------------------------------------------------------------------------- +HTTPList::~HTTPList() {} + +bool HTTPList::SendRequest() { + this->expectedResponseCode = 200; + + httpVerb = "GET"; + std::string noPayloadAllowed; + return SendHTTPRequest(noPayloadAllowed); +} + +// --------------------------------------------------------------------------- + HTTPHead::~HTTPHead() {} bool HTTPHead::SendRequest() { diff --git a/src/HTTPCommands.hh b/src/HTTPCommands.hh index 47a3d6a..3f6648b 100644 --- a/src/HTTPCommands.hh +++ b/src/HTTPCommands.hh @@ -321,6 +321,22 @@ class HTTPDownload : public HTTPRequest { std::string object; }; +class HTTPList : public HTTPRequest { + public: + HTTPList(const std::string &h, const std::string &o, XrdSysError &log, + const TokenFile *token) + : HTTPRequest(h, log, token), object(o) { + hostUrl = hostUrl + "/" + object; + } + + virtual ~HTTPList(); + + virtual bool SendRequest(); + + protected: + std::string object; +}; + class HTTPHead : public HTTPRequest { public: HTTPHead(const std::string &h, const std::string &o, XrdSysError &log, diff --git a/src/HTTPDirectory.hh b/src/HTTPDirectory.hh index 7e1068c..97e3cf4 100644 --- a/src/HTTPDirectory.hh +++ b/src/HTTPDirectory.hh @@ -18,22 +18,22 @@ #pragma once +#include "HTTPFileSystem.hh" #include "XrdOss/XrdOss.hh" #include "XrdOuc/XrdOucEnv.hh" +#include "logging.hh" -class XrdSysError; +using namespace XrdHTTPServer; class HTTPDirectory : public XrdOssDF { public: - HTTPDirectory(XrdSysError &log) : m_log(log) {} + HTTPDirectory(XrdSysError &log, HTTPFileSystem *oss); virtual ~HTTPDirectory() {} - virtual int Opendir(const char *path, XrdOucEnv &env) override { - return -ENOSYS; - } + virtual int Opendir(const char *path, XrdOucEnv &env) override; - virtual int Readdir(char *buff, int blen) override { return -ENOSYS; } + virtual int Readdir(char *buff, int blen) override; virtual int StatRet(struct stat *statStruct) override { return -ENOSYS; } @@ -41,4 +41,8 @@ class HTTPDirectory : public XrdOssDF { protected: XrdSysError &m_log; + std::string m_object; + HTTPFileSystem *m_oss; + std::string m_hostname; + std::string m_hostUrl; }; diff --git a/src/HTTPFile.cc b/src/HTTPFile.cc index dff96ee..7349649 100644 --- a/src/HTTPFile.cc +++ b/src/HTTPFile.cc @@ -117,7 +117,16 @@ int HTTPFile::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { if (!Oflag) { struct stat buf; - return Fstat(&buf); + Fstat(&buf); + if (S_ISDIR(buf.st_mode)) { + return EISDIR; + } else { + return 0; + } + // XXX May need to return an error here to show that the request is + // against a directory instead of file could be: + // https://man7.org/linux/man-pages/man2/open.2.html return may be + // -EISDIR } return 0; @@ -146,7 +155,11 @@ ssize_t HTTPFile::Read(void *buffer, off_t offset, size_t size) { int HTTPFile::Fstat(struct stat *buff) { if (m_stat) { memset(buff, '\0', sizeof(struct stat)); - buff->st_mode = 0600 | S_IFREG; + if (m_object == "") + buff->st_mode = 0600 | S_IFDIR; + else + buff->st_mode = 0600 | S_IFREG; + buff->st_nlink = 1; buff->st_uid = 1; buff->st_gid = 1; @@ -232,9 +245,18 @@ int HTTPFile::Fstat(struct stat *buff) { current_newline = next_newline; } + // headers are totally different for a file versus an html stream + // describing a directory. note that here and fill out the buffer + // accordingly + + buff->st_mode = 0600 | S_IFDIR; if (buff) { memset(buff, '\0', sizeof(struct stat)); - buff->st_mode = 0600 | S_IFREG; + if (m_object == "") + buff->st_mode = 0600 | S_IFDIR; + else + buff->st_mode = 0600 | S_IFREG; + buff->st_nlink = 1; buff->st_uid = 1; buff->st_gid = 1; diff --git a/src/HTTPFileSystem.cc b/src/HTTPFileSystem.cc index 536cabe..9b77b25 100644 --- a/src/HTTPFileSystem.cc +++ b/src/HTTPFileSystem.cc @@ -139,7 +139,7 @@ bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { // Object Allocation Functions // XrdOssDF *HTTPFileSystem::newDir(const char *user) { - return new HTTPDirectory(m_log); + return new HTTPDirectory(m_log, this); } XrdOssDF *HTTPFileSystem::newFile(const char *user) { @@ -152,9 +152,11 @@ int HTTPFileSystem::Stat(const char *path, struct stat *buff, int opts, m_log.Emsg("Stat", "Stat'ing path", path); + // need to forward a HEAD request to the remote server + HTTPFile httpFile(m_log, this); int rv = httpFile.Open(path, 0, (mode_t)0, *env); - if (rv) { + if (rv && rv != EISDIR) { m_log.Emsg("Stat", "Failed to open path:", path); } // Assume that HTTPFile::FStat() doesn't write to buff unless it succeeds. diff --git a/test/http_tests.cc b/test/http_tests.cc index 4429f53..4f1723e 100644 --- a/test/http_tests.cc +++ b/test/http_tests.cc @@ -58,6 +58,30 @@ void parseEnvFile(const std::string &fname) { } } +TEST(TestHTTPFile, TestList) { + XrdSysLogger log; + + HTTPFileSystem fs(&log, g_config_file.c_str(), nullptr); + + struct stat si; + auto rc = fs.Stat("/hello_world.txt", &si); + ASSERT_EQ(rc, 0); + ASSERT_EQ(si.st_size, 13); + + auto fh = fs.newFile(); + XrdOucEnv env; + rc = fh->Open("/hello_world.txt", O_RDONLY, 0700, env); + ASSERT_EQ(rc, 0); + + char buf[12]; + auto res = fh->Read(buf, 0, 12); + ASSERT_EQ(res, 12); + + ASSERT_EQ(memcmp(buf, "Hello, World", 12), 0); + + ASSERT_EQ(fh->Close(), 0); +} + TEST(TestHTTPFile, TestXfer) { XrdSysLogger log; From ac77e2b532190f1948314a7d824d681981f94f67 Mon Sep 17 00:00:00 2001 From: rw2 Date: Wed, 21 May 2025 18:49:18 -0500 Subject: [PATCH 2/7] returning something useful from xrootd listing --- CMakeLists.txt | 2 +- src/HTTPDirectory.hh | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cfbb3fa..9aae14c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,7 @@ target_link_libraries(XrdS3 XrdS3Obj) add_library(XrdHTTPServerObj OBJECT src/CurlUtil.cc src/HTTPFile.cc src/HTTPDirectory.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) set_target_properties(XrdHTTPServerObj PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(XrdHTTPServerObj PRIVATE ${XRootD_INCLUDE_DIRS}) -target_link_libraries(XrdHTTPServerObj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl OpenSSL::Crypto Threads::Threads std::filesystem) +target_link_libraries(XrdHTTPServerObj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl tinyxml2::tinyxml2 OpenSSL::Crypto Threads::Threads std::filesystem) add_library(XrdHTTPServer MODULE "$") target_link_libraries(XrdHTTPServer XrdHTTPServerObj) diff --git a/src/HTTPDirectory.hh b/src/HTTPDirectory.hh index 97e3cf4..e038184 100644 --- a/src/HTTPDirectory.hh +++ b/src/HTTPDirectory.hh @@ -28,7 +28,6 @@ using namespace XrdHTTPServer; class HTTPDirectory : public XrdOssDF { public: HTTPDirectory(XrdSysError &log, HTTPFileSystem *oss); - virtual ~HTTPDirectory() {} virtual int Opendir(const char *path, XrdOucEnv &env) override; @@ -40,9 +39,21 @@ class HTTPDirectory : public XrdOssDF { virtual int Close(long long *retsz = 0) override { return -ENOSYS; } protected: + struct FSSpecEntry { + std::string mode; + std::string flags; + std::string size; + std::string modified; + std::string name; + }; + + std::string parseHTMLToFSSpecString(const std::string &htmlContent); + std::string extractHTMLTable(const std::string &htmlContent); + XrdSysError &m_log; std::string m_object; HTTPFileSystem *m_oss; std::string m_hostname; std::string m_hostUrl; + bool m_readdirCalled; // Tracks if Readdir has been called. }; From 67658ad5ba6b65d451989ee984e8e23ce95c781a Mon Sep 17 00:00:00 2001 From: rw2 Date: Wed, 13 Aug 2025 13:50:23 -0500 Subject: [PATCH 3/7] http listing rewritten with due to new information about how xrootd works --- src/HTTPDirectory.hh | 15 ++++++++++--- src/HTTPFile.cc | 48 ++--------------------------------------- src/HTTPFile.hh | 3 --- src/HTTPFileSystem.cc | 8 +++++++ src/HTTPFileSystem.hh | 3 +++ src/S3Directory.hh | 5 +++-- src/stl_string_utils.cc | 44 +++++++++++++++++++++++++++++++++++++ src/stl_string_utils.hh | 4 ++++ 8 files changed, 76 insertions(+), 54 deletions(-) diff --git a/src/HTTPDirectory.hh b/src/HTTPDirectory.hh index e038184..6087067 100644 --- a/src/HTTPDirectory.hh +++ b/src/HTTPDirectory.hh @@ -22,6 +22,8 @@ #include "XrdOss/XrdOss.hh" #include "XrdOuc/XrdOucEnv.hh" #include "logging.hh" +#include +#include using namespace XrdHTTPServer; @@ -34,7 +36,10 @@ class HTTPDirectory : public XrdOssDF { virtual int Readdir(char *buff, int blen) override; - virtual int StatRet(struct stat *statStruct) override { return -ENOSYS; } + virtual int StatRet(struct stat *statStruct) override { + mystat = statStruct; + return SFS_OK; + } virtual int Close(long long *retsz = 0) override { return -ENOSYS; } @@ -47,13 +52,17 @@ class HTTPDirectory : public XrdOssDF { std::string name; }; - std::string parseHTMLToFSSpecString(const std::string &htmlContent); + std::map + parseHTMLToFSSpecString(const std::string &htmlContent); std::string extractHTMLTable(const std::string &htmlContent); + struct stat *mystat; XrdSysError &m_log; std::string m_object; HTTPFileSystem *m_oss; std::string m_hostname; std::string m_hostUrl; - bool m_readdirCalled; // Tracks if Readdir has been called. + std::map m_remoteList; + std::string m_remote_flavor; + int m_bytesReturned; }; diff --git a/src/HTTPFile.cc b/src/HTTPFile.cc index 7349649..a53b7af 100644 --- a/src/HTTPFile.cc +++ b/src/HTTPFile.cc @@ -48,50 +48,6 @@ XrdVERSIONINFO(XrdOssGetFileSystem, HTTP); HTTPFile::HTTPFile(XrdSysError &log, HTTPFileSystem *oss) : m_log(log), m_oss(oss), content_length(0), last_modified(0) {} -// Ensures that path is of the form /storagePrefix/object and returns -// the resulting object value. The storagePrefix does not necessarily begin -// with '/' -// -// Examples: -// /foo/bar, /foo/bar/baz -> baz -// storage.com/foo, /storage.com/foo/bar -> bar -// /baz, /foo/bar -> error -int parse_path(const std::string &storagePrefixStr, const char *pathStr, - std::string &object) { - const std::filesystem::path storagePath(pathStr); - const std::filesystem::path storagePrefix(storagePrefixStr); - - auto prefixComponents = storagePrefix.begin(); - auto pathComponents = storagePath.begin(); - - std::filesystem::path full; - std::filesystem::path prefix; - - pathComponents++; - if (!storagePrefixStr.empty() && storagePrefixStr[0] == '/') { - prefixComponents++; - } - - while (prefixComponents != storagePrefix.end() && - *prefixComponents == *pathComponents) { - full /= *prefixComponents++; - prefix /= *pathComponents++; - } - - // Check that nothing diverged before reaching end of service name - if (prefixComponents != storagePrefix.end()) { - return -ENOENT; - } - - std::filesystem::path obj_path; - while (pathComponents != storagePath.end()) { - obj_path /= *pathComponents++; - } - - object = obj_path.string(); - return 0; -} - int HTTPFile::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { auto configured_hostname = m_oss->getHTTPHostName(); auto configured_hostUrl = m_oss->getHTTPHostUrl(); @@ -119,7 +75,7 @@ int HTTPFile::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { struct stat buf; Fstat(&buf); if (S_ISDIR(buf.st_mode)) { - return EISDIR; + return -EISDIR; } else { return 0; } @@ -252,7 +208,7 @@ int HTTPFile::Fstat(struct stat *buff) { buff->st_mode = 0600 | S_IFDIR; if (buff) { memset(buff, '\0', sizeof(struct stat)); - if (m_object == "") + if (m_object == "" || m_object.back() == '/') buff->st_mode = 0600 | S_IFDIR; else buff->st_mode = 0600 | S_IFREG; diff --git a/src/HTTPFile.hh b/src/HTTPFile.hh index 68642e4..dc885a2 100644 --- a/src/HTTPFile.hh +++ b/src/HTTPFile.hh @@ -27,9 +27,6 @@ #include -int parse_path(const std::string &hostname, const char *path, - std::string &object); - class HTTPFile : public XrdOssDF { public: HTTPFile(XrdSysError &log, HTTPFileSystem *oss); diff --git a/src/HTTPFileSystem.cc b/src/HTTPFileSystem.cc index 9b77b25..0b317a5 100644 --- a/src/HTTPFileSystem.cc +++ b/src/HTTPFileSystem.cc @@ -108,6 +108,8 @@ bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { http_host_url) || !handle_required_config(attribute, "httpserver.url_base", value, m_url_base) || + !handle_required_config(attribute, "httpserver.remote_flavor", + value, m_remote_flavor) || !handle_required_config(attribute, "httpserver.storage_prefix", value, m_storage_prefix) || !handle_required_config(attribute, "httpserver.token_file", value, @@ -127,6 +129,12 @@ bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { "httpserver.url_base are required"); return false; } + if (m_remote_flavor != "http" && m_remote_flavor != "webdav" && + m_remote_flavor != "auto") { + m_log.Emsg("Config", "Invalid httpserver.remote_flavor specified; " + "must be one of: 'http', 'webdav', or 'auto'"); + return false; + } } if (!token_file.empty()) { diff --git a/src/HTTPFileSystem.hh b/src/HTTPFileSystem.hh index ac21e90..f2da4b9 100644 --- a/src/HTTPFileSystem.hh +++ b/src/HTTPFileSystem.hh @@ -106,6 +106,7 @@ class HTTPFileSystem : public XrdOss { const std::string &getHTTPHostUrl() const { return http_host_url; } const std::string &getHTTPUrlBase() const { return m_url_base; } const std::string &getStoragePrefix() const { return m_storage_prefix; } + const std::string &getRemoteFlavor() const { return m_remote_flavor; } const TokenFile *getToken() const { return &m_token; } protected: @@ -120,5 +121,7 @@ class HTTPFileSystem : public XrdOss { std::string http_host_url; std::string m_url_base; std::string m_storage_prefix; + std::string m_remote_flavor; // http, webdav or auto. auto is currently a + // synonym for webdav TokenFile m_token; }; diff --git a/src/S3Directory.hh b/src/S3Directory.hh index b15207d..e333c66 100644 --- a/src/S3Directory.hh +++ b/src/S3Directory.hh @@ -27,10 +27,10 @@ class XrdSysError; -class S3Directory : public HTTPDirectory { +class S3Directory : public XrdOssDF { public: S3Directory(XrdSysError &log, const S3FileSystem &fs) - : HTTPDirectory(log), m_fs(fs) {} + : m_log(log), m_fs(fs) {} // Initialize it to false. virtual ~S3Directory() {} @@ -46,6 +46,7 @@ class S3Directory : public HTTPDirectory { void Reset(); int ListS3Dir(const std::string &ct); + XrdSysError &m_log; bool m_opened{false}; ssize_t m_idx{0}; std::vector m_objInfo; diff --git a/src/stl_string_utils.cc b/src/stl_string_utils.cc index 09e4b10..957433f 100644 --- a/src/stl_string_utils.cc +++ b/src/stl_string_utils.cc @@ -183,3 +183,47 @@ void trimslashes(std::string &path) { path = path.substr(begin, (end - begin) + 1); } } + +// Ensures that path is of the form /storagePrefix/object and returns +// the resulting object value. The storagePrefix does not necessarily begin +// with '/' +// +// Examples: +// /foo/bar, /foo/bar/baz -> baz +// storage.com/foo, /storage.com/foo/bar -> bar +// /baz, /foo/bar -> error +int parse_path(const std::string &storagePrefixStr, const char *pathStr, + std::string &object) { + const std::filesystem::path storagePath(pathStr); + const std::filesystem::path storagePrefix(storagePrefixStr); + + auto prefixComponents = storagePrefix.begin(); + auto pathComponents = storagePath.begin(); + + std::filesystem::path full; + std::filesystem::path prefix; + + pathComponents++; + if (!storagePrefixStr.empty() && storagePrefixStr[0] == '/') { + prefixComponents++; + } + + while (prefixComponents != storagePrefix.end() && + *prefixComponents == *pathComponents) { + full /= *prefixComponents++; + prefix /= *pathComponents++; + } + + // Check that nothing diverged before reaching end of service name + if (prefixComponents != storagePrefix.end()) { + return -ENOENT; + } + + std::filesystem::path obj_path; + while (pathComponents != storagePath.end()) { + obj_path /= *pathComponents++; + } + + object = obj_path.string(); + return 0; +} diff --git a/src/stl_string_utils.hh b/src/stl_string_utils.hh index d119e4e..2e6edc0 100644 --- a/src/stl_string_utils.hh +++ b/src/stl_string_utils.hh @@ -18,6 +18,7 @@ #pragma once +#include #include #ifndef CHECK_PRINTF_FORMAT @@ -53,3 +54,6 @@ std::string urlquote(const std::string input); // foo/bar/// -> foo/bar // /a/b -> a/b void trimslashes(std::string &path); + +int parse_path(const std::string &storagePrefixStr, const char *path, + std::string &object); From 593a80fc3b5e447f2e2fcafe5f05c0f81af6dbda Mon Sep 17 00:00:00 2001 From: rw2 Date: Thu, 14 Aug 2025 12:31:19 -0500 Subject: [PATCH 4/7] http listing rewritten with due to new information about how xrootd works --- src/HTTPDirectory.cc | 211 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 src/HTTPDirectory.cc diff --git a/src/HTTPDirectory.cc b/src/HTTPDirectory.cc new file mode 100644 index 0000000..3e2828b --- /dev/null +++ b/src/HTTPDirectory.cc @@ -0,0 +1,211 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +#include "HTTPDirectory.hh" +#include "HTTPCommands.hh" +#include "HTTPFile.hh" +#include "HTTPFileSystem.hh" +#include "logging.hh" +#include "stl_string_utils.hh" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +HTTPDirectory::HTTPDirectory(XrdSysError &log, HTTPFileSystem *oss) + : m_log(log), m_oss(oss), m_bytesReturned(0) {} // Initialize it to false. + +std::map +HTTPDirectory::parseHTMLToFSSpecString(const std::string &htmlContent) { + using namespace tinyxml2; + std::map remoteList; + + XMLDocument doc; + XMLError error = doc.Parse(htmlContent.c_str()); + if (error != XML_SUCCESS) { + std::cerr << "Failed to parse HTML!" << std::endl; + return remoteList; + } + + // Root of the HTML document + XMLNode *root = doc.FirstChild(); + if (!root) { + std::cerr << "No root found in HTML!" << std::endl; + return remoteList; + } + + // Traverse the rows in the table + for (XMLElement *row = root->FirstChildElement("tr"); row != nullptr; + row = row->NextSiblingElement("tr")) { + FSSpecEntry entry; + int columnIndex = 0; + + // Traverse each cell in the row + for (XMLElement *cell = row->FirstChildElement("td"); cell != nullptr; + cell = cell->NextSiblingElement("td")) { + const char *cellText = cell->GetText() ? cell->GetText() : ""; + + switch (columnIndex) { + case 0: // Mode + entry.mode = cellText; + break; + case 1: // Flags + entry.flags = cellText; + break; + case 2: // Size + entry.size = cellText; + break; + case 3: // Modified + entry.modified = cellText; + break; + case 4: // Name + if (XMLElement *aTag = cell->FirstChildElement("a")) { + const char *nameText = + aTag->GetText() ? aTag->GetText() : ""; + entry.name = nameText; + } + break; + default: + break; + } + columnIndex++; + } + + // Skip adding invalid/empty rows + if (entry.name.empty()) { + continue; + } + + struct stat workingFile; + workingFile.st_size = std::stoul(entry.size, nullptr, 10); + // workingFile.st_mtime = std::stoul(entry.modified, nullptr, 10); + if (entry.mode.substr(0, 1) == "d") + workingFile.st_mode = 0600 | S_IFDIR; + else + workingFile.st_mode = 0600 | S_IFREG; + + workingFile.st_nlink = 1; + workingFile.st_uid = 1; + workingFile.st_gid = 1; + workingFile.st_atime = 0; + workingFile.st_ctime = 0; + workingFile.st_dev = 0; + workingFile.st_ino = 0; + remoteList[entry.name] = workingFile; + } + + return remoteList; // Return the formatted list +} + +std::string HTTPDirectory::extractHTMLTable(const std::string &htmlContent) { + std::regex tableRegex(R"(]*>[\s\S]*?)", + std::regex_constants::icase); + + std::smatch match; + if (std::regex_search(htmlContent, match, tableRegex)) { + return match.str(); + } + + return ""; // Return an empty string if no table is found +} + +int HTTPDirectory::Readdir(char *buff, int blen) { + if (m_remoteList.size() > 0) { + std::string name = m_remoteList.begin()->first; + struct stat currentRecord = m_remoteList.begin()->second; + mystat->st_size = currentRecord.st_size; + mystat->st_mode = currentRecord.st_mode; + mystat->st_nlink = currentRecord.st_nlink; + mystat->st_uid = currentRecord.st_uid; + mystat->st_gid = currentRecord.st_gid; + mystat->st_atime = currentRecord.st_atime; + mystat->st_ctime = currentRecord.st_ctime; + mystat->st_dev = currentRecord.st_dev; + mystat->st_ino = currentRecord.st_ino; + memcpy(buff, name.c_str(), name.size() + 1); + m_remoteList.erase(m_remoteList.begin()); + return name.size(); + } else { + buff[0] = '\0'; + return 0; + } +} + +int HTTPDirectory::Opendir(const char *path, XrdOucEnv &env) { + m_log.Log(LogMask::Debug, "HTTPDirectory::Opendir", "Opendir called"); + auto configured_hostname = m_oss->getHTTPHostName(); + auto configured_hostUrl = m_oss->getHTTPHostUrl(); + const auto &configured_url_base = m_oss->getHTTPUrlBase(); + if (!configured_url_base.empty()) { + configured_hostUrl = configured_url_base; + configured_hostname = m_oss->getStoragePrefix(); + } + + // + // Check the path for validity. + // + std::string object; + int rv = parse_path(configured_hostname, path, object); + + if (rv != 0) { + return rv; + } + + m_object = object; + m_hostname = configured_hostname; + m_hostUrl = configured_hostUrl; + m_remote_flavor = m_oss->getRemoteFlavor(); + + if (m_remoteList.empty()) { + m_log.Log(LogMask::Debug, "HTTPFile::Readdir", "Readdir called"); + HTTPList list(m_hostUrl, m_object, m_log, m_oss->getToken()); + m_log.Log(LogMask::Debug, "HTTPDirectory::Readdir", + "About to perform download from HTTPDirectory::Readdir(): " + "hostname / object:", + m_hostname.c_str(), m_object.c_str()); + if (!list.SendRequest()) { + std::stringstream ss; + ss << "Failed to send GetObject command: " << list.getResponseCode() + << "'" << list.getResultString() << "'"; + m_log.Log(LogMask::Warning, "HTTPDirectory::Readdir", + ss.str().c_str()); + return 0; + } + + m_remoteList = + parseHTMLToFSSpecString(extractHTMLTable(list.getResultString())); + } + + return 0; +} From fbe179997c22b8335060a033030fbdb823dbf06e Mon Sep 17 00:00:00 2001 From: rw2 Date: Tue, 19 Aug 2025 16:26:16 -0500 Subject: [PATCH 5/7] unit tests added --- src/HTTPDirectory.cc | 8 ++++---- src/HTTPFileSystem.cc | 2 -- test/http_tests.cc | 42 ++++++++++++++++++++++++++++++------------ test/xrdhttp-setup.sh | 8 ++++++++ 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/src/HTTPDirectory.cc b/src/HTTPDirectory.cc index 3e2828b..bcb430b 100644 --- a/src/HTTPDirectory.cc +++ b/src/HTTPDirectory.cc @@ -188,17 +188,17 @@ int HTTPDirectory::Opendir(const char *path, XrdOucEnv &env) { m_remote_flavor = m_oss->getRemoteFlavor(); if (m_remoteList.empty()) { - m_log.Log(LogMask::Debug, "HTTPFile::Readdir", "Readdir called"); + m_log.Log(LogMask::Debug, "HTTPFile::Opendir", "Opendir called"); HTTPList list(m_hostUrl, m_object, m_log, m_oss->getToken()); - m_log.Log(LogMask::Debug, "HTTPDirectory::Readdir", - "About to perform download from HTTPDirectory::Readdir(): " + m_log.Log(LogMask::Debug, "HTTPDirectory::Opendir", + "About to perform download from HTTPDirectory::Opendir(): " "hostname / object:", m_hostname.c_str(), m_object.c_str()); if (!list.SendRequest()) { std::stringstream ss; ss << "Failed to send GetObject command: " << list.getResponseCode() << "'" << list.getResultString() << "'"; - m_log.Log(LogMask::Warning, "HTTPDirectory::Readdir", + m_log.Log(LogMask::Warning, "HTTPDirectory::Opendir", ss.str().c_str()); return 0; } diff --git a/src/HTTPFileSystem.cc b/src/HTTPFileSystem.cc index 0b317a5..1c5fd9c 100644 --- a/src/HTTPFileSystem.cc +++ b/src/HTTPFileSystem.cc @@ -158,8 +158,6 @@ int HTTPFileSystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *env) { std::string error; - m_log.Emsg("Stat", "Stat'ing path", path); - // need to forward a HEAD request to the remote server HTTPFile httpFile(m_log, this); diff --git a/test/http_tests.cc b/test/http_tests.cc index 4f1723e..a86589b 100644 --- a/test/http_tests.cc +++ b/test/http_tests.cc @@ -22,8 +22,11 @@ #include #include #include +#include #include +#include +#include #include #include #include @@ -64,22 +67,21 @@ TEST(TestHTTPFile, TestList) { HTTPFileSystem fs(&log, g_config_file.c_str(), nullptr); struct stat si; - auto rc = fs.Stat("/hello_world.txt", &si); + auto rc = fs.Stat("/testdir", &si); ASSERT_EQ(rc, 0); - ASSERT_EQ(si.st_size, 13); + ASSERT_EQ(si.st_size, 4096); - auto fh = fs.newFile(); + auto fd = fs.newDir(); + struct stat *statStruct = new struct stat; + fd->StatRet(statStruct); XrdOucEnv env; - rc = fh->Open("/hello_world.txt", O_RDONLY, 0700, env); - ASSERT_EQ(rc, 0); - - char buf[12]; - auto res = fh->Read(buf, 0, 12); - ASSERT_EQ(res, 12); + rc = fd->Open("/testdir", O_RDONLY, 0700, env); + ASSERT_EQ(rc, -21); + ASSERT_EQ(fd->Opendir("/testdir", env), 0); - ASSERT_EQ(memcmp(buf, "Hello, World", 12), 0); - - ASSERT_EQ(fh->Close(), 0); + char buf[255]; + auto res = fd->Readdir(buf, 255); + ASSERT_EQ(res, 15); } TEST(TestHTTPFile, TestXfer) { @@ -128,7 +130,23 @@ TEST(TestHTTPParseProtocol, Test1) { ASSERT_EQ(protocol, "http"); } +void segfaultHandler(int sig) { + void *array[20]; + size_t size; + + // Get void*'s for all entries on the stack + size = backtrace(array, 20); + + // Print stack trace to stderr + fprintf(stderr, "Error: signal %d:\n", sig); + backtrace_symbols_fd(array, size, STDERR_FILENO); + + exit(1); +} + int main(int argc, char **argv) { + signal(SIGSEGV, segfaultHandler); + ::testing::InitGoogleTest(&argc, argv); if (argc != 2) { diff --git a/test/xrdhttp-setup.sh b/test/xrdhttp-setup.sh index 2b94c27..4eb5400 100755 --- a/test/xrdhttp-setup.sh +++ b/test/xrdhttp-setup.sh @@ -167,6 +167,8 @@ EOF # Export some data through the origin echo "Hello, World" > "$XROOTD_EXPORTDIR/hello_world.txt" +mkdir "$XROOTD_EXPORTDIR/testdir" +echo "Hello, World" > "$XROOTD_EXPORTDIR/testdir/hello_world.txt" # Launch XRootD daemon. "$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- >>"$BINARY_DIR/tests/$TEST_NAME/server.log" 2>>"$BINARY_DIR/tests/$TEST_NAME/server.log" & @@ -199,9 +201,15 @@ echo "xrootd started at $XROOTD_URL" XROOTD_HTTPSERVER_CONFIG="$XROOTD_CONFIGDIR/xrootd-httpserver.cfg" cat > "$XROOTD_HTTPSERVER_CONFIG" < Date: Thu, 21 Aug 2025 14:02:23 -0500 Subject: [PATCH 6/7] webdav functionality added and manually tested --- src/HTTPCommands.cc | 43 ++++++++++- src/HTTPCommands.hh | 13 ++++ src/HTTPDirectory.cc | 178 +++++++++++++++++++++++++++++++++++++++---- src/HTTPDirectory.hh | 2 + 4 files changed, 221 insertions(+), 15 deletions(-) diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index b42788d..c9918c2 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -140,7 +140,7 @@ bool HTTPRequest::SendHTTPRequest(const std::string &payload) { return false; } - headers["Content-Type"] = "binary/octet-stream"; + // headers["Content-Type"] = "binary/octet-stream"; return sendPreparedRequest(hostUrl, payload, payload.size(), true); } @@ -468,6 +468,16 @@ bool HTTPRequest::SetupHandle(CURL *curl) { } } + if (httpVerb == "PROPFIND") { + rv = curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PROPFIND"); + if (rv != CURLE_OK) { + this->errorCode = "E_CURL_LIB"; + this->errorMessage = + "curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST) failed."; + return false; + } + } + if (httpVerb == "POST") { rv = curl_easy_setopt(curl, CURLOPT_POST, 1); if (rv != CURLE_OK) { @@ -475,7 +485,9 @@ bool HTTPRequest::SetupHandle(CURL *curl) { this->errorMessage = "curl_easy_setopt( CURLOPT_POST ) failed."; return false; } + } + if (httpVerb == "POST" || httpVerb == "PROPFIND") { rv = curl_easy_setopt(curl, CURLOPT_POSTFIELDS, m_payload.data()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; @@ -829,6 +841,35 @@ bool HTTPList::SendRequest() { // --------------------------------------------------------------------------- +HTTPPropfind::~HTTPPropfind() {} + +bool HTTPPropfind::SendRequest() { + httpVerb = "PROPFIND"; + headers["Depth"] = "1"; + headers["Content-Type"] = "application/xml"; + + if (!object.empty()) { + if (hostUrl.back() != '/' && object.front() != '/') { + hostUrl += '/'; + } + hostUrl += object; + } + + std::string payload = "" + " " + " " + " " + " " + " " + ""; + + expectedResponseCode = 207; + + return SendHTTPRequest(payload); +} + +// --------------------------------------------------------------------------- + HTTPHead::~HTTPHead() {} bool HTTPHead::SendRequest() { diff --git a/src/HTTPCommands.hh b/src/HTTPCommands.hh index 3f6648b..f74681c 100644 --- a/src/HTTPCommands.hh +++ b/src/HTTPCommands.hh @@ -337,6 +337,19 @@ class HTTPList : public HTTPRequest { std::string object; }; +class HTTPPropfind : public HTTPRequest { + public: + HTTPPropfind(const std::string &h, const std::string &o, XrdSysError &log, + const TokenFile *token) + : HTTPRequest(h, log, token), object(o) {} + + virtual ~HTTPPropfind(); + + virtual bool SendRequest(); + + std::string object; +}; + class HTTPHead : public HTTPRequest { public: HTTPHead(const std::string &h, const std::string &o, XrdSysError &log, diff --git a/src/HTTPDirectory.cc b/src/HTTPDirectory.cc index bcb430b..172c8b2 100644 --- a/src/HTTPDirectory.cc +++ b/src/HTTPDirectory.cc @@ -44,7 +44,142 @@ #include HTTPDirectory::HTTPDirectory(XrdSysError &log, HTTPFileSystem *oss) - : m_log(log), m_oss(oss), m_bytesReturned(0) {} // Initialize it to false. + : m_log(log), m_oss(oss), m_bytesReturned(0) {} + +std::map +HTTPDirectory::parseWebDAVToFSSpecString(const std::string &content) { + using namespace tinyxml2; + std::map remoteList; + + XMLDocument doc; + XMLError error = doc.Parse(content.c_str()); + if (error != XML_SUCCESS) { + std::cerr << "Failed to parse WebDAV XML response!" << std::endl; + return remoteList; + } + + XMLElement *multistatus = doc.FirstChildElement("ns0:multistatus"); + if (!multistatus) { + std::cerr << "No multistatus element found in WebDAV response!" + << std::endl; + return remoteList; + } + + for (XMLElement *response = multistatus->FirstChildElement("ns0:response"); + response != nullptr; + response = response->NextSiblingElement("ns0:response")) { + + XMLElement *href = response->FirstChildElement("ns0:href"); + if (!href || !href->GetText()) { + continue; + } + + std::string path = href->GetText(); + + struct stat fileStat = {}; + fileStat.st_nlink = 1; + fileStat.st_uid = 1; + fileStat.st_gid = 1; + fileStat.st_atime = 0; + fileStat.st_ctime = 0; + fileStat.st_dev = 0; + fileStat.st_ino = 0; + fileStat.st_mode = 0600 | S_IFREG; + + time_t modTime = 0; + off_t fileSize = 0; + + for (XMLElement *propstat = response->FirstChildElement("ns0:propstat"); + propstat != nullptr; + propstat = propstat->NextSiblingElement("ns0:propstat")) { + + // Check status first - we only want properties with 200 OK status + XMLElement *status = propstat->FirstChildElement("ns0:status"); + if (!status || !status->GetText() || + std::string(status->GetText()).find("200 OK") == + std::string::npos) { + continue; + } + + XMLElement *prop = propstat->FirstChildElement("ns0:prop"); + if (!prop) + continue; + + XMLElement *resourceType = + prop->FirstChildElement("ns0:resourcetype"); + if (resourceType && + resourceType->FirstChildElement("ns0:collection")) { + fileStat.st_mode = 0600 | S_IFDIR; + } + + XMLElement *lastModified = + prop->FirstChildElement("ns0:getlastmodified"); + if (lastModified && lastModified->GetText()) { + std::string modifiedStr = lastModified->GetText(); + + struct tm tm = {}; + char month[4] = {}; + int day, year, hour, min, sec; + + if (sscanf(modifiedStr.c_str(), "%*[^,], %d %3s %d %d:%d:%d", + &day, month, &year, &hour, &min, &sec) == 6) { + + const char *months[] = {"Jan", "Feb", "Mar", "Apr", + "May", "Jun", "Jul", "Aug", + "Sep", "Oct", "Nov", "Dec"}; + int monthNum = 0; + for (int i = 0; i < 12; i++) { + if (strcmp(month, months[i]) == 0) { + monthNum = i; + break; + } + } + + tm.tm_year = year - 1900; + tm.tm_mon = monthNum; + tm.tm_mday = day; + tm.tm_hour = hour; + tm.tm_min = min; + tm.tm_sec = sec; + + modTime = mktime(&tm); + } + } + + // Get content length + XMLElement *contentLength = + prop->FirstChildElement("ns0:getcontentlength"); + if (contentLength && contentLength->GetText()) { + try { + fileSize = std::stoll(contentLength->GetText()); + } catch (std::exception &e) { + fileSize = 0; + } + } + } + + fileStat.st_size = fileSize; + fileStat.st_mtime = modTime; + std::string name = path; + + if (!name.empty() && name.back() == '/') { + name.pop_back(); + } + + size_t lastSlash = name.find_last_of('/'); + if (lastSlash != std::string::npos) { + name = name.substr(lastSlash + 1); + } + + if (name.empty() && path.length() > 0 && path[0] == '/') { + name = "/"; + } + + remoteList[name] = fileStat; + } + + return remoteList; +} std::map HTTPDirectory::parseHTMLToFSSpecString(const std::string &htmlContent) { @@ -188,24 +323,39 @@ int HTTPDirectory::Opendir(const char *path, XrdOucEnv &env) { m_remote_flavor = m_oss->getRemoteFlavor(); if (m_remoteList.empty()) { - m_log.Log(LogMask::Debug, "HTTPFile::Opendir", "Opendir called"); - HTTPList list(m_hostUrl, m_object, m_log, m_oss->getToken()); m_log.Log(LogMask::Debug, "HTTPDirectory::Opendir", "About to perform download from HTTPDirectory::Opendir(): " "hostname / object:", m_hostname.c_str(), m_object.c_str()); - if (!list.SendRequest()) { - std::stringstream ss; - ss << "Failed to send GetObject command: " << list.getResponseCode() - << "'" << list.getResultString() << "'"; - m_log.Log(LogMask::Warning, "HTTPDirectory::Opendir", - ss.str().c_str()); - return 0; - } + if (m_oss->getRemoteFlavor() == "webdav") { + HTTPPropfind request = + HTTPPropfind(m_hostUrl, m_object, m_log, m_oss->getToken()); + if (!request.SendRequest()) { + std::stringstream ss; + ss << "Failed to send GetObject command: " + << request.getResponseCode() << "'" + << request.getResultString() << "'"; + m_log.Log(LogMask::Warning, "HTTPDirectory::Opendir", + ss.str().c_str()); + return 0; + } + m_remoteList = parseWebDAVToFSSpecString(request.getResultString()); - m_remoteList = - parseHTMLToFSSpecString(extractHTMLTable(list.getResultString())); + } else { + HTTPList request = + HTTPList(m_hostUrl, m_object, m_log, m_oss->getToken()); + if (!request.SendRequest()) { + std::stringstream ss; + ss << "Failed to send GetObject command: " + << request.getResponseCode() << "'" + << request.getResultString() << "'"; + m_log.Log(LogMask::Warning, "HTTPDirectory::Opendir", + ss.str().c_str()); + return 0; + } + m_remoteList = parseHTMLToFSSpecString( + extractHTMLTable(request.getResultString())); + } } - return 0; } diff --git a/src/HTTPDirectory.hh b/src/HTTPDirectory.hh index 6087067..c6bd9d0 100644 --- a/src/HTTPDirectory.hh +++ b/src/HTTPDirectory.hh @@ -54,6 +54,8 @@ class HTTPDirectory : public XrdOssDF { std::map parseHTMLToFSSpecString(const std::string &htmlContent); + std::map + parseWebDAVToFSSpecString(const std::string &htmlContent); std::string extractHTMLTable(const std::string &htmlContent); struct stat *mystat; From 05f9d78d2a4d62500fd7819b584917018fbb4af0 Mon Sep 17 00:00:00 2001 From: rw2 Date: Thu, 21 Aug 2025 14:13:34 -0500 Subject: [PATCH 7/7] webdav functionality added and manually tested --- src/HTTPCommands.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index c9918c2..476116c 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -140,7 +140,8 @@ bool HTTPRequest::SendHTTPRequest(const std::string &payload) { return false; } - // headers["Content-Type"] = "binary/octet-stream"; + if (headers.find("Content-Type") == headers.end()) + headers["Content-Type"] = "binary/octet-stream"; return sendPreparedRequest(hostUrl, payload, payload.size(), true); }