Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Data migration that registers the "hugging_face" embedding mode together
# with the list of model names offered for it.
class CreateEmbeddingModelHuggingFace < ActiveRecord::Migration[7.1]
  # Value of the `mode` column shared by `up` (insert) and `down` (cleanup).
  MODE = "hugging_face".freeze

  # Inserts a single EmbeddingModel row for the Hugging Face mode.
  # Uses `create!`, so a validation failure aborts the migration.
  def up
    model_names = %w[
      all-MiniLM-L6-v2
      all-mpnet-base-v2
      paraphrase-MiniLM-L12-v2
      multi-qa-MiniLM-L6-cos-v1
      msmarco-MiniLM-L6-cos-v5
    ]

    EmbeddingModel.create!(mode: MODE, models: model_names)
  end

  # Removes every EmbeddingModel row whose mode is "hugging_face".
  def down
    EmbeddingModel.where(mode: MODE).destroy_all
  end
end
4 changes: 4 additions & 0 deletions server/db/data_schema.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# frozen_string_literal: true

# Merge conflict resolved: keeps the magic comment from HEAD and the newer
# data-migration version introduced with the Hugging Face embedding models.
DataMigrate::Data.define(version: 20251230153941)
59 changes: 59 additions & 0 deletions server/lib/reverse_etl/transformers/embeddings/hugging_face.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# frozen_string_literal: true

module ReverseEtl
  module Transformers
    module Embeddings
      # Embedding transformer that sends text to the Hugging Face inference
      # router (feature-extraction pipeline) and returns the parsed JSON reply.
      class HuggingFace < Base
        # Raised for every failure of the Hugging Face call: non-success HTTP
        # status, unparsable body, or any error raised while making the request.
        class HuggingFaceError < StandardError; end

        HUGGING_FACE_EMBEDDING_URL = "https://router.huggingface.co/hf-inference/models/sentence-transformers"

        # @param embedding_config [Hash] configuration; `:api_key` and `:model`
        #   are read here, the whole hash is also forwarded to the superclass.
        def initialize(embedding_config)
          super(embedding_config)
          @api_key = embedding_config[:api_key]
          @model = embedding_config[:model]
        end

        # Requests an embedding for +text+.
        #
        # @param text [Object] value sent as the `inputs` field of the payload
        # @return [Object] the JSON-decoded response body
        #   (presumably an array of floats; not enforced here)
        # @raise [HuggingFaceError] on any request, status or parsing failure
        def generate_embedding(text)
          hugging_face_embedding_request(text)
        end

        private

        # Performs the POST request and decodes the response.
        #
        # @param text [Object] value sent as the `inputs` field of the payload
        # @return [Object] the JSON-decoded response body
        # @raise [HuggingFaceError] on any request, status or parsing failure
        def hugging_face_embedding_request(text)
          http_method = "POST"
          payload = {
            inputs: text,
            normalize: true
          }
          headers = {
            "Authorization" => "Bearer #{@api_key}",
            "Content-Type" => "application/json"
          }

          response = Multiwoven::Integrations::Core::HttpClient.request(
            "#{HUGGING_FACE_EMBEDDING_URL}/#{@model}/pipeline/feature-extraction",
            http_method,
            payload:,
            headers:
          )
          unless success?(response)
            raise HuggingFaceError, "Hugging Face request failed with status #{response.code}: #{response.body}"
          end

          JSON.parse(response.body)
        rescue HuggingFaceError
          # Already our own error (e.g. the status failure above): re-raise it
          # untouched so the generic handler below does not wrap the message a
          # second time.
          raise
        rescue JSON::ParserError => e
          raise HuggingFaceError, "Failed to parse response from Hugging Face: #{e.message}"
        rescue StandardError => e
          raise HuggingFaceError, "An error occurred while making the Hugging Face request: #{e.message}"
        end

        # @param response [Object, nil] HTTP response responding to `code`
        # @return [Boolean, nil] truthy when the status code is 200 or 201;
        #   falsy when the response is nil or carries any other status
        def success?(response)
          response && %w[200 201].include?(response.code.to_s)
        end
      end
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# frozen_string_literal: true

require "rails_helper"

# Specs for the Hugging Face embedding transformer: happy path, outgoing
# request arguments, and the three failure modes wrapped in HuggingFaceError.
RSpec.describe ReverseEtl::Transformers::Embeddings::HuggingFace, type: :model do
  subject(:transformer) { described_class.new(config) }

  let(:config) { { "model" => "all-MiniLM-L6-v2", "api_key" => "fake_api_key" }.with_indifferent_access }
  let(:input_text) { "This is a sample text." }
  let(:http_client) { Multiwoven::Integrations::Core::HttpClient }
  let(:error_class) { ReverseEtl::Transformers::Embeddings::HuggingFace::HuggingFaceError }

  # Builds a stand-in HTTP response exposing only what the transformer reads.
  def http_response(body:, code:)
    double("response", body:, code:)
  end

  describe "#generate_embedding" do
    context "when the Hugging Face request is successful" do
      let(:embedding_json) { [0.1, 0.2, 0.3].to_json }

      before do
        allow(http_client).to receive(:request).and_return(http_response(body: embedding_json, code: "200"))
      end

      it "returns the embedding from the response" do
        expect(transformer.generate_embedding(input_text)).to eq([0.1, 0.2, 0.3])
      end

      it "calls HttpClient.request with the correct arguments" do
        expected_url = "https://router.huggingface.co/hf-inference/models/sentence-transformers/all-MiniLM-L6-v2/pipeline/feature-extraction"
        expected_payload = { inputs: input_text, normalize: true }
        expected_headers = {
          "Authorization" => "Bearer #{config[:api_key]}",
          "Content-Type" => "application/json"
        }

        expect(http_client).to receive(:request)
          .with(expected_url, "POST", payload: expected_payload, headers: expected_headers)
          .and_return(http_response(body: embedding_json, code: "200"))

        transformer.generate_embedding(input_text)
      end
    end

    context "when the Hugging Face request fails" do
      before do
        allow(http_client).to receive(:request).and_return(http_response(body: "Error", code: "500"))
      end

      it "raises a HuggingFaceError with the failure message" do
        expect { transformer.generate_embedding(input_text) }
          .to raise_error(error_class, /Hugging Face request failed with status 500: Error/)
      end
    end

    context "when there is a JSON parsing error" do
      before do
        allow(http_client).to receive(:request).and_return(http_response(body: "invalid", code: "200"))
      end

      it "raises a HuggingFaceError with the parse error message" do
        expect { transformer.generate_embedding(input_text) }
          .to raise_error(error_class, /Failed to parse response from Hugging Face/)
      end
    end

    context "when there is a general error during the Hugging Face request" do
      before do
        allow(http_client).to receive(:request).and_raise(StandardError.new("Something went wrong"))
      end

      it "raises a HuggingFaceError with the general error message" do
        expect { transformer.generate_embedding(input_text) }
          .to raise_error(error_class, /An error occurred while making the Hugging Face request: Something went wrong/)
      end
    end
  end
end