205 changes: 205 additions & 0 deletions fwprovider/test/resource_vm_test.go
@@ -739,6 +739,188 @@ func TestAccResourceVMNetwork(t *testing.T) {
),
},
}},
{"remove network device", []resource.TestStep{
{
SkipFunc: func() (bool, error) {
// backward incompatibility with the current implementation of clone
// see https://github.com/bpg/terraform-provider-proxmox/pull/2260
return true, nil
},
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "test_vm" {
node_name = "{{.NodeName}}"
started = false

network_device {
bridge = "vmbr0"
}
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.test_vm", map[string]string{
"network_device.#": "1",
"network_device.0.bridge": "vmbr0",
}),
),
},
{
SkipFunc: func() (bool, error) {
// backward incompatibility with the current implementation of clone
// see https://github.com/bpg/terraform-provider-proxmox/pull/2260
return true, nil
},
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "test_vm" {
node_name = "{{.NodeName}}"
started = false
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.test_vm", map[string]string{
"network_device.#": "0",
}),
),
},
}},
{"multiple network devices removal", []resource.TestStep{
{
SkipFunc: func() (bool, error) {
// backward incompatibility with the current implementation of clone
// see https://github.com/bpg/terraform-provider-proxmox/pull/2260
return true, nil
},
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "test_vm" {
node_name = "{{.NodeName}}"
started = false

network_device {
bridge = "vmbr0"
model = "virtio"
}

network_device {
bridge = "vmbr1"
model = "virtio"
}
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.test_vm", map[string]string{
"network_device.#": "2",
"network_device.0.bridge": "vmbr0",
"network_device.1.bridge": "vmbr1",
}),
),
},
{
SkipFunc: func() (bool, error) {
// backward incompatibility with the current implementation of clone
// see https://github.com/bpg/terraform-provider-proxmox/pull/2260
return true, nil
},
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "test_vm" {
node_name = "{{.NodeName}}"
started = false

# Only keep the first network device
network_device {
bridge = "vmbr0"
model = "virtio"
}
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.test_vm", map[string]string{
"network_device.#": "1",
"network_device.0.bridge": "vmbr0",
}),
),
},
{
SkipFunc: func() (bool, error) {
// backward incompatibility with the current implementation of clone
// see https://github.com/bpg/terraform-provider-proxmox/pull/2260
return true, nil
},
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "test_vm" {
node_name = "{{.NodeName}}"
started = false
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.test_vm", map[string]string{
"network_device.#": "0",
}),
),
},
}},
{"network device state consistency", []resource.TestStep{
{
SkipFunc: func() (bool, error) {
// backward incompatibility with the current implementation of clone
// see https://github.com/bpg/terraform-provider-proxmox/pull/2260
return true, nil
},
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "test_vm" {
node_name = "{{.NodeName}}"
started = false

network_device {
bridge = "vmbr0"
model = "virtio"
}
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.test_vm", map[string]string{
"network_device.#": "1",
"network_device.0.bridge": "vmbr0",
"network_device.0.model": "virtio",
}),
),
},
{
SkipFunc: func() (bool, error) {
// backward incompatibility with the current implementation of clone
// see https://github.com/bpg/terraform-provider-proxmox/pull/2260
return true, nil
},
// This step tests that the state is read correctly after network device removal
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "test_vm" {
node_name = "{{.NodeName}}"
started = false
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.test_vm", map[string]string{
"network_device.#": "0",
}),
),
},
{
SkipFunc: func() (bool, error) {
// backward incompatibility with the current implementation of clone
// see https://github.com/bpg/terraform-provider-proxmox/pull/2260
return true, nil
},
// This step tests that we can add network devices back after removal
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "test_vm" {
node_name = "{{.NodeName}}"
started = false

network_device {
bridge = "vmbr0"
model = "virtio"
}
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.test_vm", map[string]string{
"network_device.#": "1",
"network_device.0.bridge": "vmbr0",
"network_device.0.model": "virtio",
}),
),
},
}},
}

for _, tt := range tests {
@@ -821,6 +1003,29 @@ func TestAccResourceVMClone(t *testing.T) {
}),
),
}}},
{"clone with network devices", []resource.TestStep{{
Contributor Author: Here is a failing test for the cloned VM scenario, @TobiPeterG:

Contributor Author:
    --- FAIL: TestAccResourceVMClone/clone_with_network_devices (5.15s)
        resource_vm_test.go:1019: Step 1/1 error: After applying this test step, the non-refresh plan was not empty.
            stdout:
            
            
            Terraform used the selected providers to generate the following execution
            plan. Resource actions are indicated with the following symbols:
              ~ update in-place
            
            Terraform will perform the following actions:
            
              # proxmox_virtual_environment_vm.clone will be updated in-place
              ~ resource "proxmox_virtual_environment_vm" "clone" {
                    id                      = "110"
                  ~ ipv4_addresses          = [] -> (known after apply)
                  ~ ipv6_addresses          = [] -> (known after apply)
                  ~ network_interface_names = [] -> (known after apply)
                    # (25 unchanged attributes hidden)
            
                  - network_device {
                      - bridge       = "vmbr0" -> null
                      - disconnected = false -> null
                      - enabled      = true -> null
                      - firewall     = false -> null
                      - mac_address  = "BC:24:11:57:93:1C" -> null
                      - model        = "virtio" -> null
                      - mtu          = 0 -> null
                      - queues       = 0 -> null
                      - rate_limit   = 0 -> null
                      - vlan_id      = 0 -> null
                        # (1 unchanged attribute hidden)
                    }
            
                    # (1 unchanged block hidden)
                }
            
            Plan: 0 to add, 1 to change, 0 to destroy.
FAIL
FAIL    github.com/bpg/terraform-provider-proxmox/fwprovider/test       10.549s
FAIL

Contributor:
Please elaborate why the behaviour is wrong. If I understand it correctly, you create a VM with one network interface and then clone it to a VM without one. I would expect opentofu to remove the network interface from the VM when it's not in the config for the new VM. If you wanted the new VM to have the interface, you would add it in the config. What happens in this case?

More interesting is the question of what happens when you clone a VM with multiple network interfaces and configure the cloned VM to also have these interfaces. Are they correctly mapped?

Contributor Author:
I'm not sure whether we want to keep the inherited network devices in the cloned VM or whether we expect the user to specify them. That is not my decision to make, as I don't know what the intent was. If network devices should be specified whether or not they are cloned from a source VM, this PR solves the issue of removing extra network devices.

Contributor:
I would argue that we want to define the state of the infrastructure through opentofu: everything that is in the opentofu config should exist in reality, and everything that exists in reality should be configured through opentofu. As a user, I would be confused if my opentofu config didn't mention a network interface that actually exists on my infrastructure. But I guess it's bpg's call to make.

I would also wonder: How would you remove a network interface from a VM you clone?


> My go-to approach is to use cloud-init to configure the base OS image the way I need, skipping cloning altogether.

I do this too. As a result, I'm not really qualified to jump in on this discussion since I don't use cloning, nor have I looked into how the Proxmox API exposes cloning, but I've run into similar minor issues when importing resources from public cloud providers....

Occasionally when importing a public cloud resource, I find that running a plan immediately after results in a proposed "change". Most of the time I don't care and just run apply to commit that setting into state, but other times I find that I don't actually want that setting and I need to edit my Terraform config to explicitly add that setting so I can change it to something else. Since the import succeeded with the config I had written, I've been assuming these minor anomalies are just due to added return values from the API call, or perhaps a mismatch in default values stored in the provider vs. the API vs. the cloud provider's web UI.

To me, Proxmox cloning feels like a server-side import of a VM resource. I wonder if it's worth considering cloning in this way? Ideally, the clone block would just signal the provider to call the appropriate clone API to initially create the cloned VM, but the end result is a VM resource with all the config any other VM resource requires. Should some config be missing, modified, or added, those differences can either be applied at the time of cloning, or on the next apply.

> The intent was that if a clone has nothing except a reference to the template, it should create an exact copy with all devices inherited from the template.

I don't use cloning so I'm not really sure how the functionality works, but I suspect this matches how Proxmox works. But from a Terraform provider perspective, I think this inheritance is problematic. It seems tricky for both the provider and humans to reason about inherited config vs. explicit config, and how to make changes to inherited config in the future. I've seen this issue come up occasionally in bug reports such as #1988. I think it's worth considering failing a cloned VM resource attempt if the VM resource block doesn't exactly match the source template (or with whatever minor changes can be easily applied, like a change in memory, or dropping a network interface).

Obviously this has massive backwards compatibility problems (and there are certainly nuances I'm unaware of), but I thought I'd throw out my idea that cloning could be considered a server-side resource "import". It certainly adds some friction when creating a cloned resource, but the end result would be a proper explicitly-defined vm resource that behaves like any other non-cloned vm resource.

Contributor:
> The intent was that if a clone has nothing except a reference to the template, it should create an exact copy with all devices inherited from the template. There are a few tests that capture this behavior, and it's also somewhat documented here: https://registry.terraform.io/providers/bpg/proxmox/latest/docs/guides/clone-vm
>
> And I don't think this behaviour should be changed.

Could you please elaborate why this shouldn't change? Why wouldn't you just create an opentofu config matching the setup of the cloned VM to have an exact copy?

> So we can't just delete a device if the clone doesn't have it. Unfortunately, there's no good workaround to capture this behaviour in the legacy VM resource, since devices are represented as a list, not a map as they should be.
>
> In an ideal world (FWK provider, v1.+), we'd be able to define something like:
>
>   network {
>      "net0" = {
>         // network device attributes
>      }
>   }
>
> in the template, and then:
>
>   network {
>      "net0" = {} // or null
>   }
>
> in the clone to explicitly remove it.

Aren't explicit removals defined by not specifying an attribute? That's how it works with other providers I use as well. When it's not defined to be there, it's removed.

Owner:

> Could you please elaborate why this shouldn't change? Why wouldn't you just create an opentofu config matching the setup of the cloned VM to have an exact copy?

That somewhat defeats the purpose of VM cloning and doesn't match PVE's behavior. In PVE, if you want to clone a VM, you simply provide a new ID and a target storage, and that's pretty much it -- a full clone is created with all devices included.
I want to keep this behaviour because a) that's how PVE works, and b) that's how the provider has been doing it from the very beginning. If this changes, it will be a huge backward-compatibility problem for a good portion of users out there.

> Aren't explicit removals defined by not specifying an attribute? That's how it works with other providers I use as well. When it's not defined to be there, it's removed.

Generally it does, but again, those are the "special" clone creation and update cases. I want to keep the current behaviour for the reasons mentioned above. My memory is a bit hazy, but when I was first experimenting with this in the VM v2 resource and the Plugin FWK, it seemed that explicitly setting an attribute to an empty or null value was the only reliable way to handle that.

Anyway, looks like this PR opened a can of worms 😅, and this topic probably deserves a separate discussion. I’ll try to run more tests on this PR to see if it breaks any of the existing behaviour, and if it does, to what extent. Then see what we can do to merge it with minimal side effects.

Contributor (@TobiPeterG, Nov 7, 2025):

>> Could you please elaborate why this shouldn't change? Why wouldn't you just create an opentofu config matching the setup of the cloned VM to have an exact copy?
>
> That somewhat defeats the purpose of VM cloning and doesn't match PVE's behavior. In PVE, if you want to clone a VM, you simply provide a new ID and a target storage, and that's pretty much it -- a full clone is created with all devices included. I want to keep this behaviour because a) that's how PVE works,

I see, this makes sense. The question is whether the behavior of PVE is compatible with the concepts of terraform. In this case, it seems questionable.

> b) that's how the provider has been doing it from the very beginning. If this changes, it will be a huge backward-compatibility problem for a good portion of users out there.

Yes, this is true. But should breaking changes stop "improvements" (general question)? The current versions would continue to work the same way, and breaking changes could/should be announced in the release notes. Anyone who updates the provider to a new version without checking what changed is at fault for breaking things, I think.

>> Aren't explicit removals defined by not specifying an attribute? That's how it works with other providers I use as well. When it's not defined to be there, it's removed.
>
> Generally it does, but again, those are the "special" clone creation and update cases. I want to keep the current behaviour for the reasons mentioned above. My memory is a bit hazy, but when I was first experimenting with this in the VM v2 resource and the Plugin FWK, it seemed that explicitly setting an attribute to an empty or null value was the only reliable way to handle that.
>
> Anyway, looks like this PR opened a can of worms 😅, and this topic probably deserves a separate discussion. I'll try to run more tests on this PR to see if it breaks any of the existing behaviour, and if it does, to what extent. Then see what we can do to merge it with minimal side effects.

Yes, a separate discussion would be great. :)

I would be curious why the clone operation exists in PVE and how/why it's used with Terraform. In my case, for example, I only need the clone operation to get the disk copied from my template to my new VM. If there's a better way to handle this, I'm absolutely open to it. What other use cases would one have with terraform to clone a VM instead of copying the config of the template VM?

If there isn't a better way for disks, maybe it's worth making the clone operation a per-disk operation in terraform (only clone a specific disk from another VM) and making the clone itself a completely new resource (e.g. proxmox_virtual_environment_clone) where a user can only specify what to clone and the new name, without being able to modify the clone in any other way.

This way, the current clone operation could be removed and there wouldn't be "hidden" breaking changes. :)

Contributor:

Let's continue the discussion here: #2324

I tried to summarize the different options; I hope I didn't miss anything. If I did, feel free to add your points and thoughts :)

Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "template" {
node_name = "{{.NodeName}}"
started = false
network_device {
bridge = "vmbr0"
}
}
resource "proxmox_virtual_environment_vm" "clone" {
node_name = "{{.NodeName}}"
started = false
clone {
vm_id = proxmox_virtual_environment_vm.template.vm_id
}
}`),
Check: resource.ComposeTestCheckFunc(
ResourceAttributes("proxmox_virtual_environment_vm.clone", map[string]string{
"network_device.#": "1",
"network_device.0.bridge": "vmbr0",
}),
),
}}},
{"clone initialization datastore does not exist", []resource.TestStep{{
Config: te.RenderConfig(`
resource "proxmox_virtual_environment_vm" "template" {
2 changes: 1 addition & 1 deletion proxmoxtf/provider/provider.go
@@ -89,7 +89,7 @@ func providerConfigure(ctx context.Context, d *schema.ResourceData) (interface{}
 		otp = v.(string)
 	}
 
-	///nolint:staticcheck
+	//nolint:staticcheck
 	if v, ok := d.GetOkExists(mkProviderUsername); ok {
 		username = v.(string)
 	}
122 changes: 42 additions & 80 deletions proxmoxtf/resource/vm/network/network.go
@@ -97,16 +97,21 @@ func GetNetworkDeviceObjects(d *schema.ResourceData) (vms.CustomNetworkDevices,
 	return networkDeviceObjects, nil
 }
 
-// ReadNetworkDeviceObjects reads the network device objects from the resource data.
+func valueOrDefault[T any](v *T, def T) T {
+	if v == nil {
+		return def
+	}
+
+	return *v
+}
+
+// ReadNetworkDeviceObjects reads the network device objects from the response data.
 func ReadNetworkDeviceObjects(d *schema.ResourceData, vmConfig *vms.GetResponseData) diag.Diagnostics {
 	var diags diag.Diagnostics
 
-	// Compare the network devices to those stored in the state.
-	currentNetworkDeviceList := d.Get(MkNetworkDevice).([]interface{})
-
-	macAddresses := make([]interface{}, MaxNetworkDevices)
-	networkDeviceLast := -1
-	networkDeviceList := make([]interface{}, MaxNetworkDevices)
+	macAddresses := make([]interface{}, 0)
+	networkDevices := make([]interface{}, 0)
+
 	networkDeviceObjects := []*vms.CustomNetworkDevice{
 		vmConfig.NetworkDevice0,
 		vmConfig.NetworkDevice1,
@@ -142,85 +147,42 @@ func ReadNetworkDeviceObjects(d *schema.ResourceData, vmConfig *vms.GetResponseD
 		vmConfig.NetworkDevice31,
 	}
 
-	for ni, nd := range networkDeviceObjects {
-		networkDevice := map[string]interface{}{}
-
-		if nd != nil {
-			networkDeviceLast = ni
-
-			if nd.Bridge != nil {
-				networkDevice[mkNetworkDeviceBridge] = *nd.Bridge
-			} else {
-				networkDevice[mkNetworkDeviceBridge] = ""
-			}
-
-			networkDevice[mkNetworkDeviceEnabled] = nd.Enabled
-
-			if nd.LinkDown != nil {
-				networkDevice[mkNetworkDeviceDisconnected] = *nd.LinkDown
-			} else {
-				networkDevice[mkNetworkDeviceDisconnected] = false
-			}
-
-			if nd.Firewall != nil {
-				networkDevice[mkNetworkDeviceFirewall] = *nd.Firewall
-			} else {
-				networkDevice[mkNetworkDeviceFirewall] = false
-			}
-
-			if nd.MACAddress != nil {
-				macAddresses[ni] = *nd.MACAddress
-			} else {
-				macAddresses[ni] = ""
-			}
-
-			networkDevice[mkNetworkDeviceMACAddress] = macAddresses[ni]
-			networkDevice[mkNetworkDeviceModel] = nd.Model
-
-			if nd.Queues != nil {
-				networkDevice[mkNetworkDeviceQueues] = *nd.Queues
-			} else {
-				networkDevice[mkNetworkDeviceQueues] = 0
-			}
-
-			if nd.RateLimit != nil {
-				networkDevice[mkNetworkDeviceRateLimit] = *nd.RateLimit
-			} else {
-				networkDevice[mkNetworkDeviceRateLimit] = 0
-			}
-
-			if nd.Tag != nil {
-				networkDevice[mkNetworkDeviceVLANID] = nd.Tag
-			} else {
-				networkDevice[mkNetworkDeviceVLANID] = 0
-			}
-
-			if nd.Trunks != nil {
-				networkDevice[mkNetworkDeviceTrunks] = strings.Trim(
-					strings.Join(strings.Fields(fmt.Sprint(nd.Trunks)), ";"), "[]")
-			} else {
-				networkDevice[mkNetworkDeviceTrunks] = ""
-			}
-
-			if nd.MTU != nil {
-				networkDevice[mkNetworkDeviceMTU] = nd.MTU
-			} else {
-				networkDevice[mkNetworkDeviceMTU] = 0
-			}
-		} else {
-			macAddresses[ni] = ""
-			networkDevice[mkNetworkDeviceEnabled] = false
-		}
-
-		networkDeviceList[ni] = networkDevice
-	}
+	for len(networkDeviceObjects) > 0 && networkDeviceObjects[len(networkDeviceObjects)-1] == nil {
+		// drop
+		networkDeviceObjects = networkDeviceObjects[:len(networkDeviceObjects)-1]
+	}
 
-	if len(currentNetworkDeviceList) > 0 || networkDeviceLast > -1 {
-		err := d.Set(MkNetworkDevice, networkDeviceList[:networkDeviceLast+1])
-		diags = append(diags, diag.FromErr(err)...)
-	}
+	for _, netDevice := range networkDeviceObjects {
+		if netDevice == nil {
+			networkDevices = append(networkDevices, nil)
+			macAddresses = append(macAddresses, "")
+		} else {
+			networkDevices = append(networkDevices, map[string]interface{}{
+				mkNetworkDeviceBridge:       valueOrDefault(netDevice.Bridge, ""),
+				mkNetworkDeviceEnabled:      netDevice.Enabled,
+				mkNetworkDeviceDisconnected: valueOrDefault(netDevice.LinkDown, false),
+				mkNetworkDeviceFirewall:     valueOrDefault(netDevice.Firewall, false),
+				mkNetworkDeviceMACAddress:   valueOrDefault(netDevice.MACAddress, ""),
+				mkNetworkDeviceModel:        netDevice.Model,
+				mkNetworkDeviceQueues:       valueOrDefault(netDevice.Queues, 0),
+				mkNetworkDeviceRateLimit:    valueOrDefault(netDevice.RateLimit, 0),
+				mkNetworkDeviceVLANID:       valueOrDefault(netDevice.Tag, 0),
+				mkNetworkDeviceMTU:          valueOrDefault(netDevice.MTU, 0),
+				mkNetworkDeviceTrunks: func(trunks []int) string {
+					if trunks == nil {
+						return ""
+					}
+
+					return strings.Trim(strings.Join(strings.Fields(fmt.Sprint(trunks)), ";"), "[]")
+				}(netDevice.Trunks),
+			})
+			macAddresses = append(macAddresses, valueOrDefault(netDevice.MACAddress, ""))
+		}
+	}
 
-	err := d.Set(mkMACAddresses, macAddresses[0:len(currentNetworkDeviceList)])
+	err := d.Set(MkNetworkDevice, networkDevices)
+	diags = append(diags, diag.FromErr(err)...)
+	err = d.Set(mkMACAddresses, macAddresses)
 	diags = append(diags, diag.FromErr(err)...)
 
 	return diags
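
For readers skimming the diff: the rewritten read path first trims trailing empty device slots from the API response, so a device removed from the end of the list also disappears from Terraform state, and it keeps interior empty slots as nil placeholders, so a remaining net2 does not silently shift into net1's position. Below is a minimal, self-contained Go sketch of that technique; the device type and readDevices function are simplified stand-ins for the provider's vms.CustomNetworkDevice and ReadNetworkDeviceObjects, not the actual implementation.

  package main

  import "fmt"

  // device is a simplified stand-in for vms.CustomNetworkDevice.
  type device struct {
  	Bridge *string
  }

  // valueOrDefault mirrors the generic helper added in this PR:
  // dereference the pointer, or fall back to the given default.
  func valueOrDefault[T any](v *T, def T) T {
  	if v == nil {
  		return def
  	}

  	return *v
  }

  // readDevices maps the fixed, index-based device slots reported by the
  // API to a state list: trailing nils are dropped so removed devices
  // vanish from state, while interior nils stay as placeholders so the
  // remaining devices keep their original indices.
  func readDevices(slots []*device) []map[string]interface{} {
  	for len(slots) > 0 && slots[len(slots)-1] == nil {
  		slots = slots[:len(slots)-1]
  	}

  	state := make([]map[string]interface{}, 0, len(slots))

  	for _, d := range slots {
  		if d == nil {
  			state = append(state, nil) // keep the gap
  			continue
  		}

  		state = append(state, map[string]interface{}{
  			"bridge": valueOrDefault(d.Bridge, ""),
  		})
  	}

  	return state
  }

  func main() {
  	vmbr0, vmbr1 := "vmbr0", "vmbr1"
  	// net0 present, net1 removed, net2 present, net3 and up empty.
  	slots := []*device{{Bridge: &vmbr0}, nil, {Bridge: &vmbr1}, nil, nil}
  	fmt.Println(readDevices(slots)) // [map[bridge:vmbr0] map[] map[bridge:vmbr1]]
  }

One behavioural difference worth noting: the new read derives everything from the API response alone, whereas the old version sliced its results against the device list previously stored in state (currentNetworkDeviceList), coupling the read to potentially stale state.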