
Commit 8301b81

Author: The tunix Authors

Remove explicit sharding after applying LoRA.

PiperOrigin-RevId: 826211980
Parent: 6fe08f3

7 files changed, +31 -42 lines

examples/dpo_demo_gemma3.ipynb (0 additions, 6 deletions)

@@ -293,12 +293,6 @@
     "      base_model, lora_provider, **model_input\n",
     "  )\n",
     "\n",
-    "  with mesh:\n",
-    "    state = nnx.state(lora_model)\n",
-    "    pspecs = nnx.get_partition_spec(state)\n",
-    "    sharded_state = jax.lax.with_sharding_constraint(state, pspecs)\n",
-    "    nnx.update(lora_model, sharded_state)\n",
-    "\n",
     "  return lora_model"
    ]
   },
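The block deleted here (and in the other call sites below) is the same explicit-sharding pattern everywhere. As a hedged sketch, assuming an nnx LoRA model `lora_model` and a `jax.sharding.Mesh` named `mesh` are already in scope, it did roughly this:

from flax import nnx
import jax

with mesh:
  # Pull the parameter pytree out of the module.
  state = nnx.state(lora_model)
  # Read the partition specs annotated on those parameters.
  pspecs = nnx.get_partition_spec(state)
  # Constrain the state to those specs on the active mesh.
  sharded_state = jax.lax.with_sharding_constraint(state, pspecs)
  # Write the (now sharded) state back into the module.
  nnx.update(lora_model, sharded_state)

The commit removes that boilerplate from the call sites; the CLI and test helpers below route through a single reshard helper instead.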

examples/qlora_demo.ipynb (0 additions, 6 deletions)

@@ -490,12 +490,6 @@
     "      base_model, lora_provider, **model_input\n",
     "  )\n",
     "\n",
-    "  with mesh:\n",
-    "    state = nnx.state(lora_model)\n",
-    "    pspecs = nnx.get_partition_spec(state)\n",
-    "    sharded_state = jax.lax.with_sharding_constraint(state, pspecs)\n",
-    "    nnx.update(lora_model, sharded_state)\n",
-    "\n",
     "  return lora_model"
    ]
   },

scripts/grpo_demo_llama3_qwen2.py (0 additions, 6 deletions)

@@ -422,12 +422,6 @@ def get_lora_model(base_model, model_mesh=None):
       base_model, lora_provider, **model_input
   )
 
-  with model_mesh:
-    state = nnx.state(lora_model)
-    pspecs = nnx.get_partition_spec(state)
-    sharded_state = jax.lax.with_sharding_constraint(state, pspecs)
-    nnx.update(lora_model, sharded_state)
-
   return lora_model

scripts/grpo_demo_sglang_jax_rollout.py (0 additions, 6 deletions)

@@ -380,12 +380,6 @@ def get_lora_model(base_model, mesh):
   #     base_model, lora_provider, **model_input
   # )
   lora_model = base_model
-  with mesh:
-    state = nnx.state(lora_model)
-    pspecs = nnx.get_partition_spec(state)
-    sharded_state = jax.lax.with_sharding_constraint(state, pspecs)
-    nnx.update(lora_model, sharded_state)
-
   return lora_model

tunix/cli/utils/model.py (3 additions, 7 deletions)

@@ -33,6 +33,7 @@
 from tunix.models.llama3 import model as llama3_lib
 from tunix.models.qwen2 import model as qwen2_lib
 from tunix.models.qwen3 import model as qwen3_lib
+from tunix.rl import reshard
 
 
 # Map prefixes to the target object containing the methods.

@@ -252,13 +253,8 @@ def apply_lora_to_model(base_model, mesh, lora_config):
   lora_model = qwix.apply_lora_to_model(
       base_model, lora_provider, **model_input
   )
-
-  with mesh:
-    state = nnx.state(lora_model)
-    pspecs = nnx.get_partition_spec(state)
-    sharded_state = jax.lax.with_sharding_constraint(state, pspecs)
-    nnx.update(lora_model, sharded_state)
-
+  if mesh is not None:
+    lora_model = reshard.reshard_model_to_mesh(lora_model, mesh)
   return lora_model
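In isolation, the new call-site pattern reads as below. This is a hedged sketch, assuming `base_model`, `lora_provider`, `model_input`, and `mesh` are built as in the surrounding file:

import qwix
from tunix.rl import reshard

lora_model = qwix.apply_lora_to_model(
    base_model, lora_provider, **model_input
)
# The `if mesh is not None` check replaces the old unconditional `with mesh:` block;
# the helper additionally skips the reshard when the model already lives on `mesh`.
if mesh is not None:
  lora_model = reshard.reshard_model_to_mesh(lora_model, mesh)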

tunix/rl/reshard.py (21 additions, 1 deletion)

@@ -26,7 +26,8 @@
 from absl import logging
 import jax
 import jaxtyping
-
+from flax import nnx
+from tunix.rl import utils
 
 # TODO(tsbao): move this to util
 def callback_on_ready(

@@ -483,3 +484,22 @@ def _get_dst_sharding(x):
       ),
   )
   return resharded_array
+
+
+def reshard_model_to_mesh(model: nnx.Module, mesh: jax.sharding.Mesh):
+  """Reshard the lora model if the mesh is specified and the lora model mesh is not the same as the input mesh."""
+  model_mesh = utils.get_pytree_mesh_info(nnx.state(model))
+  if mesh is not None and model_mesh != mesh:
+    with mesh:
+      graph_def, state = nnx.split(model)
+      default_memory_kind = jax.devices()[0].default_memory().kind
+      dst_shardings = jax.tree_util.tree_map(
+          lambda x: jax.sharding.NamedSharding(
+              mesh,
+              x,
+              memory_kind=default_memory_kind,
+          ),
+          nnx.get_partition_spec(state),
+      )
+      model = nnx.merge(graph_def, reshard_pytree(state, dst_shardings))
+  return model
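A hedged usage sketch of the new helper, assuming `lora_model` is the nnx module returned by qwix.apply_lora_to_model (as in the call sites above) and that its parameters carry partition-spec annotations; the mesh construction and axis name are illustrative only:

import numpy as np
import jax
from tunix.rl import reshard

# Hypothetical 1-D mesh over all local devices.
mesh = jax.sharding.Mesh(np.array(jax.devices()), ('fsdp',))

# Returns the model unchanged when its parameters already live on `mesh`;
# otherwise it splits the module, builds NamedShardings from the annotated
# partition specs, reshards the state, and merges it back.
lora_model = reshard.reshard_model_to_mesh(lora_model, mesh)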

tunix/tests/test_common.py (7 additions, 10 deletions)

@@ -14,22 +14,23 @@
 
 """Common test utilities."""
 
-from typing import List, Tuple, Any
 from collections.abc import Iterable
 import dataclasses
+import gc
+import os
+import shutil
+from typing import Any, List, Tuple
 
 from flax import config as flax_config
 from flax import nnx
+import huggingface_hub
 import jax
 import jax.numpy as jnp
 import numpy as np
 import qwix
+from tunix.rl import reshard
 
 import sentencepiece as spm
-import huggingface_hub
-import os
-import shutil
-import gc
 
 if hasattr(flax_config, 'flax_always_shard_variable'):
   flax_config.update('flax_always_shard_variable', False)

@@ -159,11 +160,7 @@ def get_lora_model(
       model, lora_provider, **dummy_model_input
   )
   if mesh is not None:
-    with mesh:
-      state = nnx.state(lora_model)
-      pspecs = nnx.get_partition_spec(state)
-      sharded_state = jax.lax.with_sharding_constraint(state, pspecs)
-      nnx.update(lora_model, sharded_state)
+    lora_model = reshard.reshard_model_to_mesh(lora_model, mesh)
   return lora_model
