
Commit 7f73ad5

faran928 authored and meta-codesync[bot] committed
Keep the logic for inference tensorpool forward consistent w/ set up before hetero sharding (#3553)
Summary: Pull Request resolved: #3553

Keep the logic for inference TensorPool forward consistent with the setup that existed before hetero sharding. Using the optional tensor wrapper interferes with lowering jobs, because the model split boundaries come out differently when a TensorPool and a TBE exist together. Some of the test setup is also reverted, since this change swaps the order of some graph nodes.

Differential Revision: D87326553

fbshipit-source-id: ad29e23082e89dacb60f7af31a00e7c848f57f43
1 parent 5391be5 · commit 7f73ad5
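
For context on the change: the commit removes a call to `_fx_item_unwrap_optional_tensor` from the forward path. The sketch below is a hypothetical reconstruction of what such an optional-unwrap helper typically looks like in FX-traced code; the helper name `unwrap_optional_tensor` and the `torch.fx.wrap` registration are assumptions for illustration, and the actual torchrec implementation may differ.

```python
from typing import Optional

import torch
import torch.fx


# Hypothetical sketch of an optional-tensor unwrap helper like the one this
# commit removes. The real torchrec helper (_fx_item_unwrap_optional_tensor)
# may be implemented differently.
@torch.fx.wrap
def unwrap_optional_tensor(t: Optional[torch.Tensor]) -> torch.Tensor:
    # Convince the type checker / tracer that the Optional is non-None here.
    assert t is not None, "expected a non-None tensor"
    return t
```

A helper registered this way is recorded as its own call node in the traced graph. That extra node is the kind of thing that can shift where the model split boundary falls during lowering, which matches the summary above: replacing the wrapped call with a plain assignment (`unbucketize_permute_non_opt = unbucketize_permute`) keeps the graph shaped the way it was before hetero sharding.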

File tree

1 file changed: +15 −9 lines changed

torchrec/distributed/tensor_pool.py

Lines changed: 15 additions & 9 deletions
@@ -473,9 +473,7 @@ def forward(self, ids: torch.Tensor) -> torch.Tensor:
         dist_input, unbucketize_permute, bucket_mapping, bucketized_lengths = (
             self._lookup_ids_dist(ids)
         )
-        unbucketize_permute_non_opt = _fx_item_unwrap_optional_tensor(
-            unbucketize_permute
-        )
+        unbucketize_permute_non_opt = unbucketize_permute
 
         lookup = self._lookup_local(dist_input)
 
@@ -512,12 +510,20 @@ def forward(self, ids: torch.Tensor) -> torch.Tensor:
         )
 
         output = self._lookup_values_dist(lookup_list)
-
-        return index_select_view(
-            output,
-            unbucketize_permute_non_opt.to(device=output.device),
-            self._dim,
-        )
+        # When memory_capacity_per_rank is added then boundary split for the
+        # model is different. Handling device movement accordingly
+        if self._sharding_plan.memory_capacity_per_rank is None:
+            return index_select_view(
+                output,
+                unbucketize_permute_non_opt,
+                self._dim,
+            )
+        else:
+            return index_select_view(
+                output,
+                unbucketize_permute_non_opt.to(device=output.device),
+                self._dim,
+            )
 
     # pyre-ignore
     def _update_values_dist(self, ctx: ObjectPoolShardingContext, values: torch.Tensor):
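
To make the new return path easier to follow, here is a standalone sketch of the branch the second hunk adds. The function name `gather_output` is illustrative, `memory_capacity_per_rank` is reduced to a plain parameter, and `index_select_view` is assumed to be a row gather followed by a reshape; the real torchrec helper may differ.

```python
import torch


def gather_output(
    output: torch.Tensor,
    permute: torch.Tensor,
    dim: int,
    memory_capacity_per_rank: object = None,
) -> torch.Tensor:
    """Sketch of the branch added in this commit (names are illustrative).

    When memory_capacity_per_rank is unset (the pre-hetero-sharding setup),
    the permute indices are used as-is, so the traced graph matches the old
    single-path version. Only the hetero-sharding path moves the indices to
    the output's device before gathering.
    """
    if memory_capacity_per_rank is not None:
        permute = permute.to(device=output.device)
    # Assumed behavior of index_select_view: gather rows, then view as
    # (-1, dim).
    return output[permute].view(-1, dim)


# Example: the default path leaves the indices on their original device.
out = torch.arange(12, dtype=torch.float32).view(4, 3)
perm = torch.tensor([2, 0, 3, 1])
print(gather_output(out, perm, dim=3))
```

The design point of the branch is that the `None` case reproduces the pre-hetero-sharding forward exactly, so lowering sees an unchanged graph, while the device move is confined to the case where memory_capacity_per_rank is set.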
