csrc/ops/alias.cpp (6 changes: 1 addition & 5 deletions)
@@ -1023,11 +1023,7 @@ TensorView* broadcast(
       nBCastDims - n_broadcasts);
 
   if (n_broadcasts == 0) {
-    auto identity = set(inp);
-    NVF_ERROR(
-        identity->getValType().value() == ValType::TensorView,
-        "Expected identity op, but didn't get a TensorView back.");
-    return identity->as<TensorView>();
+    return inp;
   }
 
   std::vector<IterDomain*> out_domain;

Collaborator (inline comment on the added `return inp;`): The change seems reasonable to me.
tests/cpp/test_alias.cpp (39 changes: 39 additions & 0 deletions)
@@ -14,6 +14,7 @@
 #include <alias_analysis.h>
 #include <fusion.h>
 #include <fusion_profiler.h>
+#include <ir/internal_nodes.h>
 #include <ir/iostream.h>
 #include <ir/utils.h>
 #include <ops/alias.h>
@@ -1659,4 +1660,42 @@ TEST_F(AliasTest, SliceOfExpandedBroadcast) {
       executor_cache.fusion(), out_tensors, {in_tensor}, __LINE__, __FILE__);
 }

+TEST_F(AliasTest, BroadcastInDimNoRedundantSet) {
+  // Test that broadcast with no actual broadcasting does not introduce
+  // a redundant Set operation.
+  auto fusion = std::make_unique<Fusion>();
+  FusionGuard fg(fusion.get());
+
+  TensorView* in = makeContigConcreteTensor({2, 3});
+  fusion->addInput(in);
+
+  // Call broadcast with all dims marked as non-broadcast. This should not
+  // introduce a Set operation and should return the input directly.
+  std::vector<bool> is_broadcast_dim = {false, false};
+  TensorView* maybe_bcast = broadcast(in, is_broadcast_dim);
+
+  // Add an operation to ensure we have something to test.
+  TensorView* out = abs(maybe_bcast);
+
+  fusion->addOutput(out);
+
+  // Verify that no LoadStoreOp with type Set is in the fusion.
+  auto exprs = fusion->exprs();
+  for (auto expr : exprs) {
+    if (auto load_store = dynamic_cast<LoadStoreOp*>(expr)) {
+      EXPECT_NE(load_store->opType(), LoadStoreOpType::Set)
+          << "Unexpected Set operation found in fusion with no-op broadcast";
+    }
+  }
+
+  // Verify the fusion still works correctly.
+  FusionExecutorCache executor_cache(std::move(fusion));
+  at::Tensor in_tensor =
+      at::randn({2, 3}, at::dtype(at::kFloat).device(at::kCUDA));
+  auto out_tensors = executor_cache.runFusionWithInputs({in_tensor});
+
+  testValidate(
+      executor_cache.fusion(), out_tensors, {in_tensor}, __LINE__, __FILE__);
+}
+
 } // namespace nvfuser
tests/python/test_python_frontend.py (44 changes: 44 additions & 0 deletions)
@@ -667,6 +667,50 @@ def fusion_func_3(fd: FusionDefinition):
         )
         self.assertEqual(eager_out, nvf_out[0])

+    def test_broadcast_in_dim_no_redundant_set(self):
+        """
+        Test that broadcast_in_dim doesn't introduce redundant Set operations
+        when all input dimensions are mapped in broadcast_dims (i.e., no new
+        broadcast dimensions are created).
+
+        This verifies the fix for the issue where broadcast_in_dim would
+        create a redundant float-to-float cast operation via Set when no new
+        broadcast dimensions were needed.
+        """
+        inputs = [
+            torch.ones(1, 4, device="cuda"),
+            torch.randn(2, 4, device="cuda"),
+        ]
+
+        def fusion_with_broadcast_in_dim(fd: FusionDefinition):
+            t0 = fd.define_tensor(shape=[1, -1], contiguity=[None, True])
+            t1 = fd.define_tensor(shape=[-1, -1], contiguity=[True, True])
+            # broadcast_in_dim with broadcast_dims=[0, 1] means no new dims are added
Copilot AI (Nov 12, 2025), inline comment: [nitpick] The comment states "broadcast_in_dim with broadcast_dims=[0, 1] means no new dims are added" but could be more precise. While it is technically correct that no new dimensions are added to the tensor (the input is 2D and the output is 2D), the comment could be clearer. Consider rewording to: "broadcast_in_dim with all input dims in broadcast_dims means no broadcasting operation occurs" or "broadcast_dims=[0, 1] for a 2D input maps all input dimensions, so no new broadcast dimensions are created".

Suggested change:
-            # broadcast_in_dim with broadcast_dims=[0, 1] means no new dims are added
+            # broadcast_in_dim with all input dims in broadcast_dims means no broadcasting operation occurs

Collaborator: The review comment is not correct; a broadcasting operation does occur here, and it involves expanding the 1-sized dimension.
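To make the collaborator's point concrete, here is a minimal PyTorch sketch (illustration only, not part of the PR; shapes are borrowed from the test above, and plain torch.expand stands in for the expansion the fusion performs):

import torch

# Shapes from the test: t0 is (1, 4) and is added to a (2, 4) tensor.
t0 = torch.ones(1, 4)

# broadcast_dims=[0, 1] maps both existing input dims onto the output, so no
# *new* broadcast dimension is inserted (input and output are both 2D), but
# the size-1 dim 0 is still expanded from 1 to 2: a broadcast does occur.
t2 = t0.expand(2, 4)
assert t2.shape == (2, 4)

# Contrast: a 1D input of shape (4,) with broadcast_dims=[1] would first gain
# a new broadcast dimension at position 0 and only then be expanded.
t3 = torch.ones(4).unsqueeze(0).expand(2, 4)
assert t3.shape == (2, 4)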

+            t2 = fd.ops.broadcast_in_dim(t0, t1.shape(), [0, 1])
+            t3 = fd.ops.add(t2, t1)
+            fd.add_output(t3)
+
+        def fusion_with_expand(fd: FusionDefinition):
+            t0 = fd.define_tensor(shape=[1, -1], contiguity=[None, True])
+            t1 = fd.define_tensor(shape=[-1, -1], contiguity=[True, True])
+            # Direct expand without broadcast_in_dim
+            t2 = fd.ops.expand(t0, t1.shape())
+            t3 = fd.ops.add(t2, t1)
+            fd.add_output(t3)
+
+        # Execute both fusions and verify they produce the same result
+        nvf_out_bid, fd_bid = self.exec_nvfuser(fusion_with_broadcast_in_dim, inputs)
+        nvf_out_exp, fd_exp = self.exec_nvfuser(fusion_with_expand, inputs)
+
+        # Verify correctness
+        eager_out = inputs[0] + inputs[1]
+        self.assertEqual(eager_out, nvf_out_bid[0])
+        self.assertEqual(eager_out, nvf_out_exp[0])
Collaborator (on lines +700 to +707): These checks (and inputs) are probably not necessary, since the point of the test is just that the IR should match exactly whether we use broadcast_in_dim or expand whenever there is no new broadcast.

+        # Check that the broadcast_in_dim fusion doesn't have a redundant Set
+        # operation by comparing the IR string representations - they should
+        # be identical since broadcast is a no-op in this case
+        self.assertEqual(str(fd_bid), str(fd_exp))

     # Testing a scenario where the broadcast is necessary to realize the output
     def test_tensor_shape_with_output_bcast(self):
         def fusion_func(fd: FusionDefinition):