6 changes: 1 addition & 5 deletions csrc/ops/alias.cpp
@@ -1023,11 +1023,7 @@ TensorView* broadcast(
       nBCastDims - n_broadcasts);
 
   if (n_broadcasts == 0) {
-    auto identity = set(inp);
-    NVF_ERROR(
-        identity->getValType().value() == ValType::TensorView,
-        "Expected identity op, but didn't get a TensorView back.");
-    return identity->as<TensorView>();
+    return inp;
Collaborator: The change seems reasonable to me.

   }
 
   std::vector<IterDomain*> out_domain;
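
As context for the change above: when every entry of the broadcast-flag vector is false, broadcast() now returns the input TensorView unchanged instead of routing it through set(). A minimal sketch of the resulting contract (a hypothetical test body reusing the helpers from the tests below; not code from this PR):

  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());

  TensorView* in = makeContigConcreteTensor({2, 3});
  fusion->addInput(in);

  // No dim is marked for broadcasting, so broadcast() is a no-op:
  // the same TensorView comes back and no LoadStoreOp with type Set
  // is added to the fusion.
  TensorView* out = broadcast(in, {false, false});
  EXPECT_EQ(out, in);
  EXPECT_TRUE(fusion->exprs().empty());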
36 changes: 36 additions & 0 deletions tests/cpp/test_alias.cpp
@@ -14,6 +14,7 @@
 #include <alias_analysis.h>
 #include <fusion.h>
 #include <fusion_profiler.h>
+#include <ir/internal_nodes.h>
 #include <ir/iostream.h>
 #include <ir/utils.h>
 #include <ops/alias.h>
@@ -1659,4 +1660,39 @@ TEST_F(AliasTest, SliceOfExpandedBroadcast) {
      executor_cache.fusion(), out_tensors, {in_tensor}, __LINE__, __FILE__);
}

TEST_F(AliasTest, BroadcastInDimNoRedundantSet) {
  // Test that broadcast with no actual broadcasting does not introduce
  // a redundant Set operation.
  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());

  TensorView* in = makeContigConcreteTensor({2, 3});
  fusion->addInput(in);

  // Call broadcast with all dims marked as non-broadcast.
  // This should not introduce a Set operation.
  std::vector<bool> is_broadcast_dim = {false, false};
  TensorView* out = broadcast(in, is_broadcast_dim);
Collaborator: The fusion is empty, so the verification seems silly.

Suggested change:
-  TensorView* out = broadcast(in, is_broadcast_dim);
+  TensorView* maybe_bcast = broadcast(in, is_broadcast_dim);
+  TensorView* out = abs(maybe_bcast);

Or just check:
  EXPECT_EQ(fusion->exprs().size(), 0);

Collaborator: @copilot, please implement the change according to feedback from Ryan.

Author: Added abs() operation after the broadcast and updated the comment as suggested. This ensures the fusion has expressions to verify. Commit: 3b44991


  fusion->addOutput(out);

  // Verify that no LoadStoreOp with type Set is in the fusion.
  auto exprs = fusion->exprs();
  for (auto expr : exprs) {
    if (auto load_store = dynamic_cast<LoadStoreOp*>(expr)) {
      EXPECT_NE(load_store->opType(), LoadStoreOpType::Set)
          << "Unexpected Set operation found in fusion with no-op broadcast";
    }
  }

  // Verify the fusion still works correctly.
  FusionExecutorCache executor_cache(std::move(fusion));
  at::Tensor in_tensor =
      at::randn({2, 3}, at::dtype(at::kFloat).device(at::kCUDA));
  auto out_tensors = executor_cache.runFusionWithInputs({in_tensor});

  testValidate(
      executor_cache.fusion(), out_tensors, {in_tensor}, __LINE__, __FILE__);
}

} // namespace nvfuser
55 changes: 55 additions & 0 deletions tests/python/test_python_frontend.py
@@ -667,6 +667,61 @@ def fusion_func_3(fd: FusionDefinition):
        )
        self.assertEqual(eager_out, nvf_out[0])

    def test_broadcast_in_dim_no_redundant_set(self):
        """
        Test that broadcast_in_dim doesn't introduce redundant Set operations
        when all input dimensions are in broadcast_dims (i.e., no actual broadcast).

        This verifies the fix for the issue where broadcast_in_dim would create
        a redundant float-to-float cast operation via Set when the input already
        had the correct shape.
        """
        inputs = [
            torch.ones(1, 4, device="cuda"),
            torch.randn(2, 4, device="cuda"),
        ]

        def fusion_with_broadcast_in_dim(fd: FusionDefinition):
            t0 = fd.define_tensor(shape=[1, -1], contiguity=[None, True])
            t1 = fd.define_tensor(shape=[-1, -1], contiguity=[True, True])
            # broadcast_in_dim with broadcast_dims=[0, 1] means no new dims are added
Copilot AI (Nov 12, 2025): [nitpick] The comment states "broadcast_in_dim with broadcast_dims=[0, 1] means no new dims are added" but could be more precise. While technically correct that no new dimensions are added to the tensor (input is 2D, output is 2D), this comment could be clearer. Consider rewording to: "broadcast_in_dim with all input dims in broadcast_dims means no broadcasting operation occurs" or "broadcast_dims=[0, 1] for a 2D input maps all input dimensions, so no new broadcast dimensions are created".

Suggested change:
-            # broadcast_in_dim with broadcast_dims=[0, 1] means no new dims are added
+            # broadcast_in_dim with all input dims in broadcast_dims means no broadcasting operation occurs
Collaborator: The review comment is not correct; a broadcasting operation does occur, and it involves expanding the 1-sized dimension.
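
To make the disagreement concrete, here is a small standalone sketch (plain C++, independent of nvFuser) of the shape reasoning for this test: with input shape [1, 4], output shape [2, 4], and broadcast_dims = [0, 1], every input dimension maps to an output dimension, so no new broadcast dimension is created, yet the size-1 input dimension is still expanded to match:

  #include <cstdio>
  #include <vector>

  int main() {
    std::vector<int> in_shape = {1, 4};
    std::vector<int> out_shape = {2, 4};
    std::vector<int> broadcast_dims = {0, 1};  // output dim for each input dim

    std::vector<bool> mapped(out_shape.size(), false);
    for (size_t i = 0; i < broadcast_dims.size(); ++i) {
      int d = broadcast_dims[i];
      mapped[d] = true;
      if (in_shape[i] == 1 && out_shape[d] != 1) {
        std::printf("output dim %d: expanded from 1 to %d\n", d, out_shape[d]);
      }
    }
    for (size_t d = 0; d < out_shape.size(); ++d) {
      if (!mapped[d]) {
        std::printf("output dim %zu: new broadcast dim\n", d);
      }
    }
    // Prints only "output dim 0: expanded from 1 to 2": no new broadcast
    // dims are created, but an expand still happens, as noted above.
    return 0;
  }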

            t2 = fd.ops.broadcast_in_dim(t0, t1.shape(), [0, 1])
            t3 = fd.ops.add(t2, t1)
            fd.add_output(t3)

        def fusion_with_expand(fd: FusionDefinition):
            t0 = fd.define_tensor(shape=[1, -1], contiguity=[None, True])
            t1 = fd.define_tensor(shape=[-1, -1], contiguity=[True, True])
            # Direct expand without broadcast_in_dim
            t2 = fd.ops.expand(t0, t1.shape())
            t3 = fd.ops.add(t2, t1)
            fd.add_output(t3)

        # Execute both fusions and verify they produce the same result
        nvf_out_bid, fd_bid = self.exec_nvfuser(fusion_with_broadcast_in_dim, inputs)
        nvf_out_exp, fd_exp = self.exec_nvfuser(fusion_with_expand, inputs)

        # Verify correctness
        eager_out = inputs[0] + inputs[1]
        self.assertEqual(eager_out, nvf_out_bid[0])
        self.assertEqual(eager_out, nvf_out_exp[0])
Comment on lines +700 to +707
Collaborator: These checks (and inputs) are probably not necessary since the point of the test is just that the IR should match exactly whether we use broadcast_in_dim or expand whenever there is no new broadcast.


        # Check that the broadcast_in_dim fusion doesn't have a redundant Set operation
        # by comparing the IR string representations
        bid_str = str(fd_bid)
        exp_str = str(fd_exp)

        # Count tensor cast operations (not scalar casts)
        bid_tensor_casts = bid_str.count("fd.ops.cast(t")
        exp_tensor_casts = exp_str.count("fd.ops.cast(t")
Collaborator: Is there a better way to test what nodes a fusion definition consists of?

Collaborator: IIUC, the broadcast is a no-op, so the fusion definitions should be the same.

Suggested change:
-        # Check that the broadcast_in_dim fusion doesn't have a redundant Set operation
-        # by comparing the IR string representations
-        bid_str = str(fd_bid)
-        exp_str = str(fd_exp)
-        # Count tensor cast operations (not scalar casts)
-        bid_tensor_casts = bid_str.count("fd.ops.cast(t")
-        exp_tensor_casts = exp_str.count("fd.ops.cast(t")
+        # Check that the broadcast_in_dim fusion doesn't have a redundant Set operation
+        # by comparing the IR string representations
+        assert str(fd_bid) == str(fd_exp)

Author: Updated to use direct string comparison as suggested. Since the broadcast is a no-op, the fusion definitions should indeed be identical. Commit: 3b44991


        # They should have the same number of tensor casts
        self.assertEqual(
            bid_tensor_casts,
            exp_tensor_casts,
            f"broadcast_in_dim has {bid_tensor_casts} tensor casts but expand has {exp_tensor_casts}",
        )

    # Testing a scenario where the broadcast is necessary to realize the output
    def test_tensor_shape_with_output_bcast(self):
        def fusion_func(fd: FusionDefinition):