Skip to content
Open
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import com.github.javaparser.StaticJavaParser;
import com.github.javaparser.ast.CompilationUnit;
import com.github.javaparser.ast.ImportDeclaration;
import com.github.javaparser.ast.Modifier;
import com.github.javaparser.ast.Node;
import com.github.javaparser.ast.NodeList;
import com.github.javaparser.ast.PackageDeclaration;
import com.github.javaparser.ast.body.ClassOrInterfaceDeclaration;
import com.github.javaparser.ast.body.FieldDeclaration;
import com.github.javaparser.ast.body.MethodDeclaration;
import com.github.javaparser.ast.body.Parameter;
import com.github.javaparser.ast.body.TypeDeclaration;
Expand Down Expand Up @@ -180,12 +182,35 @@ public void accept(Node node) {
}
if (node instanceof MethodDeclaration) {
MethodDeclaration methodDecl = (MethodDeclaration) node;
export.accept(methodDecl.getType());
for (Parameter param : methodDecl.getParameters()) {
export.accept(param.getType());
// Only export types from non-private methods
if (!methodDecl.isPrivate()) {
export.accept(methodDecl.getType());
for (Parameter param : methodDecl.getParameters()) {
export.accept(param.getType());
}
} else {
// Private methods still consume types internally
consumed.accept(methodDecl.getType());
for (Parameter param : methodDecl.getParameters()) {
consumed.accept(param.getType());
}
}
methodDecl.getThrownExceptions().stream().forEach(consumed);
}
if (node instanceof FieldDeclaration) {
FieldDeclaration fieldDecl = (FieldDeclaration) node;
// Export field types from non-private fields (public, protected, package-private)
if (!fieldDecl.isPrivate()) {
for (VariableDeclarator var : fieldDecl.getVariables()) {
export.accept(var.getType());
}
} else {
// Private fields still consume types internally
for (VariableDeclarator var : fieldDecl.getVariables()) {
consumed.accept(var.getType());
}
}
}
if (node instanceof ClassOrInterfaceDeclaration) {
ClassOrInterfaceDeclaration classOrIntfDecl = (ClassOrInterfaceDeclaration) node;
classOrIntfDecl.getExtendedTypes().stream().forEach(export);
Expand Down
155 changes: 146 additions & 9 deletions src/python/pants/backend/java/dependency_inference/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@
from pants.jvm.target_types import JvmResolveField
from pants.util.ordered_set import FrozenOrderedSet, OrderedSet

# Java standard library package prefixes - types starting with these are always fully qualified
JAVA_STDLIB_PREFIXES = frozenset([
"java.", "javax.", "jakarta.", "jdk.",
"com.sun.", "sun.", # Oracle internal
"org.w3c.", "org.xml.", "org.ietf.", "org.omg.", # Standards
])


@dataclass(frozen=True)
class JavaSourceDependenciesInferenceFieldSet(FieldSet):
Expand Down Expand Up @@ -91,15 +98,13 @@ async def infer_java_dependencies_and_exports_via_source_analysis(
if java_infer_subsystem.consumed_types:
package = analysis.declared_package

# 13545: `analysis.consumed_types` may be unqualified (package-local or imported) or qualified
# (prefixed by package name). Heuristic for now is that if there's a `.` in the type name, it's
# probably fully qualified. This is probably fine for now.
maybe_qualify_types = (
f"{package}.{consumed_type}" if package and "." not in consumed_type else consumed_type
for consumed_type in analysis.consumed_types
)
# Qualify each consumed type, potentially generating multiple candidates
type_candidates: OrderedSet[str] = OrderedSet()
for consumed_type in analysis.consumed_types:
candidates = qualify_consumed_type(consumed_type, package, analysis.imports)
type_candidates.update(candidates)

types.update(maybe_qualify_types)
types.update(type_candidates)

# Resolve the export types into (probable) types:
# First produce a map of known consumed unqualified types to possible qualified names
Expand All @@ -122,7 +127,7 @@ async def infer_java_dependencies_and_exports_via_source_analysis(
dependencies: OrderedSet[Address] = OrderedSet()
exports: OrderedSet[Address] = OrderedSet()
for typ in types:
for matches in symbol_mapping.addresses_for_symbol(typ, resolve).values():
for matches in lookup_type_with_fallback(typ, symbol_mapping, resolve).values():
explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference(
matches,
address,
Expand All @@ -140,6 +145,48 @@ async def infer_java_dependencies_and_exports_via_source_analysis(
explicitly_provided_exports = set(matches) & set(explicitly_provided_deps.includes)
exports.update(explicitly_provided_exports)

# For each direct dependency, resolve its exported types as transitive dependencies
# This handles the case where A imports B, and B has a field of type C
# Even though A never imports C, the Java compiler needs C's class file
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know the JVM backend too well, but I guess this is necessary because we only put the direct deps of A on the compiler classpath? So we have to treat C as a direct dep.

But shouldn't we really be putting all transitive deps on the compiler classpath? After all, couldn't we contrive a more complex situation where C has a reference to some D etc?

And if we did so then presumably this problem wouldn't occur, because we do detect that B depends on C and A depends on B, and therefore C is in the transitive closure?

Or are there reasons not to put all transitive dep classes classpath?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point. In my experience, all transitive deps should be on the classpath when compiling Java sources, so I'm not sure why we're not doing it here. I agree that's a better approach. Let me revisit this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming the reason for not just putting all transitive dep classes on the classpath was potentially performance related? (although I didn't run a performance test with and without this change) Putting all transitive dep classes on the classpath is good from a correctness standpoint and obviates the need for us to put special logic in place for exporting certain classes and references ahead to other consumers.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly, I'll see if I can find someone who has context for this decision. As far as I can tell (and AI seems to agree) compiling A conceptually requires all its transitive deps, and trying to tease out which ones are "really" needed is a mug's game.

I had thought we were already doing this though, I'm a bit surprised if we're not. But if we were your use case would work, no? I assume we do correctly infer the deps A->B and B->C already?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct, if we were already doing this, then I wouldn't have had compilation errors in my project for the A -> B -> C use case. But the other issue this PR is solving around inner classes still would have been a problem, but now it seems the two are unrelated aside from the fact they require fixes to the same files

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it probably makes sense to split this into two PRs. The inner classes one is fairly straightforward and we can get it in ASAP. The other is a potentially more substantial change that would be best reviewed in isolation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's a good idea. I've split the inner class changes out into #22889 I'll rebase this PR to just include the transitive dep stuff after that other PR lands.

direct_dependencies = list(dependencies)
transitive_from_exports: OrderedSet[Address] = OrderedSet()

for dep_address in direct_dependencies:
# Only process first-party Java sources (not third-party artifacts)
dep_wrapped = await resolve_target(
WrappedTargetRequest(dep_address, description_of_origin="<infallible>"), **implicitly()
)
dep_tgt = dep_wrapped.target

# Skip if not a Java source file
if not dep_tgt.has_field(JavaSourceField):
continue

# Get the dependency's source analysis
dep_source_files = await determine_source_files(SourceFilesRequest([dep_tgt[JavaSourceField]]))
dep_analysis = await resolve_fallible_result_to_analysis(
**implicitly(JavaSourceDependencyAnalysisRequest(source_files=dep_source_files))
)

# Get the dependency's package for qualifying its export types
dep_package = dep_analysis.declared_package

# Qualify each export type from the dependency
for export_type in dep_analysis.export_types:
# The export types from the dependency's analysis are from that file's perspective
# We need to qualify them based on the dependency's package and imports
export_candidates = qualify_consumed_type(export_type, dep_package, dep_analysis.imports)

for qualified_export in export_candidates:
# Look up the export type in the symbol map
for matches in lookup_type_with_fallback(qualified_export, symbol_mapping, resolve).values():
maybe_disambiguated = explicitly_provided_deps.disambiguated(matches)
if maybe_disambiguated and maybe_disambiguated != dep_address:
transitive_from_exports.add(maybe_disambiguated)

# Add transitive dependencies from exports
dependencies.update(transitive_from_exports)

# Files do not export themselves. Don't be silly.
if address in exports:
exports.remove(address)
Expand All @@ -157,6 +204,96 @@ async def infer_java_dependencies_via_source_analysis(
return InferredDependencies(jids.dependencies)


def qualify_consumed_type(
type_name: str,
source_package: str | None,
imports: tuple[JavaImport, ...],
) -> tuple[str, ...]:
"""
Qualify a consumed type name, returning possible qualified names to try.

Returns a tuple of candidates in priority order. The symbol map should be checked
for each candidate until a match is found.

Args:
type_name: The type name as it appears in the source (may be qualified or unqualified)
source_package: The package of the source file, or None if unnamed package
imports: The imports declared in the source file

Returns:
Tuple of possible fully-qualified type names, in priority order
"""
# Case 1: No dots → definitely unqualified, needs package prefix
if "." not in type_name:
if source_package:
return (f"{source_package}.{type_name}",)
else:
return (type_name,) # Unnamed package

# Case 2: Known JDK/stdlib type → already fully qualified
if any(type_name.startswith(prefix) for prefix in JAVA_STDLIB_PREFIXES):
return (type_name,)

# Case 3: Type fully qualified name appears in imports → already resolved
import_names = {imp.name for imp in imports}
if type_name in import_names:
return (type_name,)

# Case 4: Outer class is imported → resolve inner class through import
# E.g., "B.InnerB" where "com.other.B" is imported → "com.other.B.InnerB"
first_part = type_name.split(".")[0]
for imp in imports:
if imp.name.endswith(f".{first_part}"):
# Found import for outer class, construct fully qualified inner class name
qualified = imp.name + type_name[len(first_part):]
return (qualified,)

# Case 5: Ambiguous - has dots but not stdlib, not imported
# Most likely: same-package inner class like "B.InnerB" → "com.example.B.InnerB"
# Less likely: third-party FQTN without import
if source_package:
# Try same-package first (most common), then as-is (fallback for third-party)
return (f"{source_package}.{type_name}", type_name)
else:
return (type_name,)


def lookup_type_with_fallback(
typ: str,
symbol_mapping: SymbolMapping,
resolve: str
) -> dict[str, FrozenOrderedSet[Address]]:
"""
Look up a type in the symbol map, with fallback to parent types for inner classes.

Args:
typ: Fully qualified type name (e.g., "com.example.B.InnerB")
symbol_mapping: The symbol map to search
resolve: The JVM resolve to search within

Returns:
Dict mapping namespaces to addresses, empty if no match found
"""
# Try exact match first
matches = symbol_mapping.addresses_for_symbol(typ, resolve)
if matches:
return matches

# If not found and typ looks like it might be an inner class (has dots after package)
# Try stripping inner class parts one by one
# E.g., "com.example.B.InnerB" → try "com.example.B"
# "com.example.Outer.Middle.Inner" → try "com.example.Outer.Middle", then "com.example.Outer"
parts = typ.split(".")
if len(parts) > 2: # At least package + outer + inner
for i in range(len(parts) - 1, 1, -1): # Don't try single-part names
parent_type = ".".join(parts[:i])
matches = symbol_mapping.addresses_for_symbol(parent_type, resolve)
if matches:
return matches

return {}


def dependency_name(imp: JavaImport):
if imp.is_static and not imp.is_asterisk:
return imp.name.rsplit(".", maxsplit=1)[0]
Expand Down
Loading